package com.jwetherell.algorithms.sequence;
import java.util.HashSet;
import java.util.Set;
/**
* The longest common subsequence (LCS) problem is the problem of finding the longest subsequence common to all sequences in a set of sequences (often just two sequences). It differs from problems
* of finding common substrings: unlike substrings, subsequences are not required to occupy consecutive positions within the original sequences.
*
* https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
*
* @author Justin Wetherell
*/
@SuppressWarnings("unchecked")
public class LongestCommonSubsequence {
private static int[][] lengthMatrix = null;
private static Set[][] sequenceMatrix = null;
private LongestCommonSubsequence() { }
public static MatrixPair getLCS(char[] seq1, char[] seq2) {
try {
populateMatrix(seq1, seq2);
for (int i = 0; i < seq1.length; i++) {
for (int j = 0; j < seq2.length; j++) {
lengthMatrix[i + 1][j + 1] = longestCommonSubsequence(i, j, seq1, seq2);
}
}
return (new MatrixPair(lengthMatrix, sequenceMatrix));
} finally {
lengthMatrix = null;
sequenceMatrix = null;
}
}
private static void populateMatrix(char[] seq1, char[] seq2) {
lengthMatrix = new int[seq1.length + 1][seq2.length + 1];
sequenceMatrix = new HashSet[seq1.length][seq2.length];
}
private static int longestCommonSubsequence(int i, int j, char[] seq1, char[] seq2) {
char x = seq1[i];
char y = seq2[j];
int result = 0;
Set set = sequenceMatrix[i][j];
if (set == null)
set = new HashSet();
if (x == y) {
if (i > 0 && j > 0)
set = new HashSet(sequenceMatrix[i - 1][j - 1]);
distribute(x, set);
result = (lengthMatrix[i][j]) + 1;
} else {
int a = lengthMatrix[i][j + 1];
int b = lengthMatrix[i + 1][j];
if (a > b) {
set = new HashSet(sequenceMatrix[i - 1][j]);
result = a;
} else if (b > a) {
set = new HashSet(sequenceMatrix[i][j - 1]);
result = b;
} else if (a == b) {
if (i > 0 && j > 0) {
Set list1 = sequenceMatrix[i - 1][j];
Set list2 = sequenceMatrix[i][j - 1];
set.addAll(list1);
set.addAll(list2);
}
result = a; // a==b
}
}
sequenceMatrix[i][j] = set;
return result;
}
private static void distribute(char c, Set set) {
if (set.size() == 0) {
set.add(String.valueOf(c));
} else {
Object[] strings = set.toArray();
set.clear();
for (Object object : strings) {
String string = (String) object;
String seq = String.valueOf(c);
if (!string.contains(seq))
string = string.concat(seq);
set.add(string);
}
}
}
public static class MatrixPair {
private int[][] lenMatrix = null;
private Set[][] seqMatrix = null;
public MatrixPair(int[][] lengthMatrix, Set[][] sequenceMatrix) {
this.lenMatrix = lengthMatrix;
this.seqMatrix = sequenceMatrix;
}
public int getLongestSequenceLength() {
if (lenMatrix == null)
return 0;
int length1 = lenMatrix.length;
int length2 = lenMatrix[length1 - 1].length;
return lenMatrix[length1 - 1][length2 - 1];
}
public Set getLongestSequences() {
if (seqMatrix == null)
return (new HashSet());
int length1 = seqMatrix.length;
int length2 = seqMatrix[length1 - 1].length;
return seqMatrix[length1 - 1][length2 - 1];
}
public int[][] getLengthMatrix() {
return lenMatrix;
}
public Set[][] getSequenceMatrix() {
return seqMatrix;
}
public String getLengthMatrixString() {
StringBuilder builder = new StringBuilder();
if (lenMatrix == null) {
builder.append("Length matrix is NULL.\n");
} else {
for (int i = 0; i < lenMatrix.length; i++) {
int length = lenMatrix[i].length;
for (int j = 0; j < length; j++) {
int size = lenMatrix[i][j];
builder.append(size);
if (j < length - 1)
builder.append(",\t");
}
builder.append("\n");
}
}
return builder.toString();
}
public String getSequenceMatrixString() {
StringBuilder builder = new StringBuilder();
if (seqMatrix == null) {
builder.append("Sequence matrix is NULL.\n");
} else {
for (int i = 0; i < seqMatrix.length; i++) {
int length = seqMatrix[i].length;
for (int j = 0; j < length; j++) {
Set set = seqMatrix[i][j];
builder.append(set.toString());
if (j < length - 1)
builder.append(", ");
}
builder.append("\n");
}
}
return builder.toString();
}
}
}