113 lines
3.8 KiB
Java
113 lines
3.8 KiB
Java
|
import java.util.*;
|
||
|
import java.util.stream.IntStream;
|
||
|
|
||
|
// https://en.wikipedia.org/wiki/Suffix_array
|
||
|
/**
 * Suffix array, rotation array and longest-common-prefix (LCP) array construction
 * for arbitrary {@link CharSequence} inputs.
 *
 * <p>All methods are static and keep no state between calls.
 */
public class SuffixArray {

    /**
     * Sorts the suffixes of {@code S} in O(n*log(n)) using prefix doubling.
     *
     * @param S the text whose suffixes are sorted
     * @return an array {@code sa} of length {@code S.length()} where {@code sa[i]} is the
     *         start index of the i-th smallest suffix; empty for empty input
     */
    public static int[] suffixArray(CharSequence S) {
        int n = S.length();

        // Stable sort of suffixes by their first character. Indices are fed in
        // descending order so that, among equal characters, the shorter suffix
        // (the one closer to the end of the string) comes first.
        int[] sa = IntStream.range(0, n).map(i -> n - 1 - i).boxed()
                .sorted((a, b) -> Character.compare(S.charAt(a), S.charAt(b)))
                .mapToInt(Integer::intValue)
                .toArray();

        // sa[i]      - suffix on i'th position after sorting by first len characters
        // classes[i] - equivalence class of the i'th suffix after sorting by first len characters
        int[] classes = S.chars().toArray();

        for (int len = 1; len < n; len *= 2) {
            int[] c = classes.clone();
            for (int i = 0; i < n; i++) {
                // condition sa[i - 1] + len < n simulates 0-symbol at the end of the string
                // a separate class is created for each suffix followed by simulated 0-symbol
                boolean sameAsPrevious = i > 0
                        && c[sa[i - 1]] == c[sa[i]]
                        && sa[i - 1] + len < n
                        && c[sa[i - 1] + len / 2] == c[sa[i] + len / 2];
                classes[sa[i]] = sameAsPrevious ? classes[sa[i - 1]] : i;
            }

            // Suffixes are already sorted by first len characters.
            // Now sort suffixes by first len * 2 characters.
            // cnt[k] is the next free slot of the bucket that starts at position k.
            int[] cnt = IntStream.range(0, n).toArray();
            int[] s = sa.clone();
            for (int i = 0; i < n; i++) {
                // s[i]         - order of suffixes sorted by first len characters
                // (s[i] - len) - order of suffixes sorted only by second len characters
                int s1 = s[i] - len;
                // sort only suffixes of length > len, others are already sorted
                if (s1 >= 0) {
                    sa[cnt[classes[s1]]++] = s1;
                }
            }
        }
        return sa;
    }

    /**
     * Sorts the cyclic rotations of {@code S} in O(n*log(n)) using prefix doubling.
     *
     * @param S the text whose rotations are sorted
     * @return an array of length {@code S.length()} holding rotation start indices in
     *         sorted order; empty for empty input
     */
    public static int[] rotationArray(CharSequence S) {
        int n = S.length();

        // Stable sort of rotation start positions by their first character.
        int[] sa = IntStream.range(0, n).boxed()
                .sorted((a, b) -> Character.compare(S.charAt(a), S.charAt(b)))
                .mapToInt(Integer::intValue)
                .toArray();
        int[] classes = S.chars().toArray();

        for (int len = 1; len < n; len *= 2) {
            int[] c = classes.clone();
            for (int i = 0; i < n; i++) {
                // Indices wrap around modulo n because rotations are cyclic.
                boolean sameAsPrevious = i > 0
                        && c[sa[i - 1]] == c[sa[i]]
                        && c[(sa[i - 1] + len / 2) % n] == c[(sa[i] + len / 2) % n];
                classes[sa[i]] = sameAsPrevious ? classes[sa[i - 1]] : i;
            }

            // cnt[k] is the next free slot of the bucket that starts at position k.
            int[] cnt = IntStream.range(0, n).toArray();
            int[] s = sa.clone();
            for (int i = 0; i < n; i++) {
                // Rotation starting len positions earlier (cyclically) has s[i] as its second half.
                int s1 = (s[i] - len + n) % n;
                sa[cnt[classes[s1]]++] = s1;
            }
        }
        return sa;
    }

    /**
     * Builds the longest-common-prefix array in O(n).
     *
     * @param sa suffix array of {@code s}, as returned by {@link #suffixArray(CharSequence)}
     * @param s  the text the suffix array was built from
     * @return an array of length {@code max(0, sa.length - 1)} where element {@code i} is the
     *         length of the common prefix of the suffixes starting at {@code sa[i]} and
     *         {@code sa[i + 1]}; empty when {@code sa} has fewer than two entries
     */
    public static int[] lcp(int[] sa, CharSequence s) {
        int n = sa.length;

        // rank is the inverse permutation of sa: rank[p] is the sorted position of suffix p.
        int[] rank = new int[n];
        for (int i = 0; i < n; i++) {
            rank[sa[i]] = i;
        }

        // Guard against n == 0: new int[-1] would throw NegativeArraySizeException.
        int[] lcp = new int[Math.max(0, n - 1)];

        // h carries over between iterations: moving from suffix i to suffix i + 1
        // can shorten the already matched prefix by at most one character.
        for (int i = 0, h = 0; i < n; i++) {
            if (rank[i] < n - 1) {
                int j = sa[rank[i] + 1];
                while (Math.max(i, j) + h < s.length() && s.charAt(i + h) == s.charAt(j + h)) {
                    ++h;
                }
                lcp[rank[i]] = h;
                if (h > 0) {
                    --h;
                }
            }
        }
        return lcp;
    }

    /**
     * Usage example followed by a randomized self-check of all three routines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s1 = "abcab";
        int[] sa1 = suffixArray(s1);

        // print suffixes in lexicographic order
        for (int p : sa1) {
            System.out.println(s1.substring(p));
        }

        System.out.println("lcp = " + Arrays.toString(lcp(sa1, s1)));

        // random test
        Random rnd = new Random(1);
        for (int step = 0; step < 100000; step++) {
            int n = rnd.nextInt(100) + 1;
            StringBuilder s = rnd.ints(n, 0, 10).collect(StringBuilder::new, (sb, i) -> sb.append((char) ('\1' + i)), StringBuilder::append);
            int[] sa = suffixArray(s);
            // Appending '\0' (smaller than every generated character) makes sorted
            // rotations line up with sorted suffixes, shifted by one position.
            int[] ra = rotationArray(s.toString() + '\0');
            int[] lcp = lcp(sa, s);
            for (int i = 0; i + 1 < n; i++) {
                String a = s.substring(sa[i]);
                String b = s.substring(sa[i + 1]);
                if (a.compareTo(b) >= 0
                        || !a.substring(0, lcp[i]).equals(b.substring(0, lcp[i]))
                        || (a + " ").charAt(lcp[i]) == (b + " ").charAt(lcp[i])
                        || sa[i] != ra[i + 1]) {
                    throw new RuntimeException("Random test failed at step " + step + ", position " + i);
                }
            }
        }
        System.out.println("Test passed");
    }
}
|