#include #include #include using namespace std; unsigned char mask[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; #define tget(i) ( (t[(i)/8]&mask[(i)%8]) ? 1 : 0 ) #define tset(i, b) t[(i)/8]=(b) ? (mask[(i)%8]|t[(i)/8]) : ((~mask[(i)%8])&t[(i)/8]) #define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i]) #define isLMS(i) (i>0 && tget(i) && !tget(i-1)) // find the start or end of each bucket void getBuckets(unsigned char *s, int *bkt, int n, int K, int cs, bool end) { int i, sum = 0; for (i = 0; i <= K; i++) bkt[i] = 0; // clear all buckets for (i = 0; i < n; i++) bkt[chr(i)]++; // compute the size of each bucket for (i = 0; i <= K; i++) { sum += bkt[i]; bkt[i] = end ? sum : sum - bkt[i]; } } // compute SAl void induceSAl(unsigned char *t, int *SA, unsigned char *s, int *bkt, int n, int K, int cs, bool end) { int i, j; getBuckets(s, bkt, n, K, cs, end); // find starts of buckets for (i = 0; i < n; i++) { j = SA[i] - 1; if (j >= 0 && !tget(j)) SA[bkt[chr(j)]++] = j; } } // compute SAs void induceSAs(unsigned char *t, int *SA, unsigned char *s, int *bkt, int n, int K, int cs, bool end) { int i, j; getBuckets(s, bkt, n, K, cs, end); // find ends of buckets for (i = n - 1; i >= 0; i--) { j = SA[i] - 1; if (j >= 0 && tget(j)) SA[--bkt[chr(j)]] = j; } } // find the suffix array SA of s[0..n-1] in {1..K}^n // require s[n-1]=0 (the sentinel!), n>=2 // use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet void SA_IS(unsigned char *s, int *SA, int n, int K, int cs) { int i, j; unsigned char *t = (unsigned char *) malloc(n / 8 + 1); // LS-type array in bits // Classify the type of each character tset(n-2, 0); tset(n-1, 1); // the sentinel must be in s1, important!!! for (i = n - 3; i >= 0; i--) tset(i, (chr(i) 0 && (isLMS(pos+d) || isLMS(prev+d))) break; if (diff) { name++; prev = pos; } pos = (pos % 2 == 0) ? pos / 2 : (pos - 1) / 2; SA[n1 + pos] = name - 1; } for (i = n - 1, j = n - 1; i >= n1; i--) if (SA[i] >= 0) SA[j--] = SA[i]; // stage 2: solve the reduced problem // recurse if names are not yet unique int *SA1 = SA, *s1 = SA + n - n1; if (name < n1) SA_IS((unsigned char*) s1, SA1, n1, name - 1, sizeof(int)); else // generate the suffix array of s1 directly for (i = 0; i < n1; i++) SA1[s1[i]] = i; // stage 3: induce the result for the original problem bkt = (int *) malloc(sizeof(int) * (K + 1)); // bucket array // put all left-most S characters into their buckets getBuckets(s, bkt, n, K, cs, true); // find ends of buckets for (i = 1, j = 0; i < n; i++) if (isLMS(i)) s1[j++] = i; // get p1 for (i = 0; i < n1; i++) SA1[i] = s1[SA1[i]]; // get index in s for (i = n1; i < n; i++) SA[i] = -1; // init SA[n1..n-1] for (i = n1 - 1; i >= 0; i--) { j = SA[i]; SA[i] = -1; SA[--bkt[chr(j)]] = j; } induceSAl(t, SA, s, bkt, n, K, cs, false); induceSAs(t, SA, s, bkt, n, K, cs, true); free(bkt); free(t); } const int maxn = 200000; int sa[maxn]; int lcp[maxn]; int Rank[maxn]; unsigned char *s; int n; void calc_lcp() { for (int i = 0; i < n; i++) Rank[sa[i]] = i; for (int i = 0, h = 0; i < n; i++) { if (Rank[i] < n - 1) { for (int j = sa[Rank[i] + 1]; s[i + h] == s[j + h]; ++h) ; lcp[Rank[i]] = h; if (h > 0) --h; } } } int main() { string str = "abcab"; n = str.size(); s = (unsigned char*) str.c_str(); SA_IS(s, sa, n + 1, 256, 1); calc_lcp(); for (int i = 0; i < n; i++) { cout << str.substr(sa[i + 1]); if (i < n - 1) cout << " " << lcp[i + 1]; cout << endl; } }