/*This is a Java Program to implement Suffix Tree. A suffix tree is a compressed trie containing all the suffixes of the given text as their keys and positions in the text as their values. Suffix tree allows a particularly fast implementation of many important string operations. The construction of such a tree for the string S takes time and space linear in the length of S. Once constructed, several operations can be performed quickly, for instance locating a substring in S, locating a substring if a certain number of mistakes are allowed, locating matches for a regular expression pattern etc. Suffix trees also provided one of the first linear-time solutions for the longest common substring problem. These speedups come at a cost: storing a string’s suffix tree typically requires significantly more space than storing the string itself. This program is based on Mark Nelson’s implementation of Ukkonen’s algorithm.*/ /* * Java Program to Implement Suffix Tree */ import java.io.*; /** Class Node **/ class Node { public int suffix_node; public static int Count = 1; /** Constructor **/ public Node() { suffix_node = -1; } } /** Class Suffix Tree **/ class SuffixTree { private static final int MAX_LENGTH = 1000; private static final int HASH_TABLE_SIZE = 2179; private char[] T = new char[ MAX_LENGTH ]; private int N; private Edge[] Edges ; private Node[] Nodes ; private Suffix active; /** Class Suffix **/ class Suffix { public int origin_node; public int first_char_index; public int last_char_index; /** Constructor **/ public Suffix(int node, int start, int stop ) { origin_node = node ; first_char_index = start ; last_char_index = stop; } /** Function Implicit **/ public boolean Implicit() { return first_char_index > last_char_index; } /** Function Explicit **/ public boolean Explicit() { return first_char_index > last_char_index; } /** Function Canonize() * A suffix in the tree is denoted by a Suffix structure * that denotes its last character. The canonical * representation of a suffix for this algorithm requires * that the origin_node by the closest node to the end * of the tree. To force this to be true, we have to * slide down every edge in our current path until we * reach the final node **/ public void Canonize() { if (!Explicit() ) { Edge edge = Find( origin_node, T[ first_char_index ] ); int edge_span = edge.last_char_index - edge.first_char_index; while ( edge_span <= ( last_char_index - first_char_index ) ) { first_char_index = first_char_index + edge_span + 1; origin_node = edge.end_node; if ( first_char_index <= last_char_index ) { edge = Find( edge.end_node, T[ first_char_index ] ); edge_span = edge.last_char_index - edge.first_char_index; } } } } } /** Class Edge **/ class Edge { public int first_char_index; public int last_char_index; public int end_node; public int start_node; /** Constructor **/ public Edge() { start_node = -1; } /** Constructor **/ public Edge( int init_first, int init_last, int parent_node ) { first_char_index = init_first; last_char_index = init_last; start_node = parent_node; end_node = Node.Count++; } /** function Insert () * A given edge gets a copy of itself inserted into the table * with this function. It uses a linear probe technique, which * means in the case of a collision, we just step forward through * the table until we find the first unused slot. **/ public void Insert() { int i = Hash( start_node, T[ first_char_index ] ); while ( Edges[ i ].start_node != -1 ) i = ++i % HASH_TABLE_SIZE; Edges[ i ] = this; } /** function SplitEdge () * This function is called * to split an edge at the point defined by the Suffix argument **/ public int SplitEdge( Suffix s ) { Remove(); Edge new_edge = new Edge( first_char_index, first_char_index + s.last_char_index - s.first_char_index, s.origin_node ); new_edge.Insert(); Nodes[ new_edge.end_node ].suffix_node = s.origin_node; first_char_index += s.last_char_index - s.first_char_index + 1; start_node = new_edge.end_node; Insert(); return new_edge.end_node; } /** function Remove () * This function is called to remove an edge from hash table **/ public void Remove() { int i = Hash( start_node, T[ first_char_index ] ); while ( Edges[ i ].start_node != start_node || Edges[ i ].first_char_index != first_char_index ) i = ++i % HASH_TABLE_SIZE; for ( ; ; ) { Edges[ i ].start_node = -1; int j = i; for ( ; ; ) { i = ++i % HASH_TABLE_SIZE; if ( Edges[ i ].start_node == -1 ) return; int r = Hash( Edges[ i ].start_node, T[ Edges[ i ].first_char_index ] ); if ( i >= r && r > j ) continue; if ( r > j && j > i ) continue; if ( j > i && i >= r ) continue; break; } Edges[ j ] = Edges[ i ]; } } } /** Constructor */ public SuffixTree() { Edges = new Edge[ HASH_TABLE_SIZE ]; for (int i = 0; i < HASH_TABLE_SIZE; i++) Edges[i] = new Edge(); Nodes = new Node[ MAX_LENGTH * 2 ]; for (int i = 0; i < MAX_LENGTH * 2 ; i++) Nodes[i] = new Node(); active = new Suffix( 0, 0, -1 ); } /** Function Find() - function to find an edge **/ public Edge Find( int node, int c ) { int i = Hash( node, c ); for ( ; ; ) { if ( Edges[ i ].start_node == node ) if ( c == T[ Edges[ i ].first_char_index ] ) return Edges[ i ]; if ( Edges[ i ].start_node == -1 ) return Edges[ i ]; i = ++i % HASH_TABLE_SIZE; } } /** Function Hash() - edges are inserted into the hash table using this hashing function **/ public static int Hash( int node, int c ) { return (( node << 8 ) + c ) % HASH_TABLE_SIZE; } /** Function AddPrefix() - called repetitively, once for each of the prefixes of the input string **/ public void AddPrefix( Suffix active, int last_char_index ) { int parent_node; int last_parent_node = -1; for ( ; ; ) { Edge edge; parent_node = active.origin_node; if ( active.Explicit() ) { edge = Find( active.origin_node, T[ last_char_index ] ); if ( edge.start_node != -1 ) break; } else { edge = Find( active.origin_node, T[ active.first_char_index ] ); int span = active.last_char_index - active.first_char_index; if ( T[ edge.first_char_index + span + 1 ] == T[ last_char_index ] ) break; parent_node = edge.SplitEdge( active ); } Edge new_edge = new Edge( last_char_index, N, parent_node ); new_edge.Insert(); if ( last_parent_node > 0 ) Nodes[ last_parent_node ].suffix_node = parent_node; last_parent_node = parent_node; if ( active.origin_node == 0 ) active.first_char_index++; else active.origin_node = Nodes[ active.origin_node ].suffix_node; active.Canonize(); } if ( last_parent_node > 0 ) Nodes[ last_parent_node ].suffix_node = parent_node; active.last_char_index++; active.Canonize(); } /** Function to print all contents and details of suffix tree **/ public void dump_edges(int current_n ) { System.out.println(" Start End Suf First Last String\n"); for ( int j = 0 ; j < HASH_TABLE_SIZE ; j++ ) { Edge s = Edges[j]; if ( s.start_node == -1 ) continue; System.out.printf("%5d %5d %3d %5d %6d ", s.start_node, s.end_node, Nodes[ s.end_node ].suffix_node, s.first_char_index, s.last_char_index); int top; if ( current_n > s.last_char_index ) top = s.last_char_index; else top = current_n; for ( int l = s.first_char_index ; l <= top; l++) System.out.print( T[ l ]); System.out.println(); } } /** Main Function **/ public static void main(String[] args) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); System.out.println("Suffix Tree Test\n"); System.out.println("Enter string\n"); String str = br.readLine(); /** Construct Suffix Tree **/ SuffixTree st = new SuffixTree(); st.T = str.toCharArray(); st.N = st.T.length - 1; for (int i = 0 ; i <= st.N ; i++ ) st.AddPrefix( st.active, i ); st.dump_edges( st.N ); } } /* Suffix Tree Test Enter string Start End Suf First Last String 0 2 -1 1 9 anfoundry 0 9 -1 7 9 dry 0 4 -1 3 9 foundry 0 7 0 2 2 n 0 5 -1 4 9 oundry 0 10 -1 8 9 ry 0 1 -1 0 9 sanfoundry 0 6 -1 5 9 undry 0 11 -1 9 9 y 7 8 -1 7 9 dry 7 3 -1 3 9 foundry