You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

145 lines
4.2 KiB
Java

package com.jwetherell.algorithms.data_structures;
import java.util.Set;
import java.util.TreeSet;
/**
* A suffix trie is a data structure that presents the suffixes of a given
* string in a way that allows for a particularly fast implementation of many
* important string operations. This implementation is based upon a patricia
* trie which IS a compact trie.
*
* http://en.wikipedia.org/wiki/Suffix_trie
*
* @author Justin Wetherell <phishman3579@gmail.com>
*/
@SuppressWarnings("unchecked")
public class CompactSuffixTrie<C extends CharSequence> {
private PatriciaTrie<C> tree = null;
/**
* Create a compact suffix trie from sequence
*
* @param sequence
* to create a suffix trie from.
*/
public CompactSuffixTrie(C sequence) {
tree = new PatriciaTrie<C>();
int length = sequence.length();
for (int i = 0; i < length; i++) {
CharSequence seq = sequence.subSequence(i, length);
tree.add((C) seq);
}
}
/**
* Add character sequence to the trie.
*
* @param sequence
* to add to trie.
* @return True if added successfully.
*/
public boolean add(C sequence) {
int length = sequence.length();
for (int i = 0; i < length; i++) {
CharSequence seq = sequence.subSequence(i, length);
tree.add((C) seq);
}
return true;
}
/**
* Does the sequence exists in the trie.
*
* @param sequence
* to locate in the trie.
* @return True if sequence exists in trie.
*/
public boolean doesSubStringExist(C sequence) {
char[] chars = sequence.toString().toCharArray();
int length = chars.length;
PatriciaTrie.Node current = tree.root;
int index = 0;
for (int i = 0; i < length; i++) {
int innerStringLength = (current.string != null) ? current.string.length : 0;
char c = chars[i];
if (innerStringLength > index) {
boolean inThis = current.partOfThis(c, index++);
if (!inThis)
return false;
} else {
int idx = current.childIndex(c);
if (idx < 0)
return false;
current = current.getChild(idx);
index = 1;
}
}
return true;
}
/**
* Get all suffixes in the trie.
*
* @return set of suffixes in trie.
*/
public Set<String> getSuffixes() {
return this.getSuffixes(tree.root);
}
/**
* Get all suffixes at node.
*
* @param node
* to get all suffixes at.
* @return set of suffixes in trie at node.
*/
private Set<String> getSuffixes(PatriciaTrie.Node node) {
StringBuilder builder = new StringBuilder();
if (node.string != null)
builder.append(node.string);
Set<String> set = new TreeSet<String>();
if (node.type == PatriciaTrie.WHITE) {
set.add(builder.toString());
}
for (int i = 0; i < node.getChildrenSize(); i++) {
PatriciaTrie.Node c = node.getChild(i);
set.addAll(getSuffixes(c, builder.toString()));
}
return set;
}
/**
* Get all suffixes at node and prepend the prefix.
*
* @param node
* to get all suffixes from.
* @param prefix
* to prepend to suffixes.
* @return set of suffixes in trie at node.
*/
private Set<String> getSuffixes(PatriciaTrie.Node node, String prefix) {
StringBuilder builder = new StringBuilder(prefix);
if (node.string != null)
builder.append(node.string);
Set<String> set = new TreeSet<String>();
if (node.type == PatriciaTrie.WHITE) {
set.add(builder.toString());
}
for (int i = 0; i < node.getChildrenSize(); i++) {
PatriciaTrie.Node c = node.getChild(i);
set.addAll(getSuffixes(c, builder.toString()));
}
return set;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return PatriciaTrie.PatriciaTriePrinter.getString(tree);
}
}