programming-examples/java/Data_Structures/TrieST.java
2019-11-15 12:59:38 +01:00

300 lines
9.7 KiB
Java

import edu.princeton.cs.introcs.StdIn;
import edu.princeton.cs.introcs.StdOut;
/*************************************************************************
* Compilation: javac TrieST.java
* Execution: java TrieST < words.txt
* Dependencies: StdIn.java StdOut.java Queue.java
*
* A string symbol table for extended ASCII strings, implemented
* using a 256-way trie.
*
* % java TrieST < shellsST.txt
* by 4
* sea 6
* sells 1
* she 0
* shells 3
* shore 7
* the 5
*
*************************************************************************/
/**
* The {@code TrieST} class represents a symbol table of key-value
* pairs, with string keys and generic values.
* It supports the usual {@code put}, {@code get}, {@code contains},
* {@code delete}, {@code size}, and is-empty methods.
* It also provides character-based methods for finding the string
* in the symbol table that is the longest prefix of a given query string,
* finding all strings in the symbol table that start with a given prefix,
* and finding all strings in the symbol table that match a given pattern.
* A symbol table implements the associative array abstraction:
* when associating a value with a key that is already in the symbol table,
* the convention is to replace the old value with the new value.
* Unlike {@link java.util.Map}, this class uses the convention that
* values cannot be null &mdash;setting the
* value associated with a key to null is equivalent to deleting the key
* from the symbol table.
*
* This implementation uses a 256-way trie.
* The {@code put}, {@code get}, {@code contains}, {@code delete}, and
* longest-prefix operations take time proportional to the length
* of the key (in the worst case).
* The {@code size} and is-empty operations take constant time.
* Construction takes constant time.
*
* For additional documentation, see <a href="http://algs4.cs.princeton.edu/52trie">Section 5.2</a> of
* Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne.
*/
public class TrieST<Value> {
    // Alphabet size (extended ASCII). Characters index Node.next directly, so a
    // key character with code >= R indexes past the end of that array.
    private static final int R = 256;

    private Node root;  // root of the trie; null while no node exists
    private int n;      // number of keys currently stored

    /**
     * R-way trie node: an optional value plus one child link per character.
     * The value is held as Object because this nested class is static and
     * therefore cannot refer to the enclosing class's type parameter.
     */
    private static class Node {
        private Object val;                       // null means "no key ends here"
        private final Node[] next = new Node[R];  // child link for each character
    }

    /**
     * Initializes an empty string symbol table.
     */
    public TrieST() {
    }

    /**
     * Returns the value associated with the given key.
     * @param key the key
     * @return the value associated with the given key if the key is in the symbol table
     *         and null if the key is not in the symbol table
     * @throws NullPointerException if key is null
     */
    @SuppressWarnings("unchecked")  // Node.val is only ever assigned a Value (in put) or null (in delete)
    public Value get(String key) {
        checkNotNull(key, "key");
        Node x = get(root, key, 0);
        if (x == null) {
            return null;
        }
        return (Value) x.val;
    }

    /**
     * Does this symbol table contain the given key?
     * @param key the key
     * @return true if this symbol table contains key and
     *         false otherwise
     * @throws NullPointerException if key is null
     */
    public boolean contains(String key) {
        return get(key) != null;
    }

    // Returns the node reached by following key[d..] down from x, or null if
    // the path runs off the trie. The node returned may hold no value.
    private Node get(Node x, String key, int d) {
        if (x == null) {
            return null;
        }
        if (d == key.length()) {
            return x;
        }
        char c = key.charAt(d);
        return get(x.next[c], key, d + 1);
    }

    /**
     * Inserts the key-value pair into the symbol table, overwriting the old value
     * with the new value if the key is already in the symbol table.
     * If the value is null, this effectively deletes the key from the symbol table.
     * @param key the key
     * @param val the value
     * @throws NullPointerException if key is null
     */
    public void put(String key, Value val) {
        checkNotNull(key, "key");
        if (val == null) {
            delete(key);
        } else {
            root = put(root, key, val, 0);
        }
    }

    // Inserts key[d..] below x, creating nodes as needed, and returns the
    // (possibly new) node that belongs in x's position.
    private Node put(Node x, String key, Value val, int d) {
        if (x == null) {
            x = new Node();
        }
        if (d == key.length()) {
            // Only a node that held no value represents a new key.
            if (x.val == null) {
                n++;
            }
            x.val = val;
            return x;
        }
        char c = key.charAt(d);
        x.next[c] = put(x.next[c], key, val, d + 1);
        return x;
    }

    /**
     * Returns the number of key-value pairs in this symbol table.
     * @return the number of key-value pairs in this symbol table
     */
    public int size() {
        return n;
    }

    /**
     * Is this symbol table empty?
     * @return true if this symbol table is empty and false otherwise
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Returns all keys in the symbol table as an Iterable.
     * To iterate over all of the keys in the symbol table named st,
     * use the foreach notation: for (String key : st.keys()).
     * @return all keys in the symbol table as an Iterable
     */
    public Iterable<String> keys() {
        return keysWithPrefix("");
    }

    /**
     * Returns all of the keys in the symbol table that start with prefix.
     * @param prefix the prefix
     * @return all of the keys in the symbol table that start with prefix,
     *         as an iterable
     * @throws NullPointerException if prefix is null
     */
    public Iterable<String> keysWithPrefix(String prefix) {
        checkNotNull(prefix, "prefix");
        Queue<String> results = new Queue<String>();
        Node x = get(root, prefix, 0);
        collect(x, new StringBuilder(prefix), results);
        return results;
    }

    // Enqueues every key stored in the subtrie rooted at x. On entry, prefix
    // spells the path from the root to x; it is restored before returning.
    // A node's own key is enqueued before its children's, and children are
    // visited in ascending character order.
    private void collect(Node x, StringBuilder prefix, Queue<String> results) {
        if (x == null) {
            return;
        }
        if (x.val != null) {
            results.enqueue(prefix.toString());
        }
        for (char c = 0; c < R; c++) {
            prefix.append(c);
            collect(x.next[c], prefix, results);
            prefix.deleteCharAt(prefix.length() - 1);
        }
    }

    /**
     * Returns all of the keys in the symbol table that match pattern,
     * where the . symbol is treated as a wildcard character that matches
     * any single character. A key matches only if it has exactly the same
     * length as the pattern.
     * @param pattern the pattern
     * @return all of the keys in the symbol table that match pattern,
     *         as an iterable, where . is treated as a wildcard character.
     * @throws NullPointerException if pattern is null
     */
    public Iterable<String> keysThatMatch(String pattern) {
        checkNotNull(pattern, "pattern");
        Queue<String> results = new Queue<String>();
        collect(root, new StringBuilder(), pattern, results);
        return results;
    }

    // Enqueues every key below x that matches pattern. prefix spells the path
    // from the root to x, so its length is also the current pattern position.
    private void collect(Node x, StringBuilder prefix, String pattern, Queue<String> results) {
        if (x == null) {
            return;
        }
        int d = prefix.length();
        if (d == pattern.length()) {
            // Whole pattern consumed: this node is the only candidate on this path.
            if (x.val != null) {
                results.enqueue(prefix.toString());
            }
            return;
        }
        char c = pattern.charAt(d);
        if (c == '.') {
            // Wildcard: try every possible next character.
            for (char ch = 0; ch < R; ch++) {
                prefix.append(ch);
                collect(x.next[ch], prefix, pattern, results);
                prefix.deleteCharAt(prefix.length() - 1);
            }
        } else {
            prefix.append(c);
            collect(x.next[c], prefix, pattern, results);
            prefix.deleteCharAt(prefix.length() - 1);
        }
    }

    /**
     * Returns the string in the symbol table that is the longest prefix of query,
     * or null, if no such string.
     * @param query the query string
     * @throws NullPointerException if query is null
     * @return the string in the symbol table that is the longest prefix of query,
     *         or null if no such string
     */
    public String longestPrefixOf(String query) {
        checkNotNull(query, "query");
        // -1 is the "nothing found yet" sentinel. Seeding the search with 0 would
        // make "no key is a prefix" indistinguishable from "the empty string is a key".
        int length = longestPrefixOf(root, query, 0, -1);
        if (length == -1) {
            return null;
        }
        return query.substring(0, length);
    }

    // Returns the length of the longest string key in the subtrie
    // rooted at x that is a prefix of the query string,
    // assuming the first d characters match and the best prefix match
    // found so far has the given length (-1 if none has been found).
    private int longestPrefixOf(Node x, String query, int d, int length) {
        if (x == null) {
            return length;
        }
        if (x.val != null) {
            length = d;
        }
        if (d == query.length()) {
            return length;
        }
        char c = query.charAt(d);
        return longestPrefixOf(x.next[c], query, d + 1, length);
    }

    /**
     * Removes the key from the symbol table if the key is present.
     * @param key the key
     * @throws NullPointerException if key is null
     */
    public void delete(String key) {
        checkNotNull(key, "key");
        root = delete(root, key, 0);
    }

    // Clears the value for key[d..] below x and returns the node that belongs
    // in x's position afterwards: x itself, or null if x no longer holds a
    // value or any child and can therefore be unlinked.
    private Node delete(Node x, String key, int d) {
        if (x == null) {
            return null;
        }
        if (d == key.length()) {
            if (x.val != null) {
                n--;
            }
            x.val = null;
        } else {
            char c = key.charAt(d);
            x.next[c] = delete(x.next[c], key, d + 1);
        }

        // remove subtrie rooted at x if it is completely empty
        if (x.val != null) {
            return x;
        }
        for (int c = 0; c < R; c++) {
            if (x.next[c] != null) {
                return x;
            }
        }
        return null;
    }

    // Throws the NullPointerException promised by the public API, regardless
    // of whether the trie traversal would have dereferenced the argument.
    private static void checkNotNull(Object arg, String name) {
        if (arg == null) {
            throw new NullPointerException(name + " must not be null");
        }
    }

    /**
     * Unit tests the TrieST data type.
     * Reads whitespace-separated keys from standard input, associating each
     * key with its position in the input, then prints a few sample queries.
     */
    public static void main(String[] args) {
        // build symbol table from standard input
        TrieST<Integer> st = new TrieST<Integer>();
        for (int i = 0; !StdIn.isEmpty(); i++) {
            String key = StdIn.readString();
            st.put(key, i);
        }

        // print results (the full listing only for small tables)
        if (st.size() < 100) {
            StdOut.println("keys(\"\"):");
            for (String key : st.keys()) {
                StdOut.println(key + " " + st.get(key));
            }
            StdOut.println();
        }

        StdOut.println("longestPrefixOf(\"shellsort\"):");
        StdOut.println(st.longestPrefixOf("shellsort"));
        StdOut.println();

        StdOut.println("keysWithPrefix(\"shor\"):");
        for (String s : st.keysWithPrefix("shor")) {
            StdOut.println(s);
        }
        StdOut.println();

        StdOut.println("keysThatMatch(\".he.l.\"):");
        for (String s : st.keysThatMatch(".he.l.")) {
            StdOut.println(s);
        }
    }
}