programming-examples/java/Data_Structures/AhoCorasick.java
2019-11-15 12:59:38 +01:00

81 lines
1.9 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import java.util.*;
// https://en.wikipedia.org/wiki/AhoCorasick_algorithm
public class AhoCorasick {
static final int ALPHABET_SIZE = 26;
Node[] nodes;
int nodeCount;
public static class Node {
int parent;
char charFromParent;
int suffLink = -1;
int[] children = new int[ALPHABET_SIZE];
int[] transitions = new int[ALPHABET_SIZE];
boolean leaf;
{
Arrays.fill(children, -1);
Arrays.fill(transitions, -1);
}
}
public AhoCorasick(int maxNodes) {
nodes = new Node[maxNodes];
// create root
nodes[0] = new Node();
nodes[0].suffLink = 0;
nodes[0].parent = -1;
nodeCount = 1;
}
public void addString(String s) {
int cur = 0;
for (char ch : s.toCharArray()) {
int c = ch - 'a';
if (nodes[cur].children[c] == -1) {
nodes[nodeCount] = new Node();
nodes[nodeCount].parent = cur;
nodes[nodeCount].charFromParent = ch;
nodes[cur].children[c] = nodeCount++;
}
cur = nodes[cur].children[c];
}
nodes[cur].leaf = true;
}
public int suffLink(int nodeIndex) {
Node node = nodes[nodeIndex];
if (node.suffLink == -1)
node.suffLink = node.parent == 0 ? 0 : transition(suffLink(node.parent), node.charFromParent);
return node.suffLink;
}
public int transition(int nodeIndex, char ch) {
int c = ch - 'a';
Node node = nodes[nodeIndex];
if (node.transitions[c] == -1)
node.transitions[c] = node.children[c] != -1 ? node.children[c] : (nodeIndex == 0 ? 0 : transition(suffLink(nodeIndex), ch));
return node.transitions[c];
}
// Usage example
public static void main(String[] args) {
AhoCorasick ahoCorasick = new AhoCorasick(1000);
ahoCorasick.addString("bc");
ahoCorasick.addString("abc");
String s = "tabcbc";
int node = 0;
List<Integer> positions = new ArrayList<>();
for (int i = 0; i < s.length(); i++) {
node = ahoCorasick.transition(node, s.charAt(i));
if (ahoCorasick.nodes[node].leaf)
positions.add(i);
}
System.out.println(positions);
}
}