You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.0 KiB
C++

String matching in linear-time
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
typedef std::vector<int> int_vec;
class string_search
{
int_vec shifts;
void compute_shifts(const std::string &pattern);
public:
int find_first(const std::string &text, const std::string &pattern);
int_vec find_all(const std::string &text, const std::string &pattern);
};
// create the shift-lookup-table
void string_search::compute_shifts(const std::string &pattern)
{
int next_shift = 0;
shifts.clear();
shifts.push_back(0); // shift to the first character
// start with the second character, since the shift to the first is always 0
for(int i = 1; i < pattern.length(); i++)
{
while(next_shift > 0 && pattern[next_shift] != pattern[i])
next_shift = shifts[next_shift];
if(pattern[next_shift] == pattern[i])
next_shift++;
shifts.push_back(next_shift);
}
}
// search the string and return when the first occurrence is found
int
string_search::find_first(const std::string &text, const std::string &pattern)
{
int next_shift = 0;
compute_shifts(pattern);
for(int i = 0; i < text.length(); i++)
{
while(next_shift > 0 && pattern[next_shift] != text[i])
next_shift = shifts[next_shift - 1];
if(pattern[next_shift] == text[i])
next_shift++;
if(next_shift == pattern.length())
return i - (pattern.length() - 1); // found the first so return
}
return -1;
}
// search the string and put every occurence in a vector
int_vec
string_search::find_all(const std::string &text, const std::string &pattern)
{
int next_shift = 0;
int_vec positions;
compute_shifts(pattern);
for(int i = 0; i < text.length(); i++)
{
while(next_shift > 0 && pattern[next_shift] != text[i])
next_shift = shifts[next_shift - 1];
if(pattern[next_shift] == text[i])
next_shift++;
if(next_shift == pattern.length())
{
positions.push_back(i - (pattern.length() - 1)); // found one, put in list
next_shift = shifts[next_shift - 1]; // restart pattern with last shift
}
}
return positions;
}
int main(int argc, char **argv)
{
if(argc <= 2){
cout << "Usage: " << argv[0] << " filename searchpattern" << endl;
return 0;
}
std::string pattern = argv[2];
// read the file. Since the file is read like this all white-characters
// are eaten, so a search including white-characters will fail...
fstream fs;
std::string text, temp;
fs.open(argv[1], ios::in);
while(!fs.eof()){
fs >> temp;
text += temp;
}
fs.close();
// search the file
string_search search;
int_vec pos_list = search.find_all(text, pattern);
// print out result
std::vector<int>::iterator it;
cout << "Found " << pos_list.size() << " occurrences" << endl;
for(it = pos_list.begin(); it != pos_list.end(); it++){
temp = text.substr(*it, pattern.length());
cout << "Pos=" << *it << " == " << temp << endl;
}
}