Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 83a9480

Browse files
Add files via upload
1 parent 146f957 commit 83a9480

11 files changed

+892
-0
lines changed
538 KB
Binary file not shown.

‎aho_corasick.cpp

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#include <bits/stdc++.h>
2+
#include <iostream>
3+
#include <fstream>
4+
#include <string>
5+
6+
using namespace std;
7+
8+
int cou=0;
9+
// Max number of states in the matching machine.
10+
// Should be equal to the sum of the length of all keywords.
11+
const int MAXS = 500;
12+
13+
// Maximum number of characters in input alphabet
14+
const int MAXC = 26;
15+
16+
// OUTPUT FUNCTION IS IMPLEMENTED USING out[]
17+
// Bit i in this mask is one if the word with index i
18+
// appears when the machine enters this state.
19+
int out[MAXS];
20+
21+
// FAILURE FUNCTION IS IMPLEMENTED USING f[]
22+
int f[MAXS];
23+
24+
// GOTO FUNCTION (OR TRIE) IS IMPLEMENTED USING g[][]
25+
int g[MAXS][MAXC];
26+
27+
// Builds the string matching machine.
28+
// arr - array of words. The index of each keyword is important:
29+
// "out[state] & (1 << i)" is > 0 if we just found word[i]
30+
// in the text.
31+
// Returns the number of states that the built machine has.
32+
// States are numbered 0 up to the return value - 1, inclusive.
33+
int buildMatchingMachine(string arr[], int k)
34+
{
35+
// Initialize all values in output function as 0.
36+
memset(out, 0, sizeof out);
37+
38+
// Initialize all values in goto function as -1.
39+
memset(g, -1, sizeof g);
40+
41+
// Initially, we just have the 0 state
42+
int states = 1;
43+
44+
// Construct values for goto function, i.e., fill g[][]
45+
// This is same as building a Trie for arr[]
46+
for (int i = 0; i < k; ++i)
47+
{
48+
const string &word = arr[i];
49+
int currentState = 0;
50+
51+
// Insert all characters of current word in arr[]
52+
for (int j = 0; j < word.size(); ++j)
53+
{
54+
int ch = word[j] - 'a';
55+
56+
// Allocate a new node (create a new state) if a
57+
// node for ch doesn't exist.
58+
if (g[currentState][ch] == -1)
59+
g[currentState][ch] = states++;
60+
61+
currentState = g[currentState][ch];
62+
}
63+
64+
// Add current word in output function
65+
out[currentState] |= (1 << i);
66+
}
67+
68+
// For all characters which don't have an edge from
69+
// root (or state 0) in Trie, add a goto edge to state
70+
// 0 itself
71+
for (int ch = 0; ch < MAXC; ++ch)
72+
if (g[0][ch] == -1)
73+
g[0][ch] = 0;
74+
75+
// Now, let's build the failure function
76+
77+
// Initialize values in fail function
78+
memset(f, -1, sizeof f);
79+
80+
// Failure function is computed in breadth first order
81+
// using a queue
82+
queue<int> q;
83+
84+
// Iterate over every possible input
85+
for (int ch = 0; ch < MAXC; ++ch)
86+
{
87+
// All nodes of depth 1 have failure function value
88+
// as 0. For example, in above diagram we move to 0
89+
// from states 1 and 3.
90+
if (g[0][ch] != 0)
91+
{
92+
f[g[0][ch]] = 0;
93+
q.push(g[0][ch]);
94+
}
95+
}
96+
97+
// Now queue has states 1 and 3
98+
while (q.size())
99+
{
100+
// Remove the front state from queue
101+
int state = q.front();
102+
q.pop();
103+
104+
// For the removed state, find failure function for
105+
// all those characters for which goto function is
106+
// not defined.
107+
for (int ch = 0; ch <= MAXC; ++ch)
108+
{
109+
// If goto function is defined for character 'ch'
110+
// and 'state'
111+
if (g[state][ch] != -1)
112+
{
113+
// Find failure state of removed state
114+
int failure = f[state];
115+
116+
// Find the deepest node labeled by proper
117+
// suffix of string from root to current
118+
// state.
119+
while (g[failure][ch] == -1)
120+
failure = f[failure];
121+
122+
failure = g[failure][ch];
123+
f[g[state][ch]] = failure;
124+
125+
// Merge output values
126+
out[g[state][ch]] |= out[failure];
127+
128+
// Insert the next level node (of Trie) in Queue
129+
q.push(g[state][ch]);
130+
}
131+
}
132+
}
133+
134+
return states;
135+
}
136+
137+
// Returns the next state the machine will transition to using goto
138+
// and failure functions.
139+
// currentState - The current state of the machine. Must be between
140+
// 0 and the number of states - 1, inclusive.
141+
// nextInput - The next character that enters into the machine.
142+
int findNextState(int currentState, char nextInput)
143+
{
144+
int answer = currentState;
145+
int ch = nextInput - 'a';
146+
147+
// If goto is not defined, use failure function
148+
while (g[answer][ch] == -1)
149+
answer = f[answer];
150+
151+
return g[answer][ch];
152+
}
153+
154+
// This function finds all occurrences of all array words
155+
// in text.
156+
void searchWords(string arr[], int k, string text)
157+
{
158+
// Preprocess patterns.
159+
// Build machine with goto, failure and output functions
160+
buildMatchingMachine(arr, k);
161+
162+
// Initialize current state
163+
int currentState = 0;
164+
165+
// Traverse the text through the nuilt machine to find
166+
// all occurrences of words in arr[]
167+
for (int i = 0; i < text.size(); ++i)
168+
{
169+
currentState = findNextState(currentState, text[i]);
170+
171+
// If match not found, move to next state
172+
if (out[currentState] == 0)
173+
continue;
174+
175+
// Match found, print all matching words of arr[]
176+
// using output function.
177+
for (int j = 0; j < k; ++j)
178+
{
179+
if (out[currentState] & (1 << j))
180+
{
181+
cout << "Word " << arr[j] << " appears from "
182+
<< i - arr[j].size() + 1 << " to " << i << endl;
183+
}
184+
}
185+
}
186+
}
187+
188+
189+
// Driver program to test above function
190+
int main()
191+
{
192+
char txt[10000];
193+
//scanf("%s",&txt);
194+
ifstream file;
195+
file.open("inputtext.txt");
196+
//file>>txt;
197+
file.getline(txt,9999);
198+
file.close();
199+
//printf("100%s100\n",txt );
200+
//char *pat = "ABABAAABAB";
201+
string arr[] = {"ABABAAABAB"};
202+
//char *pat ="learning";
203+
204+
int k = sizeof(arr)/sizeof(arr[0]);
205+
206+
clock_t begin = clock();
207+
searchWords(arr, k, text);
208+
clock_t end = clock();
209+
double elapsed_secs = double(end - begin)*1000 / CLOCKS_PER_SEC;
210+
cout<<"Number of matches "<<cou<<endl<<"Time taken:"<<elapsed_secs<<endl;
211+
212+
return 0;
213+
}

‎automata.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include <bits/stdc++.h>
2+
#include <iostream>
3+
#include <fstream>
4+
#include <string>
5+
6+
using namespace std;
7+
8+
int cou=0;
9+
#define NO_OF_CHARS 256
10+
11+
int getNextState(char *pat, int M, int state, int x)
12+
{
13+
// If the character c is same as next character in pattern,
14+
// then simply increment state
15+
if (state < M && x == pat[state])
16+
return state+1;
17+
18+
int ns, i; // ns stores the result which is next state
19+
20+
// ns finally contains the longest prefix which is also suffix
21+
// in "pat[0..state-1]c"
22+
23+
// Start from the largest possible value and stop when you find
24+
// a prefix which is also suffix
25+
for (ns = state; ns > 0; ns--)
26+
{
27+
if(pat[ns-1] == x)
28+
{
29+
for(i = 0; i < ns-1; i++)
30+
{
31+
if (pat[i] != pat[state-ns+1+i])
32+
break;
33+
}
34+
if (i == ns-1)
35+
return ns;
36+
}
37+
}
38+
39+
return 0;
40+
}
41+
42+
/* This function builds the TF table which represents Finite Automata for a
43+
given pattern */
44+
void computeTF(char *pat, int M, int TF[][NO_OF_CHARS])
45+
{
46+
int state, x;
47+
for (state = 0; state <= M; ++state)
48+
for (x = 0; x < NO_OF_CHARS; ++x)
49+
TF[state][x] = getNextState(pat, M, state, x);
50+
}
51+
52+
/* Prints all occurrences of pat in txt */
53+
void search(char *pat, char *txt)
54+
{
55+
int M = strlen(pat);
56+
int N = strlen(txt);
57+
58+
int TF[M+1][NO_OF_CHARS];
59+
60+
computeTF(pat, M, TF);
61+
62+
// Process txt over FA.
63+
int i, state=0;
64+
for (i = 0; i < N; i++)
65+
{
66+
state = TF[state][txt[i]];
67+
if (state == M)
68+
{
69+
//printf ("\n Pattern found at index %d", i-M+1);
70+
cou++;
71+
}
72+
}
73+
}
74+
75+
// Driver program to test above function
76+
int main()
77+
{
78+
char txt[20000];
79+
//scanf("%s",&txt);
80+
std::ifstream file;
81+
file.open("inputtext.txt");
82+
//file>>txt;
83+
file.getline(txt,19999);
84+
file.close();
85+
//printf("100%s100\n",txt );
86+
//char *pat = "ABABAAABAB";
87+
//char *pat ="learning";
88+
char pat[30];
89+
90+
file.open("pat.txt");
91+
while(file.getline(pat,29))
92+
{
93+
clock_t begin = clock();
94+
search(pat, txt);
95+
clock_t end = clock();
96+
double elapsed_secs = double(end - begin)*1000 / CLOCKS_PER_SEC;
97+
//cout<<txt<<endl;
98+
cout<<"Number of matches of \""<<pat<<"\" is "<<cou<<endl<<"Time taken:"<<elapsed_secs<<endl;
99+
cou=0;
100+
}
101+
file.close();
102+
103+
return 0;
104+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /