1
+ #include < bits/stdc++.h>
2
+ #include < iostream>
3
+ #include < fstream>
4
+ #include < string>
5
+
6
+ using namespace std ;
7
+
8
+ int cou=0 ;
9
+ // Max number of states in the matching machine.
10
+ // Should be equal to the sum of the length of all keywords.
11
+ const int MAXS = 500 ;
12
+
13
+ // Maximum number of characters in input alphabet
14
+ const int MAXC = 26 ;
15
+
16
+ // OUTPUT FUNCTION IS IMPLEMENTED USING out[]
17
+ // Bit i in this mask is one if the word with index i
18
+ // appears when the machine enters this state.
19
+ int out[MAXS];
20
+
21
+ // FAILURE FUNCTION IS IMPLEMENTED USING f[]
22
+ int f[MAXS];
23
+
24
+ // GOTO FUNCTION (OR TRIE) IS IMPLEMENTED USING g[][]
25
+ int g[MAXS][MAXC];
26
+
27
+ // Builds the string matching machine.
28
+ // arr - array of words. The index of each keyword is important:
29
+ // "out[state] & (1 << i)" is > 0 if we just found word[i]
30
+ // in the text.
31
+ // Returns the number of states that the built machine has.
32
+ // States are numbered 0 up to the return value - 1, inclusive.
33
+ int buildMatchingMachine (string arr[], int k)
34
+ {
35
+ // Initialize all values in output function as 0.
36
+ memset (out, 0 , sizeof out);
37
+
38
+ // Initialize all values in goto function as -1.
39
+ memset (g, -1 , sizeof g);
40
+
41
+ // Initially, we just have the 0 state
42
+ int states = 1 ;
43
+
44
+ // Construct values for goto function, i.e., fill g[][]
45
+ // This is same as building a Trie for arr[]
46
+ for (int i = 0 ; i < k; ++i)
47
+ {
48
+ const string &word = arr[i];
49
+ int currentState = 0 ;
50
+
51
+ // Insert all characters of current word in arr[]
52
+ for (int j = 0 ; j < word.size (); ++j)
53
+ {
54
+ int ch = word[j] - ' a' ;
55
+
56
+ // Allocate a new node (create a new state) if a
57
+ // node for ch doesn't exist.
58
+ if (g[currentState][ch] == -1 )
59
+ g[currentState][ch] = states++;
60
+
61
+ currentState = g[currentState][ch];
62
+ }
63
+
64
+ // Add current word in output function
65
+ out[currentState] |= (1 << i);
66
+ }
67
+
68
+ // For all characters which don't have an edge from
69
+ // root (or state 0) in Trie, add a goto edge to state
70
+ // 0 itself
71
+ for (int ch = 0 ; ch < MAXC; ++ch)
72
+ if (g[0 ][ch] == -1 )
73
+ g[0 ][ch] = 0 ;
74
+
75
+ // Now, let's build the failure function
76
+
77
+ // Initialize values in fail function
78
+ memset (f, -1 , sizeof f);
79
+
80
+ // Failure function is computed in breadth first order
81
+ // using a queue
82
+ queue<int > q;
83
+
84
+ // Iterate over every possible input
85
+ for (int ch = 0 ; ch < MAXC; ++ch)
86
+ {
87
+ // All nodes of depth 1 have failure function value
88
+ // as 0. For example, in above diagram we move to 0
89
+ // from states 1 and 3.
90
+ if (g[0 ][ch] != 0 )
91
+ {
92
+ f[g[0 ][ch]] = 0 ;
93
+ q.push (g[0 ][ch]);
94
+ }
95
+ }
96
+
97
+ // Now queue has states 1 and 3
98
+ while (q.size ())
99
+ {
100
+ // Remove the front state from queue
101
+ int state = q.front ();
102
+ q.pop ();
103
+
104
+ // For the removed state, find failure function for
105
+ // all those characters for which goto function is
106
+ // not defined.
107
+ for (int ch = 0 ; ch <= MAXC; ++ch)
108
+ {
109
+ // If goto function is defined for character 'ch'
110
+ // and 'state'
111
+ if (g[state][ch] != -1 )
112
+ {
113
+ // Find failure state of removed state
114
+ int failure = f[state];
115
+
116
+ // Find the deepest node labeled by proper
117
+ // suffix of string from root to current
118
+ // state.
119
+ while (g[failure][ch] == -1 )
120
+ failure = f[failure];
121
+
122
+ failure = g[failure][ch];
123
+ f[g[state][ch]] = failure;
124
+
125
+ // Merge output values
126
+ out[g[state][ch]] |= out[failure];
127
+
128
+ // Insert the next level node (of Trie) in Queue
129
+ q.push (g[state][ch]);
130
+ }
131
+ }
132
+ }
133
+
134
+ return states;
135
+ }
136
+
137
+ // Returns the next state the machine will transition to using goto
138
+ // and failure functions.
139
+ // currentState - The current state of the machine. Must be between
140
+ // 0 and the number of states - 1, inclusive.
141
+ // nextInput - The next character that enters into the machine.
142
+ int findNextState (int currentState, char nextInput)
143
+ {
144
+ int answer = currentState;
145
+ int ch = nextInput - ' a' ;
146
+
147
+ // If goto is not defined, use failure function
148
+ while (g[answer][ch] == -1 )
149
+ answer = f[answer];
150
+
151
+ return g[answer][ch];
152
+ }
153
+
154
+ // This function finds all occurrences of all array words
155
+ // in text.
156
+ void searchWords (string arr[], int k, string text)
157
+ {
158
+ // Preprocess patterns.
159
+ // Build machine with goto, failure and output functions
160
+ buildMatchingMachine (arr, k);
161
+
162
+ // Initialize current state
163
+ int currentState = 0 ;
164
+
165
+ // Traverse the text through the nuilt machine to find
166
+ // all occurrences of words in arr[]
167
+ for (int i = 0 ; i < text.size (); ++i)
168
+ {
169
+ currentState = findNextState (currentState, text[i]);
170
+
171
+ // If match not found, move to next state
172
+ if (out[currentState] == 0 )
173
+ continue ;
174
+
175
+ // Match found, print all matching words of arr[]
176
+ // using output function.
177
+ for (int j = 0 ; j < k; ++j)
178
+ {
179
+ if (out[currentState] & (1 << j))
180
+ {
181
+ cout << " Word " << arr[j] << " appears from "
182
+ << i - arr[j].size () + 1 << " to " << i << endl;
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+
189
+ // Driver program to test above function
190
+ int main ()
191
+ {
192
+ char txt[10000 ];
193
+ // scanf("%s",&txt);
194
+ ifstream file;
195
+ file.open (" inputtext.txt" );
196
+ // file>>txt;
197
+ file.getline (txt,9999 );
198
+ file.close ();
199
+ // printf("100%s100\n",txt );
200
+ // char *pat = "ABABAAABAB";
201
+ string arr[] = {" ABABAAABAB" };
202
+ // char *pat ="learning";
203
+
204
+ int k = sizeof (arr)/sizeof (arr[0 ]);
205
+
206
+ clock_t begin = clock ();
207
+ searchWords (arr, k, text);
208
+ clock_t end = clock ();
209
+ double elapsed_secs = double (end - begin)*1000 / CLOCKS_PER_SEC;
210
+ cout<<" Number of matches " <<cou<<endl<<" Time taken:" <<elapsed_secs<<endl;
211
+
212
+ return 0 ;
213
+ }
0 commit comments