Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b7b2510

Browse files
Initial Commit
1 parent ac5dbb6 commit b7b2510

File tree

4 files changed

+175
-0
lines changed

4 files changed

+175
-0
lines changed

‎string-algorithms/aho_corasick.cpp‎

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
***************************
3+
* *
4+
* Author: Swaraj Deep *
5+
* *
6+
***************************
7+
*/
8+
9+
/*
10+
Preprocessing: Build an automaton of all words in the given dictionary. The automaton has three functions.
11+
1. go_to: This function simply follows edges
12+
of Trie of all words in dict[]. It is
13+
represented as 2D array g[][] where
14+
we store next state for current state
15+
and character.
16+
17+
2. failure: This function stores all edges that are
18+
followed when current character doesn't
19+
have edge in Trie. It is represented as
20+
1D array f[] where we store next state for
21+
current state.
22+
23+
3. output : Stores indexes of all words that end at
24+
current state. It is represented as 1D
25+
array out[] where we store indexes
26+
of all matching words as a bitmap for
27+
current state.
28+
*/
29+
30+
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>
33+
#define MAXS 250 // Maximum number of automaton states; must exceed the total length of all words in the dictionary (state 0 is the root)
34+
#define MAXC 26 // Size of the alphabet (characters are mapped to indexes by subtracting 'a')
35+
36+
using namespace std;
37+
38+
vector<int> out(MAXS, 0); // Output function: bit i of out[s] is set if the word with index i is matched at state s.
39+
vector<int> f(MAXS, -1); // Failure function: f[s] is the state to fall back to from state s (-1 when unset).
40+
vector<vector<int>> g(MAXS, vector<int>(MAXC, -1)); // go_to function: g[s][c] is the next state from state s on character index c (-1 when there is no edge).
41+
42+
// Build the string matching automaton
43+
// dict -> the array of string
44+
// returns number of states build automaton has where states start from 0
45+
int build_matching_automaton(const vector<string> &dict)
46+
{
47+
int states = 1; // Initially we have just 0 state.
48+
// Build the trie of words in dict
49+
int i = 0;
50+
for (string word : dict)
51+
{
52+
int current_state = 0;
53+
for (char ch : word)
54+
{
55+
ch = ch - 'a';
56+
// Allocate a new node if node for ch doesn't exist
57+
if (g[current_state][ch] == -1)
58+
{
59+
g[current_state][ch] = states++;
60+
}
61+
current_state = g[current_state][ch];
62+
}
63+
// Add current word in output function
64+
out[current_state] |= (1 << i++);
65+
}
66+
// For all characters which don't have an edge from root (or state 0) in TRIE, add a go_to edge to state 0 itself
67+
for (int ch = 0; ch < MAXC; ++ch)
68+
{
69+
if (g[0][ch] == -1)
70+
{
71+
g[0][ch] = 0;
72+
}
73+
}
74+
// build the failure links
75+
queue<int> q;
76+
for (int ch = 0; ch < MAXC; ++ch)
77+
{
78+
if (g[0][ch] != 0)
79+
{
80+
f[g[0][ch]] = 0;
81+
q.push(g[0][ch]);
82+
}
83+
}
84+
while (!q.empty())
85+
{
86+
// remove the front state from queue
87+
int state = q.front();
88+
q.pop();
89+
for (int ch = 0; ch < MAXC; ++ch)
90+
{
91+
if (g[state][ch] != -1)
92+
{
93+
int failure = f[state];
94+
while (g[failure][ch] == -1)
95+
{
96+
failure = f[failure];
97+
}
98+
failure = g[failure][ch];
99+
f[g[state][ch]] = failure;
100+
out[g[state][ch]] |= out[failure];
101+
q.push(g[state][ch]);
102+
}
103+
}
104+
}
105+
return states;
106+
}
107+
108+
// Returns the next state the machine will transition to using goto
109+
// and failure functions.
110+
// current_state - The current state of the machine. Must be between
111+
// 0 and the number of states - 1, inclusive.
112+
// next_input - The next character that enters into the machine.
113+
int find_next_state(int current_state, char next_input)
114+
{
115+
int answer = current_state;
116+
int ch = next_input - 'a';
117+
// If goto is not defined, use failure function
118+
while (g[answer][ch] == -1)
119+
{
120+
answer = f[answer];
121+
}
122+
return g[answer][ch];
123+
}
124+
125+
// This function finds all occurrences of all array words in text.
126+
void search_words(const vector<string> &dict, string text)
127+
{
128+
// Preprocess patterns.
129+
// Build machine with goto, failure and output functions
130+
build_matching_automaton(dict);
131+
// Initialize current state
132+
int current_state = 0;
133+
// Traverse the text through the nuilt machine to find all occurrences of words in dict[]
134+
for (int i = 0; i < text.size(); ++i)
135+
{
136+
current_state = find_next_state(current_state, text[i]);
137+
// If match not found, move to next state
138+
if (out[current_state] == 0)
139+
{
140+
continue;
141+
}
142+
// Match found, print all matching words of dict[] using output function.
143+
for (int j = 0, len = dict.size(); j < len; ++j)
144+
{
145+
if (out[current_state] & (1 << j))
146+
{
147+
cout << "Word " << dict[j] << " appears from " << i - dict[j].size() + 1 << " to " << i << endl;
148+
}
149+
}
150+
}
151+
}

‎string-algorithms/kmp_algorithm.cpp‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
/*
2+
***************************
3+
* *
4+
* Author: Swaraj Deep *
5+
* *
6+
***************************
7+
*/
8+
19
#include <iostream>
210
#include <vector>
311

‎string-algorithms/suffix_array.cpp‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
/*
2+
***************************
3+
* *
4+
* Author: Swaraj Deep *
5+
* *
6+
***************************
7+
*/
8+
19
#include <iostream>
210
#include <vector>
311
#include <algorithm>

‎string-algorithms/z_algorithm.cpp‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
/*
2+
***************************
3+
* *
4+
* Author: Swaraj Deep *
5+
* *
6+
***************************
7+
*/
8+
19
#include <iostream>
210
#include <vector>
311

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /