Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f7bbea6

Browse files
add aho corasick to lib and shorter hash code
1 parent 0e5d527 commit f7bbea6

File tree

6 files changed

+348
-9
lines changed

6 files changed

+348
-9
lines changed

‎Library/Strings/ahoCorasick.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#include <bits/stdc++.h>
2+
using namespace std;
3+
4+
// Aho-Corasick automaton over the alphabet [off, off + ALPHA).
//
// Typical use: add() every pattern, call buildSufixLink() once, then call
// findPattern() for each text. The automaton owns its nodes (they are released
// when the Aho object is destroyed), so an Aho object can be moved but not copied.
template<const int ALPHA = 26, const int off = 'a'>
struct Aho {
    // One trie vertex; it also acts as a state of the matching automaton.
    struct Node {
        Node* p = NULL;   // parent in the trie (NULL for the root)
        Node* sl = NULL;  // suffix link, filled in by buildSufixLink()
        Node* ol = NULL;  // output link: nearest pattern end along the suffix links
        array<Node*, ALPHA> nxt; // trie children, indexed by (character - off)

        char c = 0;       // label (already shifted by off) of the edge p -> this
        int idw = -1;     // id of the first pattern that ends here, -1 if none
        int seen = 0;     // stamp of the last findPattern() call that reported this node

        Node(){ nxt.fill(NULL); }
        Node(Node* p, char c) : p(p), c(c) { nxt.fill(NULL); }
    };
    typedef Node* trie;

    trie root;
    int nwords = 0; // number of patterns added so far; pattern ids are 0..nwords-1
    Aho(){ root = make(NULL, 0); }

    // Inserts pattern s and gives it the id nwords (then increments nwords).
    // Equal patterns are supported: each copy gets its own id and findPattern()
    // reports all of them together.
    // Throws out_of_range (leaving the automaton untouched) when s contains a
    // character outside [off, off + ALPHA).
    void add(string &s){
        for(char ch : s)
            if(slot(ch) < 0)
                throw out_of_range("Aho::add: character outside the alphabet");

        trie t = root;
        for(char ch : s){
            const int c = slot(ch);
            if(!t->nxt[c])
                t->nxt[c] = make(t, static_cast<char>(c));
            t = t->nxt[c];
        }

        const int id = nwords++;
        if(t->idw == -1) t->idw = id; // the node keeps the id of the first copy
        firstId.push_back(t->idw);    // later copies remember which id to mirror
    }

    // Computes suffix links and output links with a BFS over the trie.
    // Must be called after the last add() and before findPattern().
    void buildSufixLink(){
        deque<trie> q(1, root);

        while(!q.empty()){
            trie t = q.front();
            q.pop_front();

            if(t->p){
                const int c = static_cast<unsigned char>(t->c);
                // Walk the parent's suffix chain until some state has a c-edge.
                trie w = t->p->sl;
                while(w && !w->nxt[c]) w = w->sl;
                t->sl = w ? w->nxt[c] : root;
                // Skip suffix states that are not pattern ends.
                t->ol = t->sl->idw == -1 ? t->sl->ol : t->sl;
            }

            for(trie child : t->nxt)
                if(child)
                    q.push_back(child);
        }
    }

    // Returns ans where ans[id] is true iff pattern id occurs in s at least once.
    // The automaton is not modified structurally, so it can be queried again.
    // Text characters outside the alphabet cannot be part of any match; they
    // simply reset the current state to the root.
    vector<bool> findPattern(string &s){
        vector<bool> ans(nwords, false);
        const int cur = ++stamp;
        trie w = root;
        for(char ch : s){
            const int c = slot(ch);
            if(c < 0){ w = root; continue; }

            while(w && !w->nxt[c]) w = w->sl;
            w = w ? w->nxt[c] : root;

            // A node already stamped in this call had its whole output chain
            // reported, so the walk can stop there. To enumerate ALL occurrences
            // drop the `seen` test (cost then grows with the number of occurrences).
            for(trie z = w; z && z->seen != cur; z = z->ol){
                z->seen = cur;
                if(z->idw != -1)
                    ans[z->idw] = true;
            }
        }

        // Copies of an already inserted pattern share the answer of the first copy.
        for(size_t i = 0; i < firstId.size() && i < ans.size(); i++)
            if(ans[firstId[i]])
                ans[i] = true;
        return ans;
    }

private:
    vector<unique_ptr<Node>> pool; // owns every node, root included
    vector<int> firstId;           // firstId[id] = id stored in the node where pattern id ends
    int stamp = 0;                 // incremented once per findPattern() call

    // Maps a character to its child index, or -1 when it is outside the alphabet.
    static int slot(char ch){
        const int i = static_cast<unsigned char>(ch) - off;
        return (0 <= i && i < ALPHA) ? i : -1;
    }

    // Allocates a node owned by the pool and returns a non-owning pointer to it.
    trie make(Node* parent, char c){
        pool.emplace_back(new Node(parent, c));
        return pool.back().get();
    }
};

/*LATEX_DESC_BEGIN***************************
Aho-Corasick: Trie automaton to search multiple patterns in a text

Complexity: O((SUM|P| + |S|) * ALPHA)

Aho<26,'a'> aho;
for(auto p: patterns) aho.add(p);
aho.buildSufixLink();
auto ans = aho.findPattern(s);

parent(p), suffixLink(sl), outputLink(ol), patternID(idw)
outputLink -> edge to the end of another pattern (when that pattern is a suffix of the current node)
ALPHA -> Size of the alphabet. If big, consider changing nxt to map

findPattern reports each pattern at most once per call and can be called for several texts.
To find ALL occurrences of all patterns, remove the seen check in findPattern. But it can be slow (proportional to the number of occurrences), so consider using DP on the automaton.
*****************************LATEX_DESC_END*/

‎SH12-Notebook.pdf

14.6 KB
Binary file not shown.
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#include <bits/stdc++.h>
2+
using namespace std;
3+
#define cerr if(false)cerr
4+
//https://codeforces.com/gym/101064/problem/E
5+
//2016 USP Try-outs
6+
const int ALPHA = 10;  // number of child slots per node
const int off = 'a';   // character code that maps to child slot 0

// Trie vertex used by the automaton below.
struct Node {
    Node* p = NULL;           // parent vertex (NULL when there is none)
    Node* sl = NULL;          // suffix link
    array<Node*, ALPHA> nxt;  // outgoing edges, one slot per alphabet symbol

    int no = 0;               // numeric id of this vertex

    char c;                   // symbol (already shifted by off) on the edge from p
    int mask = 0;             // bit set attached to this vertex

    // Builds a parentless vertex with the given id; `c` is left unset.
    Node(int id){
        no = id;
        nxt.fill(NULL);
    }

    // Builds a vertex hanging from `parent` through the edge labelled `label`.
    Node(Node* parent, char label, int id){
        p = parent;
        c = label;
        no = id;
        nxt.fill(NULL);
    }
};
using trie = Node*;
23+
24+
struct Aho {
25+
trie root;
26+
int nwords = 0, nodesid=0;
27+
vector<trie> nodes;
28+
29+
trie new_Node(Node* p, char c){
30+
trie no = new Node(p, c, nodesid++);
31+
nodes.push_back(no);
32+
return no;
33+
}
34+
35+
Aho(){ root = new_Node(NULL, 0); }
36+
37+
void add(string &s, bool bad=true){
38+
trie t = root;
39+
for(auto c : s){ c -= off;
40+
if(!t->nxt[c])
41+
t->nxt[c] = new_Node(t, c);
42+
t = t->nxt[c];
43+
}
44+
int id = nwords++;
45+
t->mask |= 1<<id;
46+
if(!bad) t->mask = -1;
47+
}
48+
49+
void buildSufixLink(){
50+
deque<trie> q(1, root);
51+
52+
while(!q.empty()){
53+
trie t = q.front();
54+
q.pop_front();
55+
56+
if(trie w = t->p){
57+
do w = w->sl; while(w && !w->nxt[t->c]);
58+
t->sl = w ? w->nxt[t->c] : root;
59+
t->mask |= t->sl->mask;
60+
}
61+
62+
for(int c=0; c<ALPHA; c++)
63+
if(t->nxt[c])
64+
q.push_back(t->nxt[c]);
65+
else if(t->sl && t->sl->nxt[c])
66+
t->nxt[c] = t->sl->nxt[c];
67+
}
68+
}
69+
};
70+
Aho aho;
71+
72+
const int MAXN = 310;
73+
int vis[MAXN][1<<15];
74+
75+
int main(){
76+
memset(vis, -1, sizeof vis);
77+
ios::sync_with_stdio(false); cin.tie(NULL);
78+
int n, m;
79+
cin >> n >> m;
80+
81+
string s;
82+
for(int i=0; i<n; i++)
83+
cin >> s, aho.add(s, true);
84+
85+
for(int i=0; i<m; i++)
86+
cin >> s, aho.add(s, false);
87+
88+
aho.buildSufixLink();
89+
90+
deque<pair<int, int>> q;
91+
q.push_back({0, 0});
92+
vis[0][0] = -2;
93+
int ans = -1;
94+
95+
while(!q.empty())
96+
{
97+
auto [u, mask] = q.front();
98+
q.pop_front();
99+
100+
if(mask == (1<<n)-1){ ans = u; break; }
101+
102+
trie no = aho.nodes[u];
103+
for(int c=0; c<ALPHA; c++)
104+
if(no->nxt[c]){
105+
int v = no->nxt[c]->no, msk = mask|no->nxt[c]->mask;
106+
if(msk == -1) continue;
107+
if(vis[v][msk] != -1) continue;
108+
109+
q.push_back({v, msk});
110+
vis[v][msk] = (u<<15)|mask;
111+
}
112+
}
113+
114+
if(ans == -1){
115+
cout << "-";
116+
return 0;
117+
}
118+
119+
s = "";
120+
int u = ans, mask = (1<<n)-1;
121+
122+
while(u || mask){
123+
s += (char)(off+aho.nodes[u]->c);
124+
u = vis[u][mask], mask = u;
125+
u >>= 15; mask ^= u<<15;
126+
}
127+
128+
reverse(begin(s), end(s));
129+
cout << s;
130+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#include <bits/stdc++.h>
2+
#define endl "\n"
3+
using namespace std;
4+
// https://onlinejudge.org/index.php?option=onlinejudge&page=show_problem&problem=1620
5+
6+
const int SIGMA = 26 + 32; // number of child slots per node
const int OFF = 'A';       // character code that maps to child slot 0

// Trie vertex for the pattern matcher below.
struct node {
    node* p = NULL;           // parent vertex (NULL when there is none)
    node* sl = NULL;          // suffix link
    array<node*, SIGMA> son;  // children, indexed by (character - OFF)
    char c;                   // symbol (already shifted by OFF) on the edge from p
    vector<int> end;          // ids of the patterns that end at this vertex
    node* ol = NULL;          // output link

    // Builds a parentless vertex; `c` is left unset.
    node(){
        son.fill(NULL);
    }

    // Builds a vertex hanging from `parent` through the edge labelled `label`.
    node(node* parent, char label){
        p = parent;
        c = label;
        son.fill(NULL);
    }
};

using trie = node*;
22+
23+
void add(string &s, trie t, int id){
24+
for(auto c : s){
25+
c -= OFF;
26+
if(!t->son[c])
27+
t->son[c] = new node(t, c);
28+
t = t->son[c];
29+
}
30+
t->end.push_back(id); //the strings are unique?
31+
}
32+
33+
// Fills the suffix link (sl) and output link (ol) of every vertex of the trie
// rooted at root, visiting the vertices in BFS order.
void buildSufixLink(trie root){
    deque<trie> pending(1, root);

    while(!pending.empty()){
        trie cur = pending.front();
        pending.pop_front();

        if(cur->p != NULL){
            // Follow the parent's suffix chain until a vertex has an edge
            // labelled like the one entering cur.
            trie cand = cur->p->sl;
            while(cand != NULL && cand->son[cur->c] == NULL)
                cand = cand->sl;
            cur->sl = (cand != NULL) ? cand->son[cur->c] : root;

            // The output link skips suffix vertices where no pattern ends.
            trie link = cur->sl;
            cur->ol = link->end.empty() ? link->ol : link;
        }

        for(trie child : cur->son)
            if(child != NULL)
                pending.push_back(child);
    }

    root->sl = NULL;
}
56+
57+
vector<bool> findPattern(string &s, trie root, int q){
58+
vector<bool> ans(q, 0);
59+
trie w = root;
60+
for(int i=0, c; i<s.size(); i++){
61+
c = s[i] - OFF;
62+
while(w && !w->son[c]) w = w->sl;
63+
w = w ? w->son[c] : root;
64+
65+
for(trie ol=w, nol; ol; nol=ol->ol, ol->ol=NULL, ol=nol) //find at least one occurence of each pattern
66+
if(!ol->end.empty()){
67+
for(auto idx : ol->end)
68+
ans[idx] = true;
69+
}
70+
}
71+
return ans;
72+
}
73+
74+
int main(){
75+
ios::sync_with_stdio(false); cin.tie(NULL);
76+
int tt;
77+
cin >> tt;
78+
while(tt--)
79+
{
80+
string s, t;
81+
cin >> s;
82+
83+
trie tri = new node();
84+
85+
int q; cin >> q;
86+
for(int i=0; i<q; i++){
87+
cin >> t;
88+
add(t, tri ,i);
89+
}
90+
91+
buildSufixLink(tri);
92+
93+
auto ans = findPattern(s, tri, q);
94+
95+
for(auto x : ans) cout << "ny"[x] << endl;
96+
}
97+
}
98+
99+
/*
100+
1
101+
abcdefabcde
102+
4
103+
abcde
104+
bcde
105+
cde
106+
bcd
107+
108+
*/

‎pdf/contents.txt

102 Bytes
Binary file not shown.

‎pdf/hash/md5hsh.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,16 @@
22
#define ll long long
33
using namespace std;
44

5-
string getHash(string s, int dig=3){
6-
ofstream ip("temp.cpp");
7-
ip.sync_with_stdio(false); // LATEX_IGNORED_LINE
8-
ip << s; ip.close();
5+
string getHash(string s){
6+
ofstream ip("temp.cpp"); ip << s; ip.close();
97
system("g++ -E -P -dD -fpreprocessed ./temp.cpp | tr -d '[:space:]' | md5sum > hsh.temp");
10-
ifstream f("hsh.temp"); f >> s; f.close();
11-
for(auto&c:s)if('a'<=c)c^=32; //optional
12-
return s.substr(0, dig);
8+
ifstream fo("hsh.temp"); fo >> s; fo.close();
9+
return s.substr(0, 3);
1310
}
1411

1512
int main_(){
1613
string l, t;
17-
vector<string> st(100);
14+
vector<string> st(10);
1815
while(getline(cin, l)){
1916
t = l;
2017
for(auto c : l)
@@ -33,11 +30,32 @@ BLOCK_DESC_BEGIN g++ hash.cpp -o hash \\ hash < code.cpp BLOCK_DESC_END to get t
3330
The hash ignores comments and whitespaces.
3431
The hash of a line with } is the hash of all the code since the { that opens it (i.e. the hash of that whole context).
3532
33+
(Optional) To make letters upperCase: for(auto&c:s)if('a'<=c) c^=32;
3634
LATEX_DESC_END*/
3735

3836
//LATEX_IGNORED_BEGIN
3937
string _ESCAPE_ = "@";
4038

39+
// Returns the first `dig` characters of the upper-cased MD5 digest of `s`.
// The digest is taken after the text has gone through the g++ preprocessor
// and had all whitespace removed.
// Works through two scratch files in the current directory (temp.cpp and
// hsh.temp) and an external shell pipeline. Returns an empty string when the
// digest cannot be read back.
string getHash_Fast(string s, int dig=3){
    ofstream ip("temp.cpp");

    ip.sync_with_stdio(false);

    ip << s;
    ip.close();

    system("g++ -E -P -dD -fpreprocessed ./temp.cpp | tr -d '[:space:]' | md5sum > hsh.temp");

    // Read into a fresh string: a failed extraction leaves its target
    // untouched, so reading into `s` would return a prefix of the source code
    // as if it were a hash.
    string digest;
    ifstream f("hsh.temp");
    f >> digest;
    f.close();

    for(auto&c:digest)if('a'<=c) c^=32; //Make letters uppercase

    return digest.substr(0, dig);
}
57+
58+
4159
string getCodeHash(const string& code, int dig=3){
4260
string hcode, l, t, hs, trimed;
4361
stringstream ss(code);
@@ -50,7 +68,7 @@ string getCodeHash(const string& code, int dig=3){
5068
if(c == '{') st.push_back(""); else
5169
if(c == '}') t = st.back() + l, st.pop_back();
5270

53-
hs = getHash(t, dig);
71+
hs = getHash_Fast(t, dig);
5472
trimed = t;
5573
while(!trimed.empty() && isspace(trimed.back())) trimed.pop_back(); // remove trailing spaces
5674
if(trimed.empty()) for(hs = ""; hs.size()<dig; hs += " "); //empty line -> no hash

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /