Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bdebd23

Browse files
author
werewolf
committed
finish method
1 parent 4d786b9 commit bdebd23

File tree

8 files changed

+275
-228
lines changed

8 files changed

+275
-228
lines changed

‎src/algorithm/DNA_Analyzer.cc‎

Lines changed: 211 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,237 @@
11
#include "DNA_Analyzer.h"
22

3-
// Prints, on one line, every 0-based offset at which the contents of `path_2`
// (the pattern) occur inside the contents of `path_1` (the text). Each offset
// is followed by a single space and the line is terminated with std::endl.
//
// Candidate offsets are found with polynomial prefix hashes (base 31,
// modulus 1e9 + 9) and then confirmed with a direct character comparison, so
// a hash collision can never be reported as a match.
//
// Symbols are encoded as A=1, C=2, G=3, T=4; any other byte hashes as 0.
// NOTE(review): both files are read verbatim, so a trailing newline is part
// of the text/pattern. Confirm whether the inputs are expected to be stripped.
void DNA_Analyzer::RabinKarpAlgorithm(const std::filesystem::path& path_1,
                                      const std::filesystem::path& path_2) {
  std::ifstream file_a(path_1), file_b(path_2);

  const std::string a((std::istreambuf_iterator<char>(file_a)),
                      std::istreambuf_iterator<char>());
  const std::string b((std::istreambuf_iterator<char>(file_b)),
                      std::istreambuf_iterator<char>());

  const long long p = 31;
  const long long m = 1000000009;
  const std::size_t S = a.size();
  const std::size_t T = b.size();

  // Read-only symbol encoding; unknown bytes map to 0 without mutating any
  // lookup table.
  const auto code_of = [](char c) -> long long {
    switch (c) {
      case 'A':
        return 1;
      case 'C':
        return 2;
      case 'G':
        return 3;
      case 'T':
        return 4;
      default:
        return 0;
    }
  };

  // Powers p^0 .. p^max(S, T). The extra slot keeps p_pow[0] valid when both
  // inputs are empty and p_pow[S] valid when the pattern is empty.
  std::vector<long long> p_pow(std::max(S, T) + 1);
  p_pow[0] = 1;
  for (std::size_t i = 1; i < p_pow.size(); ++i) {
    p_pow[i] = (p_pow[i - 1] * p) % m;
  }

  // h[i] is the hash of the first i symbols of the text.
  std::vector<long long> h(S + 1, 0);
  for (std::size_t i = 0; i < S; ++i) {
    h[i + 1] = (h[i] + code_of(a[i]) * p_pow[i]) % m;
  }

  // Hash of the whole pattern.
  long long h_s = 0;
  for (std::size_t i = 0; i < T; ++i) {
    h_s = (h_s + code_of(b[i]) * p_pow[i]) % m;
  }

  for (std::size_t i = 0; i + T <= S; ++i) {
    // The window hash carries a factor p^i, so the pattern hash is scaled by
    // the same factor instead of dividing the window hash.
    const long long cur_h = (h[i + T] + m - h[i]) % m;
    if (cur_h == h_s * p_pow[i] % m && a.compare(i, T, b) == 0) {
      std::cout << i << " ";
    }
  }
  std::cout << std::endl;
}
52+
53+
void DNA_Analyzer::NWAlgorithm(const std::filesystem::path& path) {
54+
std::ifstream file(path);
55+
56+
Score score(0, 0, 0);
57+
file >> score.match >> score.mismatch >> score.gap;
58+
std::string a, b;
59+
file >> a >> b;
60+
61+
std::cout << optimalScore(score, a, b);
62+
63+
auto k = optimalAlignment(score, a, b);
64+
std::cout << k.first << std::endl;
65+
std::cout << k.second.first << std::endl;
66+
std::cout << k.second.second << std::endl;
67+
}
68+
69+
// Returns the best global-alignment score of `s1` against `s2`.
//
// table[r][c] holds the best score for aligning the first r symbols of `s1`
// with the first c symbols of `s2`. Each cell is the maximum of:
//   - the diagonal neighbour plus score.match / score.mismatch,
//   - the left neighbour plus score.gap,
//   - the upper neighbour plus score.gap.
int DNA_Analyzer::optimalScore(Score score, std::string s1, std::string s2) {
  const int rows = s1.size();
  const int cols = s2.size();
  std::vector<std::vector<int>> table(rows + 1, std::vector<int>(cols + 1));

  // Borders: a prefix aligned against nothing costs one gap per symbol.
  for (int r = 1; r <= rows; ++r) {
    table[r][0] = table[r - 1][0] + score.gap;
  }
  for (int c = 1; c <= cols; ++c) {
    table[0][c] = table[0][c - 1] + score.gap;
  }

  for (int r = 1; r <= rows; ++r) {
    for (int c = 1; c <= cols; ++c) {
      const int diagonal =
          table[r - 1][c - 1] +
          (s1[r - 1] == s2[c - 1] ? score.match : score.mismatch);
      const int from_left = table[r][c - 1] + score.gap;
      const int from_up = table[r - 1][c] + score.gap;
      table[r][c] = std::max(diagonal, std::max(from_left, from_up));
    }
  }
  return table[rows][cols];
}
85+
86+
std::pair<int, std::pair<std::string, std::string>>
87+
DNA_Analyzer::optimalAlignment(Score score, std::string s1, std::string s2) {
88+
int n = s1.size(), m = s2.size();
89+
std::vector<std::vector<int>> dp(n + 1, std::vector<int>(m + 1));
90+
std::vector<std::vector<int>> p(
91+
n + 1, std::vector<int>(m + 1)); // 0-diag, 1-up, 2-left
92+
93+
for (int i = 1; i <= n; i++) dp[i][0] = dp[i - 1][0] + score.gap, p[i][0] = 1;
94+
for (int i = 1; i <= m; i++) dp[0][i] = dp[0][i - 1] + score.gap, p[0][i] = 2;
95+
for (int i = 1; i <= n; i++) {
96+
for (int j = 1; j <= m; j++) {
97+
int match = dp[i - 1][j - 1] +
98+
(s1[i - 1] == s2[j - 1] ? score.match : score.mismatch);
99+
int ins = dp[i][j - 1] + score.gap;
100+
int del = dp[i - 1][j] + score.gap;
101+
if (match >= ins && match >= del) {
102+
dp[i][j] = match;
103+
p[i][j] = 0;
104+
} else if (ins >= match && ins >= del) {
105+
dp[i][j] = ins;
106+
p[i][j] = 2;
107+
} else {
108+
dp[i][j] = del;
109+
p[i][j] = 1;
110+
}
111+
}
112+
}
113+
114+
int i = n, j = m;
115+
std::string res1, res2;
116+
while (i > 0 || j > 0) {
117+
if (p[i][j] == 0) {
118+
res1 += s1[i - 1];
119+
res2 += s2[j - 1];
120+
i--;
121+
j--;
122+
} else if (p[i][j] == 1) {
123+
res1 += s1[i - 1];
124+
res2 += '-';
125+
i--;
126+
} else {
127+
res1 += '-';
128+
res2 += s2[j - 1];
129+
j--;
130+
}
131+
}
132+
reverse(res1.begin(), res1.end());
133+
reverse(res2.begin(), res2.end());
134+
135+
return {dp[n][m], {res1, res2}};
136+
}
137+
138+
void DNA_Analyzer::RegexAlgorithm(const std::filesystem::path& path) {
139+
std::ifstream file(path);
140+
std::string s, p;
141+
file >> s >> p;
142+
std::cout << isMatch(s, p) << std::endl;
143+
}
144+
145+
// Returns true when the whole of `s` is matched by the pattern `p`.
//
// dp[i][j] is true when the first i pattern symbols match the first j text
// symbols. Pattern symbols are handled as follows:
//   - a literal matches the same character; '.' matches any one character;
//   - '*' either drops itself together with the preceding pattern symbol, or
//     absorbs one more text character equal to that preceding symbol (or any
//     character when the preceding symbol is '.');
//   - '?' takes its value from any of dp[i-1][j-1], dp[i-1][j], dp[i][j-1].
//
// A '*' in the first pattern position has no preceding symbol; it is treated
// as matching nothing (unless the text character is itself a literal '*')
// instead of indexing before the start of the table and the pattern.
bool DNA_Analyzer::isMatch(std::string s, std::string p) {
  const int m = p.size();
  const int n = s.size();
  std::vector<std::vector<bool>> dp(m + 1, std::vector<bool>(n + 1));
  dp[0][0] = true;

  // Empty text: only "x*" pairs can be dropped. Starting at i = 2 guarantees
  // that dp[i - 2] exists.
  // NOTE(review): '?' is not propagated into column 0 even though the main
  // loop lets it inherit dp[i-1][j] — confirm whether "?" should match "".
  for (int i = 2; i <= m; i++) {
    if (p[i - 1] == '*') dp[i][0] = dp[i - 2][0];
  }

  for (int i = 1; i <= m; i++) {
    const char pc = p[i - 1];
    for (int j = 1; j <= n; j++) {
      const char sc = s[j - 1];
      if (sc == pc || pc == '.') {
        dp[i][j] = dp[i - 1][j - 1];
      } else if (pc == '*') {
        // Needs a preceding pattern symbol; a leading '*' leaves dp false.
        if (i >= 2) {
          dp[i][j] = dp[i - 2][j] ||
                     (dp[i][j - 1] && (sc == p[i - 2] || p[i - 2] == '.'));
        }
      } else if (pc == '?') {
        dp[i][j] = dp[i - 1][j - 1] || dp[i - 1][j] || dp[i][j - 1];
      }
    }
  }
  return dp[m][n];
}
166+
167+
void DNA_Analyzer::KSimilarAlgorithm(const std::filesystem::path& path) {
168+
std::ifstream file(path);
169+
std::string s, p;
170+
file >> s >> p;
171+
std::cout << kSimilarity(s, p) << std::endl;
172+
}
173+
174+
// Returns the smallest number of two-character swaps that turns `s1` into
// `s2`, or -1 when the lengths differ or the search runs out of states
// without reaching `s2`.
//
// Breadth-first search over strings: from each state the first position that
// still differs from `s2` is fixed by swapping in a later character that
// equals the wanted one and is not already in its own final place.
int DNA_Analyzer::kSimilarity(std::string s1, std::string s2) {
  if (s1.size() != s2.size()) return -1;

  std::unordered_set<std::string> seen;
  std::queue<std::pair<std::string, int>> frontier;
  seen.insert(s1);
  frontier.push({s1, 0});

  while (!frontier.empty()) {
    auto [state, depth] = frontier.front();
    frontier.pop();

    if (state == s2) return depth;

    // state != s2 and the lengths are equal, so a mismatch exists.
    int first = 0;
    while (state[first] == s2[first]) ++first;

    for (int k = first + 1; k < state.size(); ++k) {
      // Skip characters that are not the one needed at `first`, and
      // characters that already sit where they belong.
      if (state[k] != s2[first] || state[k] == s2[k]) continue;

      std::swap(state[first], state[k]);
      if (seen.insert(state).second) {
        frontier.push({state, depth + 1});
      }
      std::swap(state[first], state[k]);  // restore for the next candidate
    }
  }
  return -1;
}
200+
201+
void DNA_Analyzer::WindowAlgorithm(const std::filesystem::path& path) {
202+
std::ifstream file(path);
203+
std::string s, p;
204+
file >> s >> p;
205+
std::cout << minWindowSubstring(s, p) << std::endl;
206+
}
207+
208+
// Returns the shortest substring of `s` that contains every character of `t`
// with at least the multiplicity it has in `t`. When several windows share
// the minimum length the leftmost one is returned. Returns "" when no such
// window exists or when `t` is empty.
std::string DNA_Analyzer::minWindowSubstring(std::string s, std::string t) {
  // With nothing required the window is satisfied from the start; without
  // this guard the shrinking loop below would never exit and would run
  // `left` past the end of `s`.
  if (t.empty()) return "";

  // need[c] is how many more copies of c the current window still lacks;
  // it goes negative when the window holds surplus copies.
  std::unordered_map<char, int> need;
  for (char c : t) ++need[c];

  // Number of distinct characters whose requirement is not yet satisfied.
  std::size_t missing = need.size();

  std::size_t best_start = std::string::npos;
  std::size_t best_len = 0;
  std::size_t left = 0;

  for (std::size_t right = 0; right < s.size(); ++right) {
    const auto entering = need.find(s[right]);
    if (entering != need.end() && --entering->second == 0) --missing;

    // Shrink from the left while the window [left, right] stays valid.
    while (missing == 0) {
      const std::size_t len = right + 1 - left;
      if (best_start == std::string::npos || len < best_len) {
        best_start = left;
        best_len = len;
      }
      const auto leaving = need.find(s[left]);
      if (leaving != need.end() && ++leaving->second > 0) ++missing;
      ++left;
    }
  }

  if (best_start == std::string::npos) return "";
  return s.substr(best_start, best_len);
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /