#include "DNA_Analyzer.h"

#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <queue>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

3- void DNA_Analyzer::RabinKarpAlgorithm (const std::filesystem::path& path_1, const std::filesystem::path& path_2) {
3+ void DNA_Analyzer::RabinKarpAlgorithm (const std::filesystem::path& path_1,
4+ const std::filesystem::path& path_2) {
45 std::ifstream file_a (path_1), file_b (path_2);
56
6- std::string a ((std::istreambuf_iterator<char >(file_a)), std::istreambuf_iterator<char >());
7- std::string b ((std::istreambuf_iterator<char >(file_b)), std::istreambuf_iterator<char >());
7+ std::string a ((std::istreambuf_iterator<char >(file_a)),
8+ std::istreambuf_iterator<char >());
9+ std::string b ((std::istreambuf_iterator<char >(file_b)),
10+ std::istreambuf_iterator<char >());
811
9- const int p = 31 ;// простое число
10- const int m = 1e9 + 9 ;// модуль
12+ const int p = 31 ;
13+ const int m = 1e9 + 9 ;
1114 int S = a.size (), T = b.size ();
12-
13- std::vector<long long > p_pow (std::max (S, T));// предподсчет степеней числа p
15+ 16+ std::vector<long long > p_pow (std::max (S, T));
1417 p_pow[0 ] = 1 ;
1518 for (int i = 1 ; i < (int )p_pow.size (); i++) {
1619 p_pow[i] = (p_pow[i - 1 ] * p) % m;
1720 }
18- 19- std::vector<long long > h (S + 1 , 0 ); // хеши от всех префиксов строки text
21+ 22+ std::unordered_map<char , int > code_map{
23+ {' A' , 1 }, {' C' , 2 }, {' G' , 3 }, {' T' , 4 }};
24+ 25+ std::vector<long long > h (S + 1 , 0 );
2026 for (int i = 0 ; i < S; i++) {
21- int code;
22- switch (a[i]) { // перевод из алфавита {A, C, G, T} в числа
23- case ' A' : code = 1 ; break ;
24- case ' C' : code = 2 ; break ;
25- case ' G' : code = 3 ; break ;
26- case ' T' : code = 4 ; break ;
27- }
27+ char c = a[i];
28+ int code = code_map[c];
2829 h[i + 1 ] = (h[i] + code * p_pow[i]) % m;
2930 }
30-
31- long long h_s = 0 ;// хеш подстроки длины T строки text
31+ 32+ long long h_s = 0 ;
3233 for (int i = 0 ; i < T; i++) {
33- int code;
34- switch (b[i]) { // перевод из алфавита {A, C, G, T} в числа
35- case ' A' : code = 1 ; break ;
36- case ' C' : code = 2 ; break ;
37- case ' G' : code = 3 ; break ;
38- case ' T' : code = 4 ; break ;
39- }
34+ char c = b[i];
35+ int code = code_map[c];
4036 h_s = (h_s + code * p_pow[i]) % m;
4137 }
42-
43- std::vector<int > positions;// позиции вхождения подстроки pattern в строку text
38+ 39+ std::vector<int > positions;
4440 for (int i = 0 ; i + T - 1 < S; i++) {
45- long long cur_h = (h[i + T] + m - h[i]) % m;// вычисление хеша для подстроки
41+ long long cur_h = (h[i + T] + m - h[i]) % m;
4642 if (cur_h == h_s * p_pow[i] % m) {
4743 positions.push_back (i);
4844 }
4945 }
5046
51- for (auto &it : positions) {
47+ for (auto & it : positions) {
5248 std::cout << it << " " ;
5349 }
5450 std::cout << std::endl;
5551}
52+ 53+ void DNA_Analyzer::NWAlgorithm (const std::filesystem::path& path) {
54+ std::ifstream file (path);
55+ 56+ Score score (0 , 0 , 0 );
57+ file >> score.match >> score.mismatch >> score.gap ;
58+ std::string a, b;
59+ file >> a >> b;
60+ 61+ std::cout << optimalScore (score, a, b);
62+ 63+ auto k = optimalAlignment (score, a, b);
64+ std::cout << k.first << std::endl;
65+ std::cout << k.second .first << std::endl;
66+ std::cout << k.second .second << std::endl;
67+ }
68+ 69+ int DNA_Analyzer::optimalScore (Score score, std::string s1, std::string s2) {
70+ int n = s1.size (), m = s2.size ();
71+ std::vector<std::vector<int >> dp (n + 1 , std::vector<int >(m + 1 ));
72+ for (int i = 1 ; i <= n; i++) dp[i][0 ] = dp[i - 1 ][0 ] + score.gap ;
73+ for (int i = 1 ; i <= m; i++) dp[0 ][i] = dp[0 ][i - 1 ] + score.gap ;
74+ for (int i = 1 ; i <= n; i++) {
75+ for (int j = 1 ; j <= m; j++) {
76+ int match = dp[i - 1 ][j - 1 ] +
77+ (s1[i - 1 ] == s2[j - 1 ] ? score.match : score.mismatch );
78+ int ins = dp[i][j - 1 ] + score.gap ;
79+ int del = dp[i - 1 ][j] + score.gap ;
80+ dp[i][j] = std::max ({match, ins, del});
81+ }
82+ }
83+ return dp[n][m];
84+ }
85+ 86+ std::pair<int , std::pair<std::string, std::string>>
87+ DNA_Analyzer::optimalAlignment (Score score, std::string s1, std::string s2) {
88+ int n = s1.size (), m = s2.size ();
89+ std::vector<std::vector<int >> dp (n + 1 , std::vector<int >(m + 1 ));
90+ std::vector<std::vector<int >> p (
91+ n + 1 , std::vector<int >(m + 1 )); // 0-diag, 1-up, 2-left
92+ 93+ for (int i = 1 ; i <= n; i++) dp[i][0 ] = dp[i - 1 ][0 ] + score.gap , p[i][0 ] = 1 ;
94+ for (int i = 1 ; i <= m; i++) dp[0 ][i] = dp[0 ][i - 1 ] + score.gap , p[0 ][i] = 2 ;
95+ for (int i = 1 ; i <= n; i++) {
96+ for (int j = 1 ; j <= m; j++) {
97+ int match = dp[i - 1 ][j - 1 ] +
98+ (s1[i - 1 ] == s2[j - 1 ] ? score.match : score.mismatch );
99+ int ins = dp[i][j - 1 ] + score.gap ;
100+ int del = dp[i - 1 ][j] + score.gap ;
101+ if (match >= ins && match >= del) {
102+ dp[i][j] = match;
103+ p[i][j] = 0 ;
104+ } else if (ins >= match && ins >= del) {
105+ dp[i][j] = ins;
106+ p[i][j] = 2 ;
107+ } else {
108+ dp[i][j] = del;
109+ p[i][j] = 1 ;
110+ }
111+ }
112+ }
113+ 114+ int i = n, j = m;
115+ std::string res1, res2;
116+ while (i > 0 || j > 0 ) {
117+ if (p[i][j] == 0 ) {
118+ res1 += s1[i - 1 ];
119+ res2 += s2[j - 1 ];
120+ i--;
121+ j--;
122+ } else if (p[i][j] == 1 ) {
123+ res1 += s1[i - 1 ];
124+ res2 += ' -' ;
125+ i--;
126+ } else {
127+ res1 += ' -' ;
128+ res2 += s2[j - 1 ];
129+ j--;
130+ }
131+ }
132+ reverse (res1.begin (), res1.end ());
133+ reverse (res2.begin (), res2.end ());
134+ 135+ return {dp[n][m], {res1, res2}};
136+ }
137+ 138+ void DNA_Analyzer::RegexAlgorithm (const std::filesystem::path& path) {
139+ std::ifstream file (path);
140+ std::string s, p;
141+ file >> s >> p;
142+ std::cout << isMatch (s, p) << std::endl;
143+ }
144+ 145+ bool DNA_Analyzer::isMatch (std::string s, std::string p) {
146+ int m = p.size (), n = s.size ();
147+ std::vector<std::vector<bool >> dp (m + 1 , std::vector<bool >(n + 1 ));
148+ dp[0 ][0 ] = true ;
149+ for (int i = 1 ; i <= m; i++) {
150+ if (p[i - 1 ] == ' *' ) dp[i][0 ] = dp[i - 2 ][0 ];
151+ }
152+ for (int i = 1 ; i <= m; i++) {
153+ for (int j = 1 ; j <= n; j++) {
154+ if (s[j - 1 ] == p[i - 1 ] || p[i - 1 ] == ' .' )
155+ dp[i][j] = dp[i - 1 ][j - 1 ];
156+ else if (p[i - 1 ] == ' *' ) {
157+ dp[i][j] = dp[i - 2 ][j] ||
158+ (dp[i][j - 1 ] && (s[j - 1 ] == p[i - 2 ] || p[i - 2 ] == ' .' ));
159+ } else if (p[i - 1 ] == ' ?' ) {
160+ dp[i][j] = dp[i - 1 ][j - 1 ] || dp[i - 1 ][j] || dp[i][j - 1 ];
161+ }
162+ }
163+ }
164+ return dp[m][n];
165+ }
166+ 167+ void DNA_Analyzer::KSimilarAlgorithm (const std::filesystem::path& path) {
168+ std::ifstream file (path);
169+ std::string s, p;
170+ file >> s >> p;
171+ std::cout << kSimilarity (s, p) << std::endl;
172+ }
173+ 174+ int DNA_Analyzer::kSimilarity (std::string s1, std::string s2) {
175+ if (s1.size () != s2.size ()) return -1 ;
176+ std::unordered_set<std::string> visited;
177+ std::queue<std::pair<std::string, int >> q;
178+ q.push ({s1, 0 });
179+ visited.insert (s1);
180+ while (!q.empty ()) {
181+ std::string curr = q.front ().first ;
182+ int swaps = q.front ().second ;
183+ q.pop ();
184+ if (curr == s2) return swaps;
185+ int i = 0 ;
186+ while (curr[i] == s2[i]) i++;
187+ for (int j = i + 1 ; j < curr.size (); j++) {
188+ if (curr[j] == s2[i] && curr[j] != s2[j]) {
189+ std::swap (curr[i], curr[j]);
190+ if (visited.count (curr) == 0 ) {
191+ q.push ({curr, swaps + 1 });
192+ visited.insert (curr);
193+ }
194+ std::swap (curr[i], curr[j]);
195+ }
196+ }
197+ }
198+ return -1 ;
199+ }
200+ 201+ void DNA_Analyzer::WindowAlgorithm (const std::filesystem::path& path) {
202+ std::ifstream file (path);
203+ std::string s, p;
204+ file >> s >> p;
205+ std::cout << minWindowSubstring (s, p) << std::endl;
206+ }
207+ 208+ std::string DNA_Analyzer::minWindowSubstring (std::string s, std::string t) {
209+ std::unordered_map<char , int > mp;
210+ for (char c : t) mp[c]++;
211+ int cnt = mp.size (), left = 0 , right = 0 , ansL = -1 , ansR = -1 ,
212+ minLen = INT_MAX;
213+ while (right < s.size ()) {
214+ if (mp.count (s[right])) {
215+ mp[s[right]]--;
216+ if (mp[s[right]] == 0 ) cnt--;
217+ }
218+ right++;
219+ 220+ while (cnt == 0 ) {
221+ if (right - left < minLen) {
222+ minLen = right - left;
223+ ansL = left;
224+ ansR = right;
225+ }
226+ if (mp.count (s[left])) {
227+ mp[s[left]]++;
228+ if (mp[s[left]] > 0 ) cnt++;
229+ }
230+ left++;
231+ }
232+ }
233+ if (ansL == -1 )
234+ return " " ;
235+ else
236+ return s.substr (ansL, ansR - ansL);
237+ }
0 commit comments