|
| 1 | +#include <bits/stdc++.h> |
| 2 | +using namespace std; |
| 3 | + |
| 4 | +// The Sequence Alignment problem is one of the fundamental problems of the biological sciences, |
| 5 | +// aimed at measuring the similarity of two amino-acid sequences. Comparing amino-acid sequences is of prime importance, |
| 6 | +// since it gives vital information on evolution and development. A feasible solution introduces gaps into the strings so as to equalise their lengths. |
| 7 | +// It can easily be proved that adding extra gaps after the lengths are equal only increases the penalty, so no further gaps are needed. |
| 8 | + |
| 9 | + |
// Computes a minimum-penalty global alignment of two sequences and prints it.
//
// Parameters:
//   x, y  - the sequences to align.
//   pxy   - penalty charged when two different characters are aligned.
//   pgap  - penalty charged when a character is aligned with a gap ('_').
//
// Output (written to std::cout): the minimum total penalty, then the two
// aligned sequences on separate lines, with '_' marking each inserted gap.
// Matching characters cost nothing.
void getMinimumPenalty(std::string x, std::string y, int pxy, int pgap)
{
    const int m = static_cast<int>(x.length());
    const int n = static_cast<int>(y.length());

    // dp[i][j] = minimum penalty of aligning x[0..i) with y[0..j).
    // A heap-backed table replaces the variable-length stack array, which is
    // not standard C++ and cannot portably be brace-initialised.
    std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1, 0));

    // Aligning a prefix against the empty string costs one gap per character.
    // Each border is filled over its own extent only; a single loop running to
    // n + m would write outside the table.
    for (int row = 0; row <= m; ++row)
    {
        dp[row][0] = row * pgap;
    }
    for (int col = 0; col <= n; ++col)
    {
        dp[0][col] = col * pgap;
    }

    for (int row = 1; row <= m; ++row)
    {
        for (int col = 1; col <= n; ++col)
        {
            if (x[row - 1] == y[col - 1])
            {
                // A match is free, so the diagonal predecessor carries over.
                dp[row][col] = dp[row - 1][col - 1];
            }
            else
            {
                dp[row][col] = std::min({dp[row - 1][col - 1] + pxy,
                                         dp[row - 1][col] + pgap,
                                         dp[row][col - 1] + pgap});
            }
        }
    }

    // Walk back from dp[m][n] to recover one optimal alignment. Columns are
    // appended right-to-left and reversed afterwards, so no fixed-size
    // scratch buffer or leading-gap trimming is required.
    std::string alignedX;
    std::string alignedY;
    alignedX.reserve(static_cast<std::size_t>(m) + static_cast<std::size_t>(n));
    alignedY.reserve(static_cast<std::size_t>(m) + static_cast<std::size_t>(n));

    int i = m;
    int j = n;
    while (i > 0 && j > 0)
    {
        const bool sameChar = (x[i - 1] == y[j - 1]);
        if (sameChar || dp[i - 1][j - 1] + pxy == dp[i][j])
        {
            // Match or mismatch: consume one character from each sequence.
            alignedX.push_back(x[i - 1]);
            alignedY.push_back(y[j - 1]);
            --i;
            --j;
        }
        else if (dp[i - 1][j] + pgap == dp[i][j])
        {
            // Gap in y: consume a character of x only.
            alignedX.push_back(x[i - 1]);
            alignedY.push_back('_');
            --i;
        }
        else
        {
            // Gap in x: consume a character of y only. Using a plain else
            // guarantees the loop always makes progress.
            alignedX.push_back('_');
            alignedY.push_back(y[j - 1]);
            --j;
        }
    }

    // At most one sequence still has an unconsumed prefix; pair it with gaps.
    while (i > 0)
    {
        alignedX.push_back(x[i - 1]);
        alignedY.push_back('_');
        --i;
    }
    while (j > 0)
    {
        alignedX.push_back('_');
        alignedY.push_back(y[j - 1]);
        --j;
    }

    std::reverse(alignedX.begin(), alignedX.end());
    std::reverse(alignedY.begin(), alignedY.end());

    std::cout << "Minimum Penalty in aligning the genes = ";
    std::cout << dp[m][n] << "\n";
    std::cout << "The aligned genes are :\n";
    std::cout << alignedX;
    std::cout << "\n";
    std::cout << alignedY;
}
| 117 | + |
| 118 | +int main(){ |
| 119 | + |
| 120 | + string gene1 = "AGGGCT"; |
| 121 | + string gene2 = "AGGCA"; |
| 122 | + |
| 123 | + int misMatchPenalty = 3; |
| 124 | + int gapPenalty = 2; |
| 125 | + |
| 126 | + getMinimumPenalty(gene1, gene2, |
| 127 | + misMatchPenalty, gapPenalty); |
| 128 | + return 0; |
| 129 | +} |
0 commit comments