Skip to main content
Code Review

Return to Question

Post Reopened by 200_success
Embedded code
Source Link

You can find the code here

public class DNASequence {
 protected final String seq_1, seq_2; // The two sequences to be analyzed
 public int alignmentScore; // Using Needleman-Wunsch algorithm
 protected Node[][] matrix; // Store scores and indels
 protected final int matchScore, mismatchScore, indel;
 // Strings to be used to print the DNA sequnce analysis
 String top = ""; // Sequence 1
 String buffer = ""; // Matches between Sequnce 1 & 2
 String bottom = ""; // Sequence 2
 public DNASequence(String s1, String s2, ScoreScheme s) {
 // I use a しかく as a buffer so that the Sequence string aligns properly
 // with the indels and scores within the matrix.
 seq_1 = "\u25A0" + s1;
 seq_2 = "\u25A0" + s2;
 // Setup the scoring schema. For this program, I am implementing a simple
 // gap cost instead of complicating it further with gap extention and
 // opening costs.
 matchScore = s.matchScore;
 mismatchScore = s.mismatchScore;
 indel = s.indel;
 // I instiate the matrix and progressively build the indels on the first
 // row and the first column.
 matrix = new Node[seq_1.length()][seq_2.length()];
 for (int i = 0; i < seq_1.length(); i++)
 matrix[i][0] = new Node(i * indel, i, 0);
 for (int i = 0; i < seq_2.length(); i++)
 matrix[0][i] = new Node(i * indel, 0, i);
 }
 // Helper method that helps decide what kind of match/mismatch score to use.
 protected int score(int i, int j) {
 if (seq_1.charAt(i) == seq_2.charAt(j))
 return matchScore;
 else
 return mismatchScore;
 }
 // Helper method that implements the Needleman-Wunsch algo on a local level.
 protected Node match(int i, int j) {
 Node s1,s2,s3;
 s1 = new Node(matrix[i-1][j-1].score + score(i, j), i, j);
 s2 = new Node(matrix[i-1][j].score + indel, i, j);
 s3 = new Node(matrix[i][j-1].score + indel, i, j);
 // Since the aim of the Needleman-Wunsch algo is to find the shortest path
 // with the lowest cost and highest score, I check whichever of the viable
 // nodes nets me the highest score (up till that point). Once found, I set
 // a pointer back to the previous node (to make it easier to traceback)
 // and return the current node (to be placed in the matrix).
 Node largest = new Node(Math.max(s1.score, Math.max(s2.score, s3.score)), i, j);
 if (s1.compareTo(largest) == 0)
 largest.prev = matrix[i-1][j-1];
 else if(s2.compareTo(largest) == 0)
 largest.prev = matrix[i-1][j];
 else
 largest.prev = matrix[i][j-1];
 return largest;
 }
 // Runs the Needleman-Wunsch algo on every node in the matrix, sets the aligment
 // score and returns the last node in the matrix -- the one that holds the score.
 public Node runAnalysis() {
 for (int i = 1; i < seq_1.length(); i++) {
 for (int j = 1; j < seq_2.length(); j++){
 matrix[i][j] = match(i, j);
 }
 }
 alignmentScore = matrix[seq_1.length()-1][seq_2.length()-1].score;
 return matrix[seq_1.length()-1][seq_2.length()-1];
 }
 // Helper method that progressively builds the analysis result. It returns the 
 // 'tail' because we may still need to do some work on it.
 protected Node traceHelper(Node curr) {
 while (curr.prev != null) {
 // Print out the 'path'
 // System.out.print(curr.score + "[" + curr.i + ", " + curr.j + "] -> ");
 if (curr.i - curr.prev.i == 1 && curr.j - curr.prev.j == 1){ // If the path leads diagonal
 boolean x = seq_1.charAt(curr.i) == seq_2.charAt(curr.j) ? true : false;
 // System.out.println("Going diag: " + x);
 if(x){
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = "|" + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }else{
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = " " + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }
 }else if (curr.i - curr.prev.i == 1){ // If the path leads up
 // System.out.println("Going up: " + indel);
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = " " + " " + buffer;
 bottom = "-" + " " + bottom; // If the path leads left
 }else if (curr.j - curr.prev.j == 1){
 // System.out.println("Going left: " + indel);
 top = "-" + " " + top;
 buffer = " " + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }
 curr = curr.prev;
 }
 return curr;
 }
 // Traceback from the last node in the matrix back to the first indel node.
 public void traceback() {
 Node curr = matrix[seq_1.length()-1][seq_2.length()-1];
 curr = traceHelper(curr);
 // Sometimes the tail or the traceback path ends on an indel. As a result,
 // we end up with an incomplete path because the algorithm doesn't see past
 // it. To counter that, I check if I am on an indel and if so, I help it
 // move along the path back to 0,0.
 while (curr.i != 0 || curr.j != 0) {
 if (curr.i != 0 && curr.j == 0){
 curr.prev = matrix[curr.i-1][curr.j];
 curr = traceHelper(curr);
 }else if (curr.i == 0 && curr.j != 0) {
 curr.prev = matrix[curr.i][curr.j-1];
 curr = traceHelper(curr);
 }
 }
 // Print out the DNA sequence analysis
 System.out.println(top);
 System.out.println(buffer);
 System.out.println(bottom);
 }
 // Print the matrix.
 public void printMatrix() {
 System.out.printf("%4s", "\u25A0");
 for (int i = 0; i < matrix[0].length; i++) {
 System.out.printf("%4s", seq_2.charAt(i));
 }
 System.out.println();
 for (int i = 0; i < matrix.length; i++) {
 System.out.printf("%4s", seq_1.charAt(i));
 for (int j = 0; j < matrix[i].length; j++) {
 System.out.printf("%4d", matrix[i][j].score);
 }
 System.out.println();
 }
 }
 // Creates a random DNA sequence -- for testing purposes.
 public static String randseq(int n) {
 char[] S = new char[n];
 String DNA = "ACGT";
 for (int i = 0; i < n; i++) {
 int r = (int)(Math.random() * 4);
 S[i] = DNA.charAt(r);
 }
 return new String(S);
 }
 public static void main(String[] args) {
 // Test with randomly generated DNA sequences
 String seq_1 = randseq(34);
 String seq_2 = randseq(32);
 // Or test with the following
 // String seq_1 = "CATTAATTACACTCTCGCACTCACCACCAAACATCCTAAACCCAGACAGGCCTCGACTCC";
 // String seq_2 = "ACTAAACAAGACTCGCCTGTCTAACTAGGGAGTTTATAATGAACCGTGGCGTAGACCA";
 ScoreScheme s = new ScoreScheme(2, -1, -2); // MatchScore: 2; MismatchScore: -2; Indel: -2
 DNASequence dna = new DNASequence(seq_1, seq_2, s); // Create a new DNA Sequence using two strands
 dna.runAnalysis();
 dna.traceback();
 System.out.println("The alignment score is " + dna.alignmentScore);
 // dna.printMatrix();
 }
}
class Node implements Comparable<Node>{
 int i, j;
 int score;
 Node prev;
 public Node(int score, int x, int y) {
 this.i = x;
 this.j = y;
 this.score = score;
 this.prev = null;
 }
 public int compareTo(Node n) {
 return this.score - n.score;
 }
 public String toString() {
 return ""+score;
 }
}
class ScoreScheme {
 int matchScore, mismatchScore, indel;
 public ScoreScheme(int m1, int m2, int i) {
 matchScore = m1;
 mismatchScore = m2;
 indel = i;
 }
}

You can find the code here

public class DNASequence {
 protected final String seq_1, seq_2; // The two sequences to be analyzed
 public int alignmentScore; // Using Needleman-Wunsch algorithm
 protected Node[][] matrix; // Store scores and indels
 protected final int matchScore, mismatchScore, indel;
 // Strings to be used to print the DNA sequnce analysis
 String top = ""; // Sequence 1
 String buffer = ""; // Matches between Sequnce 1 & 2
 String bottom = ""; // Sequence 2
 public DNASequence(String s1, String s2, ScoreScheme s) {
 // I use a しかく as a buffer so that the Sequence string aligns properly
 // with the indels and scores within the matrix.
 seq_1 = "\u25A0" + s1;
 seq_2 = "\u25A0" + s2;
 // Setup the scoring schema. For this program, I am implementing a simple
 // gap cost instead of complicating it further with gap extention and
 // opening costs.
 matchScore = s.matchScore;
 mismatchScore = s.mismatchScore;
 indel = s.indel;
 // I instiate the matrix and progressively build the indels on the first
 // row and the first column.
 matrix = new Node[seq_1.length()][seq_2.length()];
 for (int i = 0; i < seq_1.length(); i++)
 matrix[i][0] = new Node(i * indel, i, 0);
 for (int i = 0; i < seq_2.length(); i++)
 matrix[0][i] = new Node(i * indel, 0, i);
 }
 // Helper method that helps decide what kind of match/mismatch score to use.
 protected int score(int i, int j) {
 if (seq_1.charAt(i) == seq_2.charAt(j))
 return matchScore;
 else
 return mismatchScore;
 }
 // Helper method that implements the Needleman-Wunsch algo on a local level.
 protected Node match(int i, int j) {
 Node s1,s2,s3;
 s1 = new Node(matrix[i-1][j-1].score + score(i, j), i, j);
 s2 = new Node(matrix[i-1][j].score + indel, i, j);
 s3 = new Node(matrix[i][j-1].score + indel, i, j);
 // Since the aim of the Needleman-Wunsch algo is to find the shortest path
 // with the lowest cost and highest score, I check whichever of the viable
 // nodes nets me the highest score (up till that point). Once found, I set
 // a pointer back to the previous node (to make it easier to traceback)
 // and return the current node (to be placed in the matrix).
 Node largest = new Node(Math.max(s1.score, Math.max(s2.score, s3.score)), i, j);
 if (s1.compareTo(largest) == 0)
 largest.prev = matrix[i-1][j-1];
 else if(s2.compareTo(largest) == 0)
 largest.prev = matrix[i-1][j];
 else
 largest.prev = matrix[i][j-1];
 return largest;
 }
 // Runs the Needleman-Wunsch algo on every node in the matrix, sets the aligment
 // score and returns the last node in the matrix -- the one that holds the score.
 public Node runAnalysis() {
 for (int i = 1; i < seq_1.length(); i++) {
 for (int j = 1; j < seq_2.length(); j++){
 matrix[i][j] = match(i, j);
 }
 }
 alignmentScore = matrix[seq_1.length()-1][seq_2.length()-1].score;
 return matrix[seq_1.length()-1][seq_2.length()-1];
 }
 // Helper method that progressively builds the analysis result. It returns the 
 // 'tail' because we may still need to do some work on it.
 protected Node traceHelper(Node curr) {
 while (curr.prev != null) {
 // Print out the 'path'
 // System.out.print(curr.score + "[" + curr.i + ", " + curr.j + "] -> ");
 if (curr.i - curr.prev.i == 1 && curr.j - curr.prev.j == 1){ // If the path leads diagonal
 boolean x = seq_1.charAt(curr.i) == seq_2.charAt(curr.j) ? true : false;
 // System.out.println("Going diag: " + x);
 if(x){
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = "|" + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }else{
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = " " + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }
 }else if (curr.i - curr.prev.i == 1){ // If the path leads up
 // System.out.println("Going up: " + indel);
 top = seq_1.charAt(curr.i) + " " + top;
 buffer = " " + " " + buffer;
 bottom = "-" + " " + bottom; // If the path leads left
 }else if (curr.j - curr.prev.j == 1){
 // System.out.println("Going left: " + indel);
 top = "-" + " " + top;
 buffer = " " + " " + buffer;
 bottom = seq_2.charAt(curr.j) + " " + bottom;
 }
 curr = curr.prev;
 }
 return curr;
 }
 // Traceback from the last node in the matrix back to the first indel node.
 public void traceback() {
 Node curr = matrix[seq_1.length()-1][seq_2.length()-1];
 curr = traceHelper(curr);
 // Sometimes the tail or the traceback path ends on an indel. As a result,
 // we end up with an incomplete path because the algorithm doesn't see past
 // it. To counter that, I check if I am on an indel and if so, I help it
 // move along the path back to 0,0.
 while (curr.i != 0 || curr.j != 0) {
 if (curr.i != 0 && curr.j == 0){
 curr.prev = matrix[curr.i-1][curr.j];
 curr = traceHelper(curr);
 }else if (curr.i == 0 && curr.j != 0) {
 curr.prev = matrix[curr.i][curr.j-1];
 curr = traceHelper(curr);
 }
 }
 // Print out the DNA sequence analysis
 System.out.println(top);
 System.out.println(buffer);
 System.out.println(bottom);
 }
 // Print the matrix.
 public void printMatrix() {
 System.out.printf("%4s", "\u25A0");
 for (int i = 0; i < matrix[0].length; i++) {
 System.out.printf("%4s", seq_2.charAt(i));
 }
 System.out.println();
 for (int i = 0; i < matrix.length; i++) {
 System.out.printf("%4s", seq_1.charAt(i));
 for (int j = 0; j < matrix[i].length; j++) {
 System.out.printf("%4d", matrix[i][j].score);
 }
 System.out.println();
 }
 }
 // Creates a random DNA sequence -- for testing purposes.
 public static String randseq(int n) {
 char[] S = new char[n];
 String DNA = "ACGT";
 for (int i = 0; i < n; i++) {
 int r = (int)(Math.random() * 4);
 S[i] = DNA.charAt(r);
 }
 return new String(S);
 }
 public static void main(String[] args) {
 // Test with randomly generated DNA sequences
 String seq_1 = randseq(34);
 String seq_2 = randseq(32);
 // Or test with the following
 // String seq_1 = "CATTAATTACACTCTCGCACTCACCACCAAACATCCTAAACCCAGACAGGCCTCGACTCC";
 // String seq_2 = "ACTAAACAAGACTCGCCTGTCTAACTAGGGAGTTTATAATGAACCGTGGCGTAGACCA";
 ScoreScheme s = new ScoreScheme(2, -1, -2); // MatchScore: 2; MismatchScore: -2; Indel: -2
 DNASequence dna = new DNASequence(seq_1, seq_2, s); // Create a new DNA Sequence using two strands
 dna.runAnalysis();
 dna.traceback();
 System.out.println("The alignment score is " + dna.alignmentScore);
 // dna.printMatrix();
 }
}
class Node implements Comparable<Node>{
 int i, j;
 int score;
 Node prev;
 public Node(int score, int x, int y) {
 this.i = x;
 this.j = y;
 this.score = score;
 this.prev = null;
 }
 public int compareTo(Node n) {
 return this.score - n.score;
 }
 public String toString() {
 return ""+score;
 }
}
class ScoreScheme {
 int matchScore, mismatchScore, indel;
 public ScoreScheme(int m1, int m2, int i) {
 matchScore = m1;
 mismatchScore = m2;
 indel = i;
 }
}
Post Closed as "Not suitable for this site" by 200_success
Source Link

Optimizing Needleman-Wunsch algorithm in Java

For a class assignment, I was required to implement the Needleman-Wunsch algorithm in Java. I have already completed the assignment but I was wondering if there were any algorithmic (or even syntactic) improvements that could be made over my existing code to make it more efficient.

You can find the code here

lang-java

AltStyle によって変換されたページ (->オリジナル) /