\$\begingroup\$
\$\endgroup\$
3
The Needleman–Wunsch algorithm is an algorithm used in bioinformatics to align protein or nucleotide sequences. Here is an implementation in Rust:
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Direction {
Match, // go diagonally, left and up
Left,
Up,
Undefined,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct ResultEntry {
pub score: i64,
pub direction: Direction,
}
impl ResultEntry {
fn new(s: i64, d: Direction) -> ResultEntry {
ResultEntry {
score: s,
direction: d,
}
}
}
pub trait NeedlemanWunsch<T> {
fn align(&mut self, vec: &mut Vec<T>, similarity: &Fn(&T, &T) -> i64, gap_penalty: i64) -> (Vec<Option<T>>, Vec<Option<T>>);
}
impl<T> NeedlemanWunsch<T> for Vec<T> where T: Eq + Copy + Clone {
fn align(&mut self, vec: &mut Vec<T>, similarity: &Fn(&T, &T) -> i64, gap_penalty: i64) -> (Vec<Option<T>>, Vec<Option<T>>) {
let mut mat = init_align_matrix(self.len() , vec.len());
fill_align_matrix(&mut mat, self, vec, similarity, gap_penalty);
let mat = &mat; // no need to mutate mat any more.
let mut vec1: Vec<Option<T>> = Vec::new();
let mut vec2: Vec<Option<T>> = Vec::new();
// the dimension of the alignment matrix is one larger than vec1.len by vec2.len
let mut i = self.len();
let mut j = vec.len();
loop {
match mat[i][j].direction {
Direction::Match => {
vec1.insert(0, Some(self[i-1]));
vec2.insert(0, Some(vec[j-1]));
i -= 1;
j -= 1;
},
Direction::Left => {
vec1.insert(0, None);
// the dimension of the alignment matrix is one larger than vec1.len by vec2.len
vec2.insert(0, Some(vec[j-1]));
j -= 1;
},
Direction::Up => {
// the dimension of the alignment matrix is one larger than vec1.len by vec2.len
vec1.insert(0, Some(self[i-1]));
vec2.insert(0, None);
i -= 1;
},
Direction::Undefined => {
break;
}
}
}
return (vec1, vec2);
}
}
#[test]
fn test_align() {
fn similarity(c1: &char, c2: &char) -> i64 {
if c1 == c2 { 1 } else { -1 }
}
let mut vec1: Vec<_> = "what".chars().collect();
let mut vec2: Vec<_> = "white".chars().collect();
let res = vec1.align(&mut vec2, &similarity, -1);
let res_should_be = (
vec![Some('w'), Some('h'), Some('a'), Some('t'), None],
vec![Some('w'), Some('h'), Some('i'), Some('t'), Some('e')],
);
assert_eq!(res, res_should_be);
let mut vec1: Vec<_> = "ab".chars().collect();
let mut vec2: Vec<_> = "aeb".chars().collect();
let res = vec1.align(&mut vec2, &similarity, -1);
let res_should_be = (
vec![Some('a'), None, Some('b')],
vec![Some('a'), Some('e'), Some('b')],
);
assert_eq!(res, res_should_be);
}
type AlignMatrix = Vec<Vec<ResultEntry>>;
/// len1: Length of the first vector, or number of rows for the alignment matrix.
/// len2: Length of the second vector, or number of cols for the alignment matrix.
fn init_align_matrix(len1: usize, len2: usize) -> AlignMatrix {
let len1 = len1 + 1;
let len2 = len2 + 1;
// starting point has undefined direction
let mut mat: AlignMatrix = vec![vec![ResultEntry::new(0, Direction::Undefined); len2]; len1];
// first row all go left
for i in 1..len2 {
mat[0][i] = ResultEntry::new(-(i as i64), Direction::Left);
}
// first col all go up
for i in 1..len1 {
mat[i][0] = ResultEntry::new(-(i as i64), Direction::Up);
}
mat[0][0] = ResultEntry::new(0, Direction::Undefined);
mat
}
#[test]
fn test_align_mat() {
let m = init_align_matrix(2, 3);
let m_should_be = vec![
vec![ResultEntry::new( 0, Direction::Undefined), ResultEntry::new(-1, Direction::Left), ResultEntry::new(-2, Direction::Left), ResultEntry::new(-3, Direction::Left)],
vec![ResultEntry::new(-1, Direction::Up), ResultEntry::new( 0, Direction::Undefined), ResultEntry::new( 0, Direction::Undefined), ResultEntry::new( 0, Direction::Undefined)],
vec![ResultEntry::new(-2, Direction::Up), ResultEntry::new( 0, Direction::Undefined), ResultEntry::new( 0, Direction::Undefined), ResultEntry::new( 0, Direction::Undefined)],
];
assert_eq!(m, m_should_be);
}
#[test]
fn test_fill_mat() {
fn similarity(c1: &char, c2: &char) -> i64 {
if c1 == c2 {
1
} else {
-1
}
}
let mut vec1: Vec<_> = "ab".chars().collect();
let mut vec2: Vec<_> = "aeb".chars().collect();
let mut mat = init_align_matrix(vec1.len(), vec2.len());
fill_align_matrix(&mut mat, &mut vec1, &mut vec2, &similarity, -1);
use self::Direction::Undefined as ud;
use self::Direction::Left as lt;
use self::Direction::Up as up;
use self::Direction::Match as mt;
let res_should_be = vec![
vec![ResultEntry::new(0, ud), ResultEntry::new(-1, lt), ResultEntry::new(-2, lt), ResultEntry::new(-3, lt)],
vec![ResultEntry::new(-1, up), ResultEntry::new(1, mt), ResultEntry::new(0, lt), ResultEntry::new(-1, lt)],
vec![ResultEntry::new(-2, up), ResultEntry::new(0, up), ResultEntry::new(0, mt), ResultEntry::new(1, mt)],
];
assert_eq!(mat, res_should_be);
}
fn fill_align_matrix<T>(mat: &mut AlignMatrix, vec1: &mut Vec<T>, vec2: &mut Vec<T>, similarity: &Fn(&T, &T) -> i64, gap_penalty: i64) {
assert_eq!(mat.len(), vec1.len() + 1);
assert_eq!(mat[0].len(), vec2.len() + 1);
fn get_score_set_direction<U>(mat: &mut AlignMatrix, vec1: &Vec<U>, vec2: &Vec<U>, similarity: &Fn(&U, &U) -> i64, gap_penalty: i64, i: usize, j: usize) -> i64 {
if i == 0 || j == 0 {
mat[i][j].score
} else if mat[i][j].direction != Direction::Undefined {
mat[i][j].score
} else {
let up_left = get_score_set_direction(mat, vec1, vec2, similarity, gap_penalty, i - 1, j - 1);
let up = get_score_set_direction(mat, vec1, vec2, similarity, gap_penalty, i - 1, j);
let left = get_score_set_direction(mat, vec1, vec2, similarity, gap_penalty, i, j - 1);
// the dimension of the alignment matrix is one larger than vec1.len by vec2.len
let similarity_score = similarity(&vec1[i-1], &vec2[j-1]);
let match_score = up_left + similarity_score;
let up_score = up + gap_penalty;
let left_score = left + gap_penalty;
let m = *[match_score, up_score, left_score].iter().max().unwrap();
#[cfg(feature = "verbose")]
{
print_mat(mat);
println!("Node: {:?}", (i, j));
println!("up_left, up, left: {:?}", (up_left, up, left));
println!("Match, up, left: {:?}", (match_score, up_score, left_score));
println!("Max score: {}", m);
}
if m == match_score {
#[cfg(feature = "verbose")]
{
println!("Go Up and Left!");
}
mat[i][j] = ResultEntry::new(m, Direction::Match);
} else if m == up_score {
#[cfg(feature = "verbose")]
{
println!("Go Up!");
}
mat[i][j] = ResultEntry::new(m, Direction::Up);
} else {
#[cfg(feature = "verbose")]
{
println!("Go Left!");
}
mat[i][j] = ResultEntry::new(m, Direction::Left);
}
m
}
}
for i in 1..(vec1.len() + 1) {
for j in 1..(vec2.len() + 1) {
get_score_set_direction(mat, vec1, vec2, similarity, gap_penalty, i, j);
}
}
#[cfg(feature = "verbose")]
{
print_mat(mat);
}
}
fn print_mat(mat: &AlignMatrix) {
use std::fmt::Write;
fn direction_to_arrow(d: Direction) -> char {
match d {
Direction::Match => '↖',
Direction::Up => '↑',
Direction::Left => '←',
Direction::Undefined => '.',
}
}
for row in mat {
let mut s = String::new();
for el in row {
write!(&mut s, "{:>4}", el.score).unwrap();
}
println!("{}", s);
}
for row in mat {
let mut s = String::new();
for el in row {
write!(&mut s, "{:>4}", direction_to_arrow(el.direction)).unwrap();
}
println!("{}", s);
}
}
This algorithm is usually applied to a sequence of chars or bytes, here I have generalized to any type of Eq.
All suggestions are welcome.
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Sep 20, 2016 at 10:48
-
2\$\begingroup\$ This code exhibits some of the same problems as your previous submission; specifically: "Let inference handle the type inside the collection with _" and "Remove commented code". If you are going to ignore previous feedback; what benefit do we have to providing more? \$\endgroup\$Shepmaster– Shepmaster2016年09月20日 12:43:49 +00:00Commented Sep 20, 2016 at 12:43
-
\$\begingroup\$ Sorry, I was busy making this thing compile and pass tests. Explicit type annotation does have the benefit of clarity, but I do prefer to saving quite a few keystrokes. Commented codes are now in conditional compilation blocks, because I like to have the option of printing out things when necessary. \$\endgroup\$qed– qed2016年09月20日 14:43:37 +00:00Commented Sep 20, 2016 at 14:43
-
6\$\begingroup\$ Sorry, I was busy making this thing compile and pass tests — We aren't in a hurry to review your code, forcing you to post it before it's ready ^_^. Make it compile, then take your time to make it good. Once you've finished making it as good as you can, then it's a good time for external review. \$\endgroup\$Shepmaster– Shepmaster2016年09月20日 14:50:34 +00:00Commented Sep 20, 2016 at 14:50
lang-rust