Minimum Spanning Tree in Rust

Question 1

As a project I have worked on implementing and benchmarking two different minimum spanning tree algorithms in rust. My main concern is adhering to good style and not programming any glaring inefficiencies.

lib.rs:

#![allow(dead_code)]
#![feature(test)]
use std::cmp::{Ord, Ordering, PartialOrd};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
use std::rc::Rc;
use std::fs::File;
use std::io::{self, BufRead};
use priority_queue::PriorityQueue;
mod priority_queue;
#[derive(Eq, PartialEq, Hash, Debug)]
pub struct Edge<T>(Rc<T>, Rc<T>, usize);
/// A simple graph data structure
#[derive(PartialEq, Debug)]
pub struct EdgeListGraph<T: Eq + Hash> {
 vertices: HashSet<Rc<T>>,
 edges: HashSet<Edge<T>>,
}
#[derive(PartialEq, Debug)]
pub struct AdjacencyListGraph<T: Eq + Hash> {
 vertices: HashMap<Rc<T>, HashSet<Edge<T>>>,
}
/// A union find data structure where each item is a map entry with a refernce to its "parent"
pub struct UnionFind<T: Eq + Hash> {
 items: HashMap<Rc<T>, Rc<T>>,
}
trait Graph<T>: Default
where
 T: Eq + Hash,
{
 fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>>;
 fn edges<'a>(&'a self) -> Vec<&'a Edge<T>>;
 fn add_vertex(&mut self, vertex: &Rc<T>);
 fn add_edge(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize);
 fn has_vertex(&self, vertex: &Rc<T>) -> bool;
 fn edges_from<'a>(&'a self, edge: &Rc<T>) -> Vec<&'a Edge<T>>;
 fn add_edge_safe(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize) {
 if !self.has_vertex(start) {
 self.add_vertex(start);
 }
 if !self.has_vertex(end) {
 self.add_vertex(end);
 }
 self.add_edge(start, end, weight);
 }
 /// Returns a minimuim spanning tree of a graph use Kruskal Algorithm.
 /// The graph is not modified or consumed by the operation, rather, all references are copied
 /// which is much cheaper than creating a deep copy.
 fn mst_kruskal(&self) -> Self {
 // The minimun spanning tree to return
 let mut mst = Self::default();
 let mut union_finder: UnionFind<T> = UnionFind::new();
 for vertex in self.vertices().into_iter() {
 union_finder.make_set(vertex);
 }
 let mut sorted_edges: Vec<&Edge<T>> = self.edges().into_iter().collect();
 // Sort the edges by weight in ascending order
 sorted_edges.sort_by(|Edge(_, _, a), Edge(_, _, b)| a.cmp(&b));
 for Edge(a, b, weight) in sorted_edges {
 // if a and b are not in the same set (ie. connected) the current edge is a light edge
 if union_finder.find_set(a) != union_finder.find_set(b) {
 mst.add_vertex(a);
 mst.add_vertex(b);
 mst.add_edge(a, b, *weight);
 // If something goes wrong here, just throw an error.
 // It might be better to return a Result<Self, Err>
 union_finder.union(a, b).unwrap();
 }
 }
 // Return the mst
 mst
 }
 fn mst_prim(&self) -> Self {
 if self.vertices().is_empty() {
 return Self::default();
 }
 let mut mst = Self::default();
 let mut pq: PriorityQueue<&Edge<T>> = PriorityQueue::new();
 let mut nodes: Vec<&Rc<T>> = self.vertices();
 let start = nodes.pop().unwrap();
 mst.add_vertex(start);
 self.edges_from(start)
 .into_iter()
 .for_each(|edge| pq.insert(edge));
 while nodes.len() > 0 {
 if let Some(Edge(start, end, weight)) = pq.pop() {
 if mst.has_vertex(start) && !mst.has_vertex(end) {
 mst.add_vertex(end);
 mst.add_edge(start, end, *weight);
 self.edges_from(end)
 .into_iter()
 .for_each(|edge| pq.insert(edge));
 } else if mst.has_vertex(end) && !mst.has_vertex(start) {
 mst.add_vertex(start);
 mst.add_edge(start, end, *weight);
 self.edges_from(start)
 .into_iter()
 .for_each(|edge| pq.insert(edge));
 }
 } else {
 return mst;
 }
 }
 mst
 }
 /// Calculates the number of connnected components in a graph by constructing a [UnionFind] and
 /// calling the [sets](UnionFind::sets) method on the [UnionFind] object.
 fn connected_components(&self) -> usize {
 let mut union_finder = UnionFind::new();
 // add each vertex to the union find object
 for vertex in self.vertices().into_iter() {
 union_finder.make_set(vertex);
 }
 // connect connected edges in the union find object
 for Edge(a, b, _) in self.edges().into_iter() {
 union_finder.union(a, b).unwrap();
 }
 union_finder.sets()
 }
}
fn load_from<'a, T: Graph<String> + Default>(path: &str) -> T {
 let mut graph = T::default();
 let file = File::open(path).expect("Unable to open graph file");
 for line in io::BufReader::new(file)
 .lines()
 .map(|line| line.expect("Unable to read line"))
 {
 let words: Vec<_> = line.split(" ").collect();
 if words[0] == "a" {
 graph.add_edge_safe(
 &Rc::new(words[1].into()),
 &Rc::new(words[2].into()),
 words[3]
 .parse()
 .expect("Cannot convert edge weight to integer"),
 );
 }
 }
 graph
}
impl<T: Eq + Hash> EdgeListGraph<T> {
 pub fn new() -> Self {
 Self::default()
 }
}
impl<T: Eq + Hash> Default for EdgeListGraph<T> {
 fn default() -> Self {
 Self {
 vertices: HashSet::new(),
 edges: HashSet::new(),
 }
 }
}
impl<T: Eq + Hash> Graph<T> for EdgeListGraph<T> {
 fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>> {
 self.vertices.iter().collect()
 }
 fn edges<'a>(&'a self) -> Vec<&'a Edge<T>> {
 self.edges.iter().collect()
 }
 fn add_vertex(&mut self, t: &Rc<T>) {
 self.vertices.insert(Rc::clone(t));
 }
 fn add_edge(&mut self, a: &Rc<T>, b: &Rc<T>, weight: usize) {
 self.edges.insert(Edge(Rc::clone(a), Rc::clone(b), weight));
 }
 fn has_vertex(&self, vertex: &Rc<T>) -> bool {
 self.vertices.contains(vertex)
 }
 fn edges_from<'a>(&'a self, vertex: &Rc<T>) -> Vec<&'a Edge<T>> {
 self.edges
 .iter()
 .filter(|&Edge(a, b, _)| a == vertex || b == vertex)
 .collect()
 }
}
impl<T: Eq + Hash> AdjacencyListGraph<T> {
 pub fn new() -> Self {
 Self::default()
 }
}
impl<T: Eq + Hash> Default for AdjacencyListGraph<T> {
 fn default() -> Self {
 Self {
 vertices: HashMap::new(),
 }
 }
}
impl<T: Eq + Hash> Graph<T> for AdjacencyListGraph<T> {
 fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>> {
 self.vertices.keys().collect()
 }
 fn edges<'a>(&'a self) -> Vec<&'a Edge<T>> {
 self.vertices
 .values()
 .flat_map(|edge_list| edge_list.iter())
 .collect()
 }
 fn add_vertex(&mut self, vertex: &Rc<T>) {
 self.vertices
 .entry(Rc::clone(vertex))
 .or_insert_with(|| HashSet::new());
 }
 fn add_edge(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize) {
 self.vertices
 .get_mut(start)
 .expect("Cannot add edge to vertex that does not exist")
 .insert(Edge(Rc::clone(start), Rc::clone(end), weight));
 self.vertices
 .get_mut(end)
 .expect("Cannot add edge to vertex that does not exist")
 .insert(Edge(Rc::clone(end), Rc::clone(start), weight));
 }
 fn has_vertex(&self, vertex: &Rc<T>) -> bool {
 self.vertices.contains_key(vertex)
 }
 fn edges_from(&self, vertex: &Rc<T>) -> Vec<&Edge<T>> {
 self.vertices
 .get(vertex)
 .expect("Cannot list edges from vertex that does not exist")
 .iter()
 .collect()
 }
}
impl<T: PartialEq> PartialOrd for Edge<T> {
 fn partial_cmp(&self, &Edge(_, _, other): &Self) -> Option<Ordering> {
 Some(self.2.cmp(&other))
 }
}
impl<T: Eq> Ord for Edge<T> {
 fn cmp(&self, &Edge(_, _, other): &Self) -> Ordering {
 self.2.cmp(&other)
 }
}
impl<T> UnionFind<T>
where
 T: Eq + Hash,
{
 pub fn new() -> Self {
 Self {
 items: HashMap::new(),
 }
 }
 pub fn make_set(&mut self, t: &Rc<T>) {
 self.items.insert(Rc::clone(&t), Rc::clone(&t));
 }
 /// Mutates the underlying [HashMap] so that `a` and `b` have the same parent.
 /// There is not guarantee as to which parents `a` and `b` will end up with, only that they are
 /// the same.
 pub fn union(&mut self, a: &Rc<T>, b: &Rc<T>) -> Result<(), &str> {
 if let (Ok(a), Ok(b)) = (self.find_set(a), self.find_set(b)) {
 self.items.insert(a, b);
 Ok(())
 } else {
 Err("Cannot perform union on objects without set")
 }
 }
 /// Returns a [Rc] pointer to the "parent" of a given item.
 /// Returns `Err` if `t` is not the the set
 pub fn find_set(&self, t: &Rc<T>) -> Result<Rc<T>, &str> {
 match self.items.get(t) {
 Some(p) if p == t => Ok(Rc::clone(p)),
 Some(p) => self.find_set(p),
 None => Err("Cannot find set of object without set"),
 }
 }
 /// Returns the number of unique sets in the struct
 pub fn sets(&self) -> usize {
 // count the number of items who are their own parent
 self.items
 .iter()
 .filter(|(item, parent)| item == parent)
 .count()
 }
}
#[cfg(test)]
mod tests {
 extern crate test;
 use super::*;
 use test::Bencher;
 #[bench]
 fn bench_kruskal_edgelist_rome(b: &mut Bencher) {
 let rome = load_from::<EdgeListGraph<String>>("graphs/Rome.gr");
 b.iter(|| rome.mst_kruskal());
 }
 #[bench]
 fn bench_prim_edgelist_rome(b: &mut Bencher) {
 let rome = load_from::<EdgeListGraph<String>>("graphs/Rome.gr");
 b.iter(|| rome.mst_prim());
 }
 #[bench]
 fn bench_kruskal_adjacencylist_rome(b: &mut Bencher) {
 let rome = load_from::<AdjacencyListGraph<String>>("graphs/Rome.gr");
 b.iter(|| rome.mst_kruskal());
 }
 #[bench]
 fn bench_prim_adjacencylist_rome(b: &mut Bencher) {
 let rome = load_from::<AdjacencyListGraph<String>>("graphs/Rome.gr");
 b.iter(|| rome.mst_prim());
 }
 #[bench]
 fn bench_kruskal_edgelist_ny(b: &mut Bencher) {
 let ny = load_from::<EdgeListGraph<String>>("graphs/NY.gr");
 b.iter(|| ny.mst_kruskal());
 }
 #[bench]
 fn bench_prim_edgelist_ny(b: &mut Bencher) {
 let ny = load_from::<EdgeListGraph<String>>("graphs/NY.gr");
 b.iter(|| ny.mst_prim());
 }
 #[bench]
 fn bench_kruskal_adjacencylist_ny(b: &mut Bencher) {
 let ny = load_from::<AdjacencyListGraph<String>>("graphs/NY.gr");
 b.iter(|| ny.mst_kruskal());
 }
 #[bench]
 fn bench_prim_adjacencylist_ny(b: &mut Bencher) {
 let ny = load_from::<AdjacencyListGraph<String>>("graphs/NY.gr");
 b.iter(|| ny.mst_prim());
 }
 #[bench]
 fn bench_kruskal_edgelist_sf(b: &mut Bencher) {
 let sf = load_from::<EdgeListGraph<String>>("graphs/SF.gr");
 b.iter(|| sf.mst_kruskal());
 }
 #[bench]
 fn bench_prim_edgelist_sf(b: &mut Bencher) {
 let sf = load_from::<EdgeListGraph<String>>("graphs/SF.gr");
 b.iter(|| sf.mst_prim());
 }
 #[bench]
 fn bench_kruskal_adjacencylist_sf(b: &mut Bencher) {
 let sf = load_from::<AdjacencyListGraph<String>>("graphs/SF.gr");
 b.iter(|| sf.mst_kruskal());
 }
 #[bench]
 fn bench_prim_adjacencylist_sf(b: &mut Bencher) {
 let sf = load_from::<AdjacencyListGraph<String>>("graphs/SF.gr");
 b.iter(|| sf.mst_prim());
 }
 #[test]
 fn test_adjacencylist() {
 let mut graph = AdjacencyListGraph::new();
 let p1 = Rc::new((1, 1));
 let p2 = Rc::new((4, 6));
 let p3 = Rc::new((-3, 8));
 assert!(!graph.has_vertex(&p1));
 graph.add_vertex(&p1);
 assert!(graph.has_vertex(&p1));
 graph.add_vertex(&p2);
 graph.add_vertex(&p3);
 assert!(graph.has_vertex(&p2));
 assert!(graph.has_vertex(&p3));
 }
 #[test]
 fn test_connected_components() {
 let mut graph = EdgeListGraph::new();
 let p1 = Rc::new((1, 1));
 let p2 = Rc::new((4, 6));
 let p3 = Rc::new((-3, 8));
 let p4 = Rc::new((-4, 4));
 let p5 = Rc::new((-9, -20));
 let p6 = Rc::new((-2, -19));
 let p7 = Rc::new((-3, -3));
 let p8 = Rc::new((17, -13));
 graph.add_vertex(&p1);
 graph.add_vertex(&p2);
 graph.add_vertex(&p3);
 graph.add_vertex(&p4);
 graph.add_vertex(&p5);
 graph.add_vertex(&p6);
 graph.add_vertex(&p7);
 graph.add_vertex(&p8);
 graph.add_edge(&p1, &p2, 0);
 graph.add_edge(&p3, &p4, 0);
 graph.add_edge(&p3, &p5, 0);
 graph.add_edge(&p6, &p7, 0);
 assert_eq!(graph.connected_components(), 4);
 }
 #[test]
 fn test_edgelist_kruskal() {
 test_kruskal::<EdgeListGraph<&str>>();
 }
 #[test]
 fn test_edgelist_prim() {
 test_prim::<EdgeListGraph<&str>>();
 }
 #[test]
 fn test_adjacencylist_kruskal() {
 test_kruskal::<AdjacencyListGraph<&str>>();
 }
 #[test]
 fn test_adjacencylist_prim() {
 test_prim::<AdjacencyListGraph<&str>>();
 }
 #[test]
 fn test_union_find() {
 let mut union_find = UnionFind::<usize>::new();
 let one = Rc::new(1);
 let two = Rc::new(2);
 let five = Rc::new(5);
 let seven_twenty = Rc::new(720);
 assert!(union_find.find_set(&one).is_err());
 assert!(union_find.find_set(&five).is_err());
 union_find.make_set(&one);
 union_find.make_set(&five);
 union_find.make_set(&two);
 union_find.make_set(&seven_twenty);
 assert_eq!(union_find.find_set(&two), Ok(Rc::clone(&two)));
 assert_eq!(union_find.find_set(&five), Ok(Rc::clone(&five)));
 assert!(union_find.union(&one, &five).is_ok());
 assert!(union_find.union(&two, &seven_twenty).is_ok());
 assert_eq!(union_find.find_set(&one), union_find.find_set(&five));
 assert_ne!(
 union_find.find_set(&seven_twenty),
 union_find.find_set(&one)
 );
 assert!(union_find.union(&five, &two).is_ok());
 assert_eq!(
 union_find.find_set(&one),
 union_find.find_set(&seven_twenty)
 );
 }
 fn test_kruskal<'a, T: Graph<&'a str> + Default + PartialEq + std::fmt::Debug>() {
 let mut graph = T::default();
 let cow = Rc::new("cow");
 let how = Rc::new("how");
 let hat = Rc::new("hat");
 let mow = Rc::new("mow");
 let cowering = Rc::new("cowering");
 let hatred = Rc::new("hatred");
 graph.add_vertex(&cow);
 graph.add_vertex(&how);
 graph.add_vertex(&hat);
 graph.add_vertex(&mow);
 graph.add_vertex(&cowering);
 graph.add_vertex(&hatred);
 graph.add_edge(&cow, &how, 1);
 graph.add_edge(&how, &hat, 2);
 graph.add_edge(&hat, &mow, 3);
 graph.add_edge(&how, &cowering, 6);
 graph.add_edge(&hat, &cowering, 7);
 graph.add_edge(&cow, &hatred, 6);
 let mut mst = T::default();
 mst.add_vertex(&cow);
 mst.add_vertex(&how);
 mst.add_vertex(&hat);
 mst.add_vertex(&mow);
 mst.add_vertex(&cowering);
 mst.add_vertex(&hatred);
 mst.add_edge(&cow, &how, 1);
 mst.add_edge(&how, &hat, 2);
 mst.add_edge(&hat, &mow, 3);
 mst.add_edge(&how, &cowering, 6);
 mst.add_edge(&cow, &hatred, 6);
 assert_eq!(graph.mst_kruskal(), mst);
 }
 fn test_prim<'a, T: Graph<&'a str> + Default + PartialEq + std::fmt::Debug>() {
 let mut graph = T::default();
 let cow = Rc::new("cow");
 let how = Rc::new("how");
 let hat = Rc::new("hat");
 let mow = Rc::new("mow");
 let cowering = Rc::new("cowering");
 let hatred = Rc::new("hatred");
 graph.add_vertex(&cow);
 graph.add_vertex(&how);
 graph.add_vertex(&hat);
 graph.add_vertex(&mow);
 graph.add_vertex(&cowering);
 graph.add_vertex(&hatred);
 graph.add_edge(&cow, &how, 1);
 graph.add_edge(&how, &hat, 2);
 graph.add_edge(&hat, &mow, 3);
 graph.add_edge(&how, &cowering, 6);
 graph.add_edge(&hat, &cowering, 7);
 graph.add_edge(&cow, &hatred, 6);
 let mut mst = T::default();
 mst.add_vertex(&cow);
 mst.add_vertex(&how);
 mst.add_vertex(&hat);
 mst.add_vertex(&mow);
 mst.add_vertex(&cowering);
 mst.add_vertex(&hatred);
 mst.add_edge(&cow, &how, 1);
 mst.add_edge(&how, &hat, 2);
 mst.add_edge(&hat, &mow, 3);
 mst.add_edge(&how, &cowering, 6);
 mst.add_edge(&cow, &hatred, 6);
 assert_eq!(graph.mst_prim(), mst);
 }
}

I know that rust has a default priority queue implementation, but I made my own for practice:

/// A minimum priority queue
pub struct PriorityQueue<T: Ord> {
 items: Vec<T>,
}
impl<T: Ord> PriorityQueue<T> {
 pub fn new() -> Self {
 Self { items: Vec::new() }
 }
 pub fn insert(&mut self, t: T) {
 self.items.push(t);
 self.shift_up(self.items.len() - 1);
 }
 pub fn pop(&mut self) -> Option<T> {
 if self.items.is_empty() {
 None
 } else {
 // Swap remove does exactly what we want it to do :)
 let item = self.items.swap_remove(0);
 self.shift_down(0);
 Some(item)
 }
 }
 /// Upshifts the `i`th element in the items [Vec] so that the heap property is preserved
 fn shift_up(&mut self, i: usize) {
 if i == 0 {
 return;
 }
 let mut current = i;
 let mut parent = Self::parent(i).unwrap();
 while current > 0 && self.items[current] < self.items[parent] {
 self.items.swap(current, parent);
 current = parent;
 match Self::parent(current) {
 Some(item) => parent = item,
 None => return,
 }
 }
 }
 /// Downshifts the `i`th element in the items [Vec] so that the heap property in preserved
 fn shift_down(&mut self, i: usize) {
 let len = self.items.len();
 let mut current_index = i;
 loop {
 let left_index = Self::left_child(current_index);
 let right_index = Self::right_child(current_index);
 let swap_index = if left_index >= len && right_index >= len {
 return;
 }
 // no children, nothing to do
 else if left_index >= len {
 right_index
 } else if right_index >= len {
 left_index
 } else if self.items[left_index] < self.items[right_index] {
 left_index
 } else {
 right_index
 };
 self.items.swap(current_index, swap_index);
 current_index = swap_index;
 }
 }
 fn parent(i: usize) -> Option<usize> {
 match i {
 0 => None,
 i => Some((i - 1) / 2),
 }
 }
 fn left_child(i: usize) -> usize {
 2 * i + 1
 }
 fn right_child(i: usize) -> usize {
 2 * i + 2
 }
}
#[cfg(test)]
mod tests {
 use super::*;
 #[test]
 pub fn test_priority_queue() {
 let mut pq = PriorityQueue::new();
 pq.insert(4);
 pq.insert(8);
 pq.insert(5);
 pq.insert(9);
 pq.insert(0);
 pq.insert(12);
 pq.insert(10);
 assert_eq!(pq.pop(), Some(0));
 assert_eq!(pq.pop(), Some(4));
 assert_eq!(pq.pop(), Some(5));
 assert_eq!(pq.pop(), Some(8));
 assert_eq!(pq.pop(), Some(9));
 assert_eq!(pq.pop(), Some(10));
 assert_eq!(pq.pop(), Some(12));
 }
}

One of my concerns is with using Rc pointers. Should the edges instead store a reference to an Rc pointer, or is it ok if is use Rc::clone a lot?

Question 2

It's fine to use Rc::clone a lot, and in a program like this it is to be expected. I am pretty sure you could not use references. They are not suitable for representing a graph.

Question 3

Well, maybe you could use references to refer to the already built graph, that might run faster, but it's hard to say.

Question 4

This is not going to be a complete review, rather two minor points I noticed on a 30 second read.

#![allow(dead_code)]

Do you need this? It's not desirable in a finished program ( or at any point, IMO! ).

impl<T: Eq + Hash> EdgeListGraph<T> {
 pub fn new() -> Self {
 Self::default()
 }
}
impl<T: Eq + Hash> Default for EdgeListGraph<T> {
 fn default() -> Self {
 Self {
 vertices: HashSet::new(),
 edges: HashSet::new(),
 }
 }
}

I think you can shorten all this using #[derive(Default)]. You don't need new at all, just default.

George Barwood George Barwood 1,2567 silver badges14 bronze badges · Answer 1 · 2022-01-10 12:14:17Z

This is not going to be a complete review, rather two minor points I noticed on a 30 second read.

#![allow(dead_code)]

Do you need this? It's not desirable in a finished program ( or at any point, IMO! ).

impl<T: Eq + Hash> EdgeListGraph<T> {
 pub fn new() -> Self {
 Self::default()
 }
}
impl<T: Eq + Hash> Default for EdgeListGraph<T> {
 fn default() -> Self {
 Self {
 vertices: HashSet::new(),
 edges: HashSet::new(),
 }
 }
}

I think you can shorten all this using #[derive(Default)]. You don't need new at all, just default.

Stack Exchange Network

Minimum Spanning Tree in Rust

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Minimum Spanning Tree in Rust

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions