As a project I have worked on implementing and benchmarking two different minimum spanning tree algorithms in rust. My main concern is adhering to good style and not programming any glaring inefficiencies.
lib.rs
:
#![allow(dead_code)]
#![feature(test)]
use std::cmp::{Ord, Ordering, PartialOrd};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
use std::rc::Rc;
use std::fs::File;
use std::io::{self, BufRead};
use priority_queue::PriorityQueue;
mod priority_queue;
#[derive(Eq, PartialEq, Hash, Debug)]
pub struct Edge<T>(Rc<T>, Rc<T>, usize);
/// A simple graph data structure
#[derive(PartialEq, Debug)]
pub struct EdgeListGraph<T: Eq + Hash> {
vertices: HashSet<Rc<T>>,
edges: HashSet<Edge<T>>,
}
#[derive(PartialEq, Debug)]
pub struct AdjacencyListGraph<T: Eq + Hash> {
vertices: HashMap<Rc<T>, HashSet<Edge<T>>>,
}
/// A union find data structure where each item is a map entry with a refernce to its "parent"
pub struct UnionFind<T: Eq + Hash> {
items: HashMap<Rc<T>, Rc<T>>,
}
trait Graph<T>: Default
where
T: Eq + Hash,
{
fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>>;
fn edges<'a>(&'a self) -> Vec<&'a Edge<T>>;
fn add_vertex(&mut self, vertex: &Rc<T>);
fn add_edge(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize);
fn has_vertex(&self, vertex: &Rc<T>) -> bool;
fn edges_from<'a>(&'a self, edge: &Rc<T>) -> Vec<&'a Edge<T>>;
fn add_edge_safe(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize) {
if !self.has_vertex(start) {
self.add_vertex(start);
}
if !self.has_vertex(end) {
self.add_vertex(end);
}
self.add_edge(start, end, weight);
}
/// Returns a minimuim spanning tree of a graph use Kruskal Algorithm.
/// The graph is not modified or consumed by the operation, rather, all references are copied
/// which is much cheaper than creating a deep copy.
fn mst_kruskal(&self) -> Self {
// The minimun spanning tree to return
let mut mst = Self::default();
let mut union_finder: UnionFind<T> = UnionFind::new();
for vertex in self.vertices().into_iter() {
union_finder.make_set(vertex);
}
let mut sorted_edges: Vec<&Edge<T>> = self.edges().into_iter().collect();
// Sort the edges by weight in ascending order
sorted_edges.sort_by(|Edge(_, _, a), Edge(_, _, b)| a.cmp(&b));
for Edge(a, b, weight) in sorted_edges {
// if a and b are not in the same set (ie. connected) the current edge is a light edge
if union_finder.find_set(a) != union_finder.find_set(b) {
mst.add_vertex(a);
mst.add_vertex(b);
mst.add_edge(a, b, *weight);
// If something goes wrong here, just throw an error.
// It might be better to return a Result<Self, Err>
union_finder.union(a, b).unwrap();
}
}
// Return the mst
mst
}
fn mst_prim(&self) -> Self {
if self.vertices().is_empty() {
return Self::default();
}
let mut mst = Self::default();
let mut pq: PriorityQueue<&Edge<T>> = PriorityQueue::new();
let mut nodes: Vec<&Rc<T>> = self.vertices();
let start = nodes.pop().unwrap();
mst.add_vertex(start);
self.edges_from(start)
.into_iter()
.for_each(|edge| pq.insert(edge));
while nodes.len() > 0 {
if let Some(Edge(start, end, weight)) = pq.pop() {
if mst.has_vertex(start) && !mst.has_vertex(end) {
mst.add_vertex(end);
mst.add_edge(start, end, *weight);
self.edges_from(end)
.into_iter()
.for_each(|edge| pq.insert(edge));
} else if mst.has_vertex(end) && !mst.has_vertex(start) {
mst.add_vertex(start);
mst.add_edge(start, end, *weight);
self.edges_from(start)
.into_iter()
.for_each(|edge| pq.insert(edge));
}
} else {
return mst;
}
}
mst
}
/// Calculates the number of connnected components in a graph by constructing a [UnionFind] and
/// calling the [sets](UnionFind::sets) method on the [UnionFind] object.
fn connected_components(&self) -> usize {
let mut union_finder = UnionFind::new();
// add each vertex to the union find object
for vertex in self.vertices().into_iter() {
union_finder.make_set(vertex);
}
// connect connected edges in the union find object
for Edge(a, b, _) in self.edges().into_iter() {
union_finder.union(a, b).unwrap();
}
union_finder.sets()
}
}
fn load_from<'a, T: Graph<String> + Default>(path: &str) -> T {
let mut graph = T::default();
let file = File::open(path).expect("Unable to open graph file");
for line in io::BufReader::new(file)
.lines()
.map(|line| line.expect("Unable to read line"))
{
let words: Vec<_> = line.split(" ").collect();
if words[0] == "a" {
graph.add_edge_safe(
&Rc::new(words[1].into()),
&Rc::new(words[2].into()),
words[3]
.parse()
.expect("Cannot convert edge weight to integer"),
);
}
}
graph
}
impl<T: Eq + Hash> EdgeListGraph<T> {
pub fn new() -> Self {
Self::default()
}
}
impl<T: Eq + Hash> Default for EdgeListGraph<T> {
fn default() -> Self {
Self {
vertices: HashSet::new(),
edges: HashSet::new(),
}
}
}
impl<T: Eq + Hash> Graph<T> for EdgeListGraph<T> {
fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>> {
self.vertices.iter().collect()
}
fn edges<'a>(&'a self) -> Vec<&'a Edge<T>> {
self.edges.iter().collect()
}
fn add_vertex(&mut self, t: &Rc<T>) {
self.vertices.insert(Rc::clone(t));
}
fn add_edge(&mut self, a: &Rc<T>, b: &Rc<T>, weight: usize) {
self.edges.insert(Edge(Rc::clone(a), Rc::clone(b), weight));
}
fn has_vertex(&self, vertex: &Rc<T>) -> bool {
self.vertices.contains(vertex)
}
fn edges_from<'a>(&'a self, vertex: &Rc<T>) -> Vec<&'a Edge<T>> {
self.edges
.iter()
.filter(|&Edge(a, b, _)| a == vertex || b == vertex)
.collect()
}
}
impl<T: Eq + Hash> AdjacencyListGraph<T> {
pub fn new() -> Self {
Self::default()
}
}
impl<T: Eq + Hash> Default for AdjacencyListGraph<T> {
fn default() -> Self {
Self {
vertices: HashMap::new(),
}
}
}
impl<T: Eq + Hash> Graph<T> for AdjacencyListGraph<T> {
fn vertices<'a>(&'a self) -> Vec<&'a Rc<T>> {
self.vertices.keys().collect()
}
fn edges<'a>(&'a self) -> Vec<&'a Edge<T>> {
self.vertices
.values()
.flat_map(|edge_list| edge_list.iter())
.collect()
}
fn add_vertex(&mut self, vertex: &Rc<T>) {
self.vertices
.entry(Rc::clone(vertex))
.or_insert_with(|| HashSet::new());
}
fn add_edge(&mut self, start: &Rc<T>, end: &Rc<T>, weight: usize) {
self.vertices
.get_mut(start)
.expect("Cannot add edge to vertex that does not exist")
.insert(Edge(Rc::clone(start), Rc::clone(end), weight));
self.vertices
.get_mut(end)
.expect("Cannot add edge to vertex that does not exist")
.insert(Edge(Rc::clone(end), Rc::clone(start), weight));
}
fn has_vertex(&self, vertex: &Rc<T>) -> bool {
self.vertices.contains_key(vertex)
}
fn edges_from(&self, vertex: &Rc<T>) -> Vec<&Edge<T>> {
self.vertices
.get(vertex)
.expect("Cannot list edges from vertex that does not exist")
.iter()
.collect()
}
}
impl<T: PartialEq> PartialOrd for Edge<T> {
fn partial_cmp(&self, &Edge(_, _, other): &Self) -> Option<Ordering> {
Some(self.2.cmp(&other))
}
}
impl<T: Eq> Ord for Edge<T> {
fn cmp(&self, &Edge(_, _, other): &Self) -> Ordering {
self.2.cmp(&other)
}
}
impl<T> UnionFind<T>
where
T: Eq + Hash,
{
pub fn new() -> Self {
Self {
items: HashMap::new(),
}
}
pub fn make_set(&mut self, t: &Rc<T>) {
self.items.insert(Rc::clone(&t), Rc::clone(&t));
}
/// Mutates the underlying [HashMap] so that `a` and `b` have the same parent.
/// There is not guarantee as to which parents `a` and `b` will end up with, only that they are
/// the same.
pub fn union(&mut self, a: &Rc<T>, b: &Rc<T>) -> Result<(), &str> {
if let (Ok(a), Ok(b)) = (self.find_set(a), self.find_set(b)) {
self.items.insert(a, b);
Ok(())
} else {
Err("Cannot perform union on objects without set")
}
}
/// Returns a [Rc] pointer to the "parent" of a given item.
/// Returns `Err` if `t` is not the the set
pub fn find_set(&self, t: &Rc<T>) -> Result<Rc<T>, &str> {
match self.items.get(t) {
Some(p) if p == t => Ok(Rc::clone(p)),
Some(p) => self.find_set(p),
None => Err("Cannot find set of object without set"),
}
}
/// Returns the number of unique sets in the struct
pub fn sets(&self) -> usize {
// count the number of items who are their own parent
self.items
.iter()
.filter(|(item, parent)| item == parent)
.count()
}
}
#[cfg(test)]
mod tests {
extern crate test;
use super::*;
use test::Bencher;
#[bench]
fn bench_kruskal_edgelist_rome(b: &mut Bencher) {
let rome = load_from::<EdgeListGraph<String>>("graphs/Rome.gr");
b.iter(|| rome.mst_kruskal());
}
#[bench]
fn bench_prim_edgelist_rome(b: &mut Bencher) {
let rome = load_from::<EdgeListGraph<String>>("graphs/Rome.gr");
b.iter(|| rome.mst_prim());
}
#[bench]
fn bench_kruskal_adjacencylist_rome(b: &mut Bencher) {
let rome = load_from::<AdjacencyListGraph<String>>("graphs/Rome.gr");
b.iter(|| rome.mst_kruskal());
}
#[bench]
fn bench_prim_adjacencylist_rome(b: &mut Bencher) {
let rome = load_from::<AdjacencyListGraph<String>>("graphs/Rome.gr");
b.iter(|| rome.mst_prim());
}
#[bench]
fn bench_kruskal_edgelist_ny(b: &mut Bencher) {
let ny = load_from::<EdgeListGraph<String>>("graphs/NY.gr");
b.iter(|| ny.mst_kruskal());
}
#[bench]
fn bench_prim_edgelist_ny(b: &mut Bencher) {
let ny = load_from::<EdgeListGraph<String>>("graphs/NY.gr");
b.iter(|| ny.mst_prim());
}
#[bench]
fn bench_kruskal_adjacencylist_ny(b: &mut Bencher) {
let ny = load_from::<AdjacencyListGraph<String>>("graphs/NY.gr");
b.iter(|| ny.mst_kruskal());
}
#[bench]
fn bench_prim_adjacencylist_ny(b: &mut Bencher) {
let ny = load_from::<AdjacencyListGraph<String>>("graphs/NY.gr");
b.iter(|| ny.mst_prim());
}
#[bench]
fn bench_kruskal_edgelist_sf(b: &mut Bencher) {
let sf = load_from::<EdgeListGraph<String>>("graphs/SF.gr");
b.iter(|| sf.mst_kruskal());
}
#[bench]
fn bench_prim_edgelist_sf(b: &mut Bencher) {
let sf = load_from::<EdgeListGraph<String>>("graphs/SF.gr");
b.iter(|| sf.mst_prim());
}
#[bench]
fn bench_kruskal_adjacencylist_sf(b: &mut Bencher) {
let sf = load_from::<AdjacencyListGraph<String>>("graphs/SF.gr");
b.iter(|| sf.mst_kruskal());
}
#[bench]
fn bench_prim_adjacencylist_sf(b: &mut Bencher) {
let sf = load_from::<AdjacencyListGraph<String>>("graphs/SF.gr");
b.iter(|| sf.mst_prim());
}
#[test]
fn test_adjacencylist() {
let mut graph = AdjacencyListGraph::new();
let p1 = Rc::new((1, 1));
let p2 = Rc::new((4, 6));
let p3 = Rc::new((-3, 8));
assert!(!graph.has_vertex(&p1));
graph.add_vertex(&p1);
assert!(graph.has_vertex(&p1));
graph.add_vertex(&p2);
graph.add_vertex(&p3);
assert!(graph.has_vertex(&p2));
assert!(graph.has_vertex(&p3));
}
#[test]
fn test_connected_components() {
let mut graph = EdgeListGraph::new();
let p1 = Rc::new((1, 1));
let p2 = Rc::new((4, 6));
let p3 = Rc::new((-3, 8));
let p4 = Rc::new((-4, 4));
let p5 = Rc::new((-9, -20));
let p6 = Rc::new((-2, -19));
let p7 = Rc::new((-3, -3));
let p8 = Rc::new((17, -13));
graph.add_vertex(&p1);
graph.add_vertex(&p2);
graph.add_vertex(&p3);
graph.add_vertex(&p4);
graph.add_vertex(&p5);
graph.add_vertex(&p6);
graph.add_vertex(&p7);
graph.add_vertex(&p8);
graph.add_edge(&p1, &p2, 0);
graph.add_edge(&p3, &p4, 0);
graph.add_edge(&p3, &p5, 0);
graph.add_edge(&p6, &p7, 0);
assert_eq!(graph.connected_components(), 4);
}
#[test]
fn test_edgelist_kruskal() {
test_kruskal::<EdgeListGraph<&str>>();
}
#[test]
fn test_edgelist_prim() {
test_prim::<EdgeListGraph<&str>>();
}
#[test]
fn test_adjacencylist_kruskal() {
test_kruskal::<AdjacencyListGraph<&str>>();
}
#[test]
fn test_adjacencylist_prim() {
test_prim::<AdjacencyListGraph<&str>>();
}
#[test]
fn test_union_find() {
let mut union_find = UnionFind::<usize>::new();
let one = Rc::new(1);
let two = Rc::new(2);
let five = Rc::new(5);
let seven_twenty = Rc::new(720);
assert!(union_find.find_set(&one).is_err());
assert!(union_find.find_set(&five).is_err());
union_find.make_set(&one);
union_find.make_set(&five);
union_find.make_set(&two);
union_find.make_set(&seven_twenty);
assert_eq!(union_find.find_set(&two), Ok(Rc::clone(&two)));
assert_eq!(union_find.find_set(&five), Ok(Rc::clone(&five)));
assert!(union_find.union(&one, &five).is_ok());
assert!(union_find.union(&two, &seven_twenty).is_ok());
assert_eq!(union_find.find_set(&one), union_find.find_set(&five));
assert_ne!(
union_find.find_set(&seven_twenty),
union_find.find_set(&one)
);
assert!(union_find.union(&five, &two).is_ok());
assert_eq!(
union_find.find_set(&one),
union_find.find_set(&seven_twenty)
);
}
fn test_kruskal<'a, T: Graph<&'a str> + Default + PartialEq + std::fmt::Debug>() {
let mut graph = T::default();
let cow = Rc::new("cow");
let how = Rc::new("how");
let hat = Rc::new("hat");
let mow = Rc::new("mow");
let cowering = Rc::new("cowering");
let hatred = Rc::new("hatred");
graph.add_vertex(&cow);
graph.add_vertex(&how);
graph.add_vertex(&hat);
graph.add_vertex(&mow);
graph.add_vertex(&cowering);
graph.add_vertex(&hatred);
graph.add_edge(&cow, &how, 1);
graph.add_edge(&how, &hat, 2);
graph.add_edge(&hat, &mow, 3);
graph.add_edge(&how, &cowering, 6);
graph.add_edge(&hat, &cowering, 7);
graph.add_edge(&cow, &hatred, 6);
let mut mst = T::default();
mst.add_vertex(&cow);
mst.add_vertex(&how);
mst.add_vertex(&hat);
mst.add_vertex(&mow);
mst.add_vertex(&cowering);
mst.add_vertex(&hatred);
mst.add_edge(&cow, &how, 1);
mst.add_edge(&how, &hat, 2);
mst.add_edge(&hat, &mow, 3);
mst.add_edge(&how, &cowering, 6);
mst.add_edge(&cow, &hatred, 6);
assert_eq!(graph.mst_kruskal(), mst);
}
fn test_prim<'a, T: Graph<&'a str> + Default + PartialEq + std::fmt::Debug>() {
let mut graph = T::default();
let cow = Rc::new("cow");
let how = Rc::new("how");
let hat = Rc::new("hat");
let mow = Rc::new("mow");
let cowering = Rc::new("cowering");
let hatred = Rc::new("hatred");
graph.add_vertex(&cow);
graph.add_vertex(&how);
graph.add_vertex(&hat);
graph.add_vertex(&mow);
graph.add_vertex(&cowering);
graph.add_vertex(&hatred);
graph.add_edge(&cow, &how, 1);
graph.add_edge(&how, &hat, 2);
graph.add_edge(&hat, &mow, 3);
graph.add_edge(&how, &cowering, 6);
graph.add_edge(&hat, &cowering, 7);
graph.add_edge(&cow, &hatred, 6);
let mut mst = T::default();
mst.add_vertex(&cow);
mst.add_vertex(&how);
mst.add_vertex(&hat);
mst.add_vertex(&mow);
mst.add_vertex(&cowering);
mst.add_vertex(&hatred);
mst.add_edge(&cow, &how, 1);
mst.add_edge(&how, &hat, 2);
mst.add_edge(&hat, &mow, 3);
mst.add_edge(&how, &cowering, 6);
mst.add_edge(&cow, &hatred, 6);
assert_eq!(graph.mst_prim(), mst);
}
}
I know that rust has a default priority queue implementation, but I made my own for practice:
/// A minimum priority queue
pub struct PriorityQueue<T: Ord> {
items: Vec<T>,
}
impl<T: Ord> PriorityQueue<T> {
pub fn new() -> Self {
Self { items: Vec::new() }
}
pub fn insert(&mut self, t: T) {
self.items.push(t);
self.shift_up(self.items.len() - 1);
}
pub fn pop(&mut self) -> Option<T> {
if self.items.is_empty() {
None
} else {
// Swap remove does exactly what we want it to do :)
let item = self.items.swap_remove(0);
self.shift_down(0);
Some(item)
}
}
/// Upshifts the `i`th element in the items [Vec] so that the heap property is preserved
fn shift_up(&mut self, i: usize) {
if i == 0 {
return;
}
let mut current = i;
let mut parent = Self::parent(i).unwrap();
while current > 0 && self.items[current] < self.items[parent] {
self.items.swap(current, parent);
current = parent;
match Self::parent(current) {
Some(item) => parent = item,
None => return,
}
}
}
/// Downshifts the `i`th element in the items [Vec] so that the heap property in preserved
fn shift_down(&mut self, i: usize) {
let len = self.items.len();
let mut current_index = i;
loop {
let left_index = Self::left_child(current_index);
let right_index = Self::right_child(current_index);
let swap_index = if left_index >= len && right_index >= len {
return;
}
// no children, nothing to do
else if left_index >= len {
right_index
} else if right_index >= len {
left_index
} else if self.items[left_index] < self.items[right_index] {
left_index
} else {
right_index
};
self.items.swap(current_index, swap_index);
current_index = swap_index;
}
}
fn parent(i: usize) -> Option<usize> {
match i {
0 => None,
i => Some((i - 1) / 2),
}
}
fn left_child(i: usize) -> usize {
2 * i + 1
}
fn right_child(i: usize) -> usize {
2 * i + 2
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
pub fn test_priority_queue() {
let mut pq = PriorityQueue::new();
pq.insert(4);
pq.insert(8);
pq.insert(5);
pq.insert(9);
pq.insert(0);
pq.insert(12);
pq.insert(10);
assert_eq!(pq.pop(), Some(0));
assert_eq!(pq.pop(), Some(4));
assert_eq!(pq.pop(), Some(5));
assert_eq!(pq.pop(), Some(8));
assert_eq!(pq.pop(), Some(9));
assert_eq!(pq.pop(), Some(10));
assert_eq!(pq.pop(), Some(12));
}
}
One of my concerns is with using Rc
pointers. Should the edges instead store a reference to an Rc pointer, or is it ok if is use Rc::clone
a lot?
-
\$\begingroup\$ It's fine to use Rc::clone a lot, and in a program like this it is to be expected. I am pretty sure you could not use references. They are not suitable for representing a graph. \$\endgroup\$George Barwood– George Barwood2022年01月10日 11:50:52 +00:00Commented Jan 10, 2022 at 11:50
-
\$\begingroup\$ Well, maybe you could use references to refer to the already built graph, that might run faster, but it's hard to say. \$\endgroup\$George Barwood– George Barwood2022年01月10日 12:00:22 +00:00Commented Jan 10, 2022 at 12:00
1 Answer 1
This is not going to be a complete review, rather two minor points I noticed on a 30 second read.
#![allow(dead_code)]
Do you need this? It's not desirable in a finished program ( or at any point, IMO! ).
impl<T: Eq + Hash> EdgeListGraph<T> {
pub fn new() -> Self {
Self::default()
}
}
impl<T: Eq + Hash> Default for EdgeListGraph<T> {
fn default() -> Self {
Self {
vertices: HashSet::new(),
edges: HashSet::new(),
}
}
}
I think you can shorten all this using #[derive(Default)]
. You don't need new
at all, just default
.