I'd love to get feedback on my first go at Dijkstra's algorithm in Rust:
use std::cell::Cell;
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::fmt;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::rc::Rc;
fn main() {
let s = Vertex::new("s");
let t = Vertex::new("t");
let x = Vertex::new("x");
let y = Vertex::new("y");
let z = Vertex::new("z");
// A map from vertices to their adjacent vertices including costs
let mut adjacency_list = HashMap::new();
adjacency_list.insert(&s, vec![(&t, 10), (&y, 5)]);
adjacency_list.insert(&t, vec![(&y, 2), (&x, 1)]);
adjacency_list.insert(&x, vec![(&z, 4)]);
adjacency_list.insert(&y, vec![(&t, 3), (&x, 9), (&z, 2)]);
adjacency_list.insert(&z, vec![(&s, 7), (&x, 6)]);
dijkstra(&s, &adjacency_list);
adjacency_list.keys().for_each(|v| println!("{:?}", v));
}
// Multiple lifetime parameters to avoid the error:
// "borrowed value does not live long enough"
fn dijkstra<'a, 's: 'a>(
start: &'a Vertex<'s>,
adjacency_list: &'a HashMap<&'a Vertex<'s>, Vec<(&'a Vertex<'s>, usize)>>,
) {
start.distance.set(0);
// Fill the binary heap, vertices with the smallest distance go first
let mut to_visit = BinaryHeap::new();
adjacency_list.keys().for_each(|v| to_visit.push(*v));
// We visit the vertices with the smallest distance first, this is
// what makes Dijkstra a greedy algorithm
while let Some(v) = to_visit.pop() {
if let Some(neighbors) = adjacency_list.get(v) {
for (n, cost) in neighbors {
let new_distance = v.distance.get() + cost;
if new_distance < n.distance.get() {
n.distance.set(new_distance);
// n.predecessor.set(Some(Rc::new(*v)));
}
}
// When changing a vertex' distance, the BinaryHeap doesn't
// update the position of the vertex.
// That's why we create a new heap with the right order.
let mut new_heap = BinaryHeap::new();
to_visit.iter().for_each(|x| new_heap.push(*x));
to_visit = new_heap;
}
}
}
#[derive(Eq)]
struct Vertex<'a> {
name: &'a str,
distance: Cell<usize>,
// predecessor: Cell<Option<Rc<Vertex<'a>>>>,
}
impl<'a> Vertex<'a> {
fn new(name: &'a str) -> Vertex<'a> {
Vertex {
name,
distance: Cell::new(usize::max_value()),
// predecessor: Cell::new(None),
}
}
}
impl<'a> Hash for Vertex<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.name.hash(state);
}
}
/// Since this Vertex will be put in a priority queue where the vertices
/// with the *smallest* distance should be processed first, `cmp`
/// returns GT if self.distance().get() < other.distance().get()
impl<'a> Ord for Vertex<'a> {
fn cmp(&self, other: &Vertex<'a>) -> Ordering {
other.distance.get().cmp(&self.distance.get())
}
}
impl<'a> PartialOrd for Vertex<'a> {
fn partial_cmp(&self, other: &Vertex<'a>) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'a> PartialEq for Vertex<'a> {
fn eq(&self, other: &Vertex<'a>) -> bool {
self.name == other.name
}
}
impl<'a> Debug for Vertex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"name: {}, distance: {:?}",
self.name,
self.distance.get()
)
}
}
Things I'd especially like to improve and simplify, if possible:
- Type signatures: I've wrestled a bit with the borrow checker to get rid of "borrowed value doesn't live long enough". As a consequence I had to introduce (multiple) lifetime parameters at a couple of places
- BinaryHeap: At first it seemed like a good data structure to have fast access to the vertex with the smallest distance. But since BinaryHeap doesn't resort the values when they change, I had to improvise (and incur worse performance)
1 Answer 1
Pay attention to compiler warnings. Rust is a statically-compiled language. One of the big reasons you choose such a language is to get information at compile time:
warning: unused import: `std::rc::Rc` --> src/main.rs:7:5 | 7 | use std::rc::Rc; | ^^^^^^^^^^^ | = note: #[warn(unused_imports)] on by default
Modern Rust coalesces imports from the same crate.
You are inconsistent with your usage of
for
loops andfor_each
. Have a reason for using one or the other.dijkstra
requires no lifetime generics, so don't include them.Don't have commented out code; that's what source control is for.
It feels very wrong to have
Eq
andHash
not working from the same data. This is undoubtedly sure to cause problems in the future.It feels very wrong to have
distance
be a part of aVertex
; You can't calculate the distance for two things simultaneously.Re-allocating the
BinaryHeap
so frequently feels like it will be highly inefficient.You are deliberately formatting
Vertex
for output to the user. That means you should useDisplay
, notDebug
Using something like
usize::MAX
to indicate "not visited" is not idiomatic Rust. This conflates two concepts.
use std::{
cell::Cell,
cmp::Ordering,
collections::{BinaryHeap, HashMap},
fmt,
hash::{Hash, Hasher},
};
fn main() {
let s = Vertex::new("s");
let t = Vertex::new("t");
let x = Vertex::new("x");
let y = Vertex::new("y");
let z = Vertex::new("z");
// A map from vertices to their adjacent vertices including costs
let mut adjacency_list = HashMap::new();
adjacency_list.insert(&s, vec![(&t, 10), (&y, 5)]);
adjacency_list.insert(&t, vec![(&y, 2), (&x, 1)]);
adjacency_list.insert(&x, vec![(&z, 4)]);
adjacency_list.insert(&y, vec![(&t, 3), (&x, 9), (&z, 2)]);
adjacency_list.insert(&z, vec![(&s, 7), (&x, 6)]);
dijkstra(&s, &adjacency_list);
adjacency_list.keys().for_each(|v| println!("{}", v));
}
fn dijkstra(
start: &Vertex<'_>,
adjacency_list: &HashMap<&Vertex<'_>, Vec<(&Vertex<'_>, usize)>>,
) {
start.distance.set(0);
// Fill the binary heap, vertices with the smallest distance go first
let mut to_visit = BinaryHeap::new();
adjacency_list.keys().for_each(|v| to_visit.push(*v));
// We visit the vertices with the smallest distance first, this is
// what makes Dijkstra a greedy algorithm
while let Some(v) = to_visit.pop() {
if let Some(neighbors) = adjacency_list.get(v) {
for (n, cost) in neighbors {
let new_distance = v.distance.get() + cost;
if new_distance < n.distance.get() {
n.distance.set(new_distance);
}
}
// When changing a vertex' distance, the BinaryHeap doesn't
// update the position of the vertex.
// That's why we create a new heap with the right order.
let mut new_heap = BinaryHeap::new();
to_visit.iter().for_each(|x| new_heap.push(*x));
to_visit = new_heap;
}
}
}
#[derive(Eq)]
struct Vertex<'a> {
name: &'a str,
distance: Cell<usize>,
}
impl<'a> Vertex<'a> {
fn new(name: &'a str) -> Vertex<'a> {
Vertex {
name,
distance: Cell::new(usize::max_value()),
}
}
}
impl<'a> Hash for Vertex<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.name.hash(state);
}
}
/// Since this Vertex will be put in a priority queue where the vertices
/// with the *smallest* distance should be processed first, `cmp`
/// returns GT if self.distance().get() < other.distance().get()
impl<'a> Ord for Vertex<'a> {
fn cmp(&self, other: &Vertex<'a>) -> Ordering {
other.distance.get().cmp(&self.distance.get())
}
}
impl<'a> PartialOrd for Vertex<'a> {
fn partial_cmp(&self, other: &Vertex<'a>) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<'a> PartialEq for Vertex<'a> {
fn eq(&self, other: &Vertex<'a>) -> bool {
self.name == other.name
}
}
impl<'a> fmt::Display for Vertex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"name: {}, distance: {}",
self.name,
self.distance.get()
)
}
}
Turning to petgraph's implementation of Dijkstra's algorithm for inspiration, I'd:
- Separate the costs from the graph
- Maintain a list of visited nodes
- Return the distances as a value
use std::{
cmp::Ordering,
collections::{BinaryHeap, HashMap, HashSet},
};
fn main() {
let s = Vertex::new("s");
let t = Vertex::new("t");
let x = Vertex::new("x");
let y = Vertex::new("y");
let z = Vertex::new("z");
let mut adjacency_list = HashMap::new();
adjacency_list.insert(s, vec![(t, 10), (y, 5)]);
adjacency_list.insert(t, vec![(y, 2), (x, 1)]);
adjacency_list.insert(x, vec![(z, 4)]);
adjacency_list.insert(y, vec![(t, 3), (x, 9), (z, 2)]);
adjacency_list.insert(z, vec![(s, 7), (x, 6)]);
let distances = dijkstra(s, &adjacency_list);
for (v, d) in &distances {
println!("name: {}, distance: {}", v.name, d);
}
assert_eq!(distances.get(&t), Some(&8));
assert_eq!(distances.get(&s), Some(&0));
assert_eq!(distances.get(&y), Some(&5));
assert_eq!(distances.get(&x), Some(&9));
assert_eq!(distances.get(&z), Some(&7));
}
fn dijkstra<'a>(
start: Vertex<'a>,
adjacency_list: &HashMap<Vertex<'a>, Vec<(Vertex<'a>, usize)>>,
) -> HashMap<Vertex<'a>, usize> {
let mut distances = HashMap::new();
let mut visited = HashSet::new();
let mut to_visit = BinaryHeap::new();
distances.insert(start, 0);
to_visit.push(Visit {
vertex: start,
distance: 0,
});
while let Some(Visit { vertex, distance }) = to_visit.pop() {
if !visited.insert(vertex) {
// Already visited this node
continue;
}
if let Some(neighbors) = adjacency_list.get(&vertex) {
for (neighbor, cost) in neighbors {
let new_distance = distance + cost;
let is_shorter = distances
.get(&neighbor)
.map_or(true, |¤t| new_distance < current);
if is_shorter {
distances.insert(*neighbor, new_distance);
to_visit.push(Visit {
vertex: *neighbor,
distance: new_distance,
});
}
}
}
}
distances
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct Vertex<'a> {
name: &'a str,
}
impl<'a> Vertex<'a> {
fn new(name: &'a str) -> Vertex<'a> {
Vertex { name }
}
}
#[derive(Debug)]
struct Visit<V> {
vertex: V,
distance: usize,
}
impl<V> Ord for Visit<V> {
fn cmp(&self, other: &Self) -> Ordering {
other.distance.cmp(&self.distance)
}
}
impl<V> PartialOrd for Visit<V> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<V> PartialEq for Visit<V> {
fn eq(&self, other: &Self) -> bool {
self.distance.eq(&other.distance)
}
}
impl<V> Eq for Visit<V> {}
Additional changes:
- There's no need to take references to
Vertex
if it's only a reference itself. ImplementCopy
instead.
-
\$\begingroup\$ Thanks a lot for the great feedback! Regarding "It feels very wrong to have
Eq
andHash
not working from the same data": I agree, but I assumedderive(Eq)
would look to thePartialEq
implementation which uses exclusivelyname
as its data (just asHash
does). \$\endgroup\$Matthias Braun– Matthias Braun2018年09月01日 12:20:56 +00:00Commented Sep 1, 2018 at 12:20