use std::collections::HashMap;
use std::collections::HashSet;
use std::collections::vec_deque::VecDeque;
use std::ops::AddAssign;
type Vertex = u64;
type Graph = HashMap<Vertex, HashSet<Vertex>>;
/**
* Constructs a graph from a sequence of parent child pairs
*/
pub fn from_pairs(pairs: &[(u64, u64)]) -> Graph {
let mut graph: Graph = HashMap::new();
for &(parent, child) in pairs {
let children = graph.entry(parent).or_insert(HashSet::new());
children.insert(child);
}
return graph;
}
#[derive(Debug)]
pub struct DFSResult {
starting_times: HashMap<Vertex, u64>,
finishing_times: HashMap<Vertex, u64>,
parents: HashMap<Vertex, Option<Vertex>>,
forest: VecDeque<HashSet<Vertex>>,
topologically_sorted: VecDeque<Vertex>,
}
/**
* Returns the starting and finishing times and the parents of every node
* found with dfs as well as the dfs forest
*/
pub fn dfs(graph: &Graph) -> DFSResult {
let mut state = DFSResult {
// The DFS forest
forest: VecDeque::new(),
// all the starting times for each vertex
starting_times: HashMap::new(),
// the finishing times for each vertex
finishing_times: HashMap::new(),
// the parents of each vertex
parents: HashMap::new(),
// the topologically sorted list of verticies
topologically_sorted: VecDeque::new(),
};
// the verticies that bave been seen so far
let mut seen: HashSet<Vertex> = HashSet::new();
// current time
let mut time = 0;
fn dfs_visit (graph: &Graph,
vertex: Vertex,
time: &mut u64,
seen: &mut HashSet<Vertex>,
state: &mut DFSResult) {
time.add_assign(1);
state.starting_times.insert(vertex, *time);
seen.insert(vertex);
let mut branch = state.forest.pop_back().expect("The Forest should not be empty!");
branch.insert(vertex);
state.forest.push_back(branch);
for neighbor in graph.get(&vertex).unwrap_or(&HashSet::new()) {
if !seen.contains(neighbor) {
state.parents.insert(*neighbor, Option::Some(vertex));
dfs_visit(graph, *neighbor, time, seen, state);
}
}
time.add_assign(1);
state.finishing_times.insert(vertex, *time);
state.topologically_sorted.push_front(vertex);
};
for vertex in graph.keys() {
state.parents.insert(*vertex, Option::None);
}
for vertex in graph.keys() {
if !seen.contains(vertex) {
state.forest.push_back(HashSet::new());
dfs_visit(graph, *vertex, &mut time, &mut seen, &mut state);
}
}
return state;
}
fn topological_sort(graph: &Graph) -> VecDeque<Vertex> {
let DFSResult{topologically_sorted, ..} = dfs(graph);
return topologically_sorted
}
fn main() {
let pairs = [(1, 2), (2, 1)];
let g = from_pairs(&pairs);
println!("{:?}", g);
println!("{:?}", dfs(&g));
println!("{:?}", topological_sort(&g));
}
I want to make an implementation of Depth-First Search that I can use for implementing classical algorithms. I am unsure about using the DFSResult struct; It feels like I should be able to conditionally compute each one of those fields, but I don't know how to structure my code to accomplish that without repeating myself.
2 Answers 2
Before I begin, you should be aware tha petgraph has already implemented most of the graph algorithms people may need. Use well-tested existing code whenever you can.
Cleanup
General
Run rustfmt. It will automatically tell you such things as:
- There's no space between a function name and the parenthesis (
fn dfs_visit (
) When a function signature gets too long, arguments start on the subsequent line:
fn dfs_visit( graph: &Graph, vertex: Vertex, time: &mut u64, seen: &mut HashSet<Vertex>, state: &mut DFSResult, ) {
- There's no space between a function name and the parenthesis (
Run clippy. It will automatically tell you such things as:
- Don't use
return
on the last expression in a function. - To avoid
or_insert
with a function call as the argument (although it's acceptable in this case).
- Don't use
Use curly brackets to group imports from the same path.
There's no need to directly use the
AddAssign
trait; just use the operator+=
.I appreciate the usage of type aliases as a first step to organizing your code.
It's not common to use
/* */
style comments; idiomatic Rust uses//
instead.
from_pairs
You document that this accepts a sequence, but it actually takes a slice. Taking an
Iterator
is more flexible and better fits the documentation.Be consistent about your usage of the type alias; you have places that still say
u64
. This is why a newtype can sometimes be a better choice.Instead of calling
HashMap::new
, callGraph::new
and avoid the redundant type specification.I'd convert the insertion into the graph into a single expression instead of introducing a temporary variable.
dfs
When nesting a helper function, it's important to not split up the flow of the surrounding code. Declaring all your variables before the helper and using them after seems very confusing.
The initializer for
DFSResult
should be a method on it, not just stuck in thedfs
method.In fact, you can just derive
Default
and avoid any of the implementation yourself.Why are there comments on the fields in the initializer? They seem awfully redundant with the names of the fields... Either way, they should be on the struct definition.
It's worth checking your comments for typos ("verticies" => "vertices", "bave" => "have", etc.)
Why populate
parents
withNone
? Wouldn't absence in theHashMap
be enough to know there are no parents?Don't use
Option::None
/Option::Some
.None
andSome
are imported as part of the prelude and can be used directly.If you have to document a variable like
time
with "current time", consider renaming the variable.Don't redundantly specify types (
seen: HashSet<Vertex> = HashSet::new();
)There's no need to
pop_back
and thenpush_back
a value; just useback_mut
.
topological_sort
- You don't need to store the result of
dfs
into a variable to pull out just one field, you can directly call.topologically_sorted
.
use std::collections::{HashMap, HashSet, VecDeque};
type Vertex = u64;
type Graph = HashMap<Vertex, HashSet<Vertex>>;
/// Constructs a graph from a sequence of parent child pairs
pub fn from_pairs<I>(pairs: I) -> Graph
where
I: IntoIterator<Item = (Vertex, Vertex)>,
{
let mut graph = Graph::new();
for (parent, child) in pairs {
graph
.entry(parent)
.or_insert_with(HashSet::new)
.insert(child);
}
graph
}
#[derive(Debug, Default)]
pub struct DFSResult {
/// all the starting times for each vertex
starting_times: HashMap<Vertex, u64>,
/// the finishing times for each vertex
finishing_times: HashMap<Vertex, u64>,
/// the parents of each vertex
parents: HashMap<Vertex, Option<Vertex>>,
/// The DFS forest
forest: VecDeque<HashSet<Vertex>>,
/// the topologically sorted list of verticies
topologically_sorted: VecDeque<Vertex>,
}
/// Returns the starting and finishing times and the parents of every node
/// found with dfs as well as the dfs forest
pub fn dfs(graph: &Graph) -> DFSResult {
fn dfs_visit(
graph: &Graph,
vertex: Vertex,
time: &mut u64,
seen: &mut HashSet<Vertex>,
state: &mut DFSResult,
) {
*time += 1;
state.starting_times.insert(vertex, *time);
seen.insert(vertex);
state
.forest
.back_mut()
.expect("The Forest should not be empty!")
.insert(vertex);
for neighbor in graph.get(&vertex).unwrap_or(&HashSet::new()) {
if !seen.contains(neighbor) {
state.parents.insert(*neighbor, Some(vertex));
dfs_visit(graph, *neighbor, time, seen, state);
}
}
*time += 1;
state.finishing_times.insert(vertex, *time);
state.topologically_sorted.push_front(vertex);
};
let mut state = DFSResult::default();
for vertex in graph.keys() {
state.parents.insert(*vertex, None);
}
let mut seen = HashSet::new();
let mut current_time = 0;
for vertex in graph.keys() {
if !seen.contains(vertex) {
state.forest.push_back(HashSet::new());
dfs_visit(graph, *vertex, &mut current_time, &mut seen, &mut state);
}
}
state
}
fn topological_sort(graph: &Graph) -> VecDeque<Vertex> {
dfs(graph).topologically_sorted
}
fn main() {
let pairs = [(1, 2), (2, 1)];
let g = from_pairs(pairs.iter().cloned());
println!("{:?}", g);
println!("{:?}", dfs(&g));
println!("{:?}", topological_sort(&g));
}
Abstraction
Now that everything is cleaned up, let's turn to abstracting it a bit. The most important thing is to notice which pieces of code are core to the DFS algorithm and what is incidental. Luckily, your code already shows that to some degree. Everything in DfsResult
is incidental, so we comment it out. There's some collateral damage (like current_time
) which also gets commented out.
The only data that's truly required is the seen
variable. Everything else can be boiled down to a set of actions that can be taken sometime during the DFS lifecycle.
To that end, we create a trait with methods for each lifecycle event. We provide default implementations which are empty; this makes end implementations much nicer. Naming and documentation are key for this trait, and I didn't do a good job. You should definitely improve upon my names.
#[allow(unused_variables)]
pub trait DfsAction {
fn initial_visit(&mut self, graph: &Graph, vertex: Vertex) {}
fn start(&mut self, graph: &Graph, vertex: Vertex) {}
fn pre_visit(&mut self, graph: &Graph, current: Vertex, next: Vertex) {}
fn end(&mut self, graph: &Graph, vertex: Vertex) {}
}
pub fn dfs<A>(graph: &Graph, action: &mut A)
where
A: DfsAction,
{
fn dfs_visit<A>(graph: &Graph, vertex: Vertex, seen: &mut HashSet<Vertex>, action: &mut A)
where
A: DfsAction,
{
action.start(graph, vertex);
seen.insert(vertex);
for neighbor in graph.get(&vertex).unwrap_or(&HashSet::new()) {
if !seen.contains(neighbor) {
action.pre_visit(graph, vertex, *neighbor);
dfs_visit(graph, *neighbor, seen, action);
}
}
action.end(graph, vertex);
};
let mut seen = HashSet::new();
for vertex in graph.keys() {
if !seen.contains(vertex) {
action.initial_visit(graph, *vertex);
dfs_visit(graph, *vertex, &mut seen, action);
}
}
}
We can implement the trait for each of the distinct pieces, instantiate one of the pieces, then pass it in:
#[derive(Debug, Default)]
struct TopologicalSort(Vec<Vertex>);
impl DfsAction for TopologicalSort {
fn end(&mut self, _: &Graph, vertex: Vertex) {
self.0.push(vertex);
}
}
fn topological_sort(graph: &Graph) -> Vec<Vertex> {
let mut topo = TopologicalSort::default();
dfs(graph, &mut topo);
topo.0.reverse();
topo.0
}
This continues for all of the other pieces:
type Times = HashMap<Vertex, u64>;
#[derive(Debug, Default)]
struct Timer {
time: u64,
starting_times: Times,
finishing_times: Times,
}
impl DfsAction for Timer {
fn start(&mut self, _: &Graph, vertex: Vertex) {
self.time += 1;
self.starting_times.insert(vertex, self.time);
}
fn end(&mut self, _: &Graph, vertex: Vertex) {
self.time += 1;
self.finishing_times.insert(vertex, self.time);
}
}
fn times(graph: &Graph) -> (Times, Times) {
let mut times = Timer::default();
dfs(graph, &mut times);
(times.starting_times, times.finishing_times)
}
#[derive(Debug, Default)]
pub struct Parents(HashMap<Vertex, Option<Vertex>>);
impl Parents {
fn new(graph: &Graph) -> Self {
let mut parents = HashMap::new();
for vertex in graph.keys() {
parents.insert(*vertex, None);
}
Parents(parents)
}
}
impl DfsAction for Parents {
fn pre_visit(&mut self, _: &Graph, current: Vertex, next: Vertex) {
self.0.insert(next, Some(current));
}
}
fn parents(graph: &Graph) -> HashMap<Vertex, Option<Vertex>> {
let mut parents = Parents::new(graph);
dfs(graph, &mut parents);
parents.0
}
#[derive(Debug, Default)]
pub struct Forest(Vec<HashSet<Vertex>>);
impl DfsAction for Forest {
fn initial_visit(&mut self, _: &Graph, _: Vertex) {
self.0.push(HashSet::new());
}
fn start(&mut self, _: &Graph, vertex: Vertex) {
self.0
.last_mut()
.expect("The Forest should not be empty!")
.insert(vertex);
}
}
fn forest(graph: &Graph) -> Vec<HashSet<Vertex>> {
let mut forest = Forest::default();
dfs(graph, &mut forest);
forest.0
}
If you want to get back to your "do everything" set, you can implement a BothAction
that takes two actions and calls both, combining those into a Both(Both(A, B), Both(C, D))
.
Splitting up all of the actions also makes it easier to see that further simplifications can be made. For example, you don't really need a VecDeque
— I've changed to a simple Vec
in the code above.
The code is clear and it looks good to me, I have no criticisms to offer. In particular, I don't see a better approach than what you did with state
.
There is room to improve the internal documentation, in terms of invariants you are defending. There are some clear checks and error cases that you handle, but there is room to be more explicit about invariants and demonstrating that termination is guaranteed. Consider adding comments or code assertions that show a certain value always decreases closer to zero on each iteration, for example. Make strong promises for what some functions guarantee about their return values. It would be helpful for a comment to cite a standard spanning tree or Dijkstra or DFS reference that mentions starting/ending times. You might describe expected complexity in big-O notation.
DFSResult
are not needed for certain operations (e.g. onlyDFSResult::topologically_sorted
is needed fortopological_sort
), but you compute all of the fields all the time in order to avoid code duplication? You never seem to use some of the fields (likestarting_times
), so why not just delete them entirely? \$\endgroup\$