To teach myself Rust, I implemented a naive binary tree with support for insert, delete and lookup operations as well as in-order iteration.
I'm still a little rusty (no pun intended) when it comes to writing idiomatic code and especially elegant handling of borrows.
use std::cmp::Ordering;
use std::cmp::Ordering::{Less, Greater, Equal};
use std::mem::swap;
// A type implementing Indexed<K> that is used as value in a BinaryTree may be indexed by K,
// that is, lookup functions can take a key: K instead of the full value. This is necessary for
// implementing associative containers.
pub trait Indexed<K: ?Sized> {
fn key(&self) -> &K;
}
impl<T> Indexed<T> for T where T: Ord {
fn key(&self) -> &T { self }
}
type Link<T> = Option<Box<Node<T>>>;
struct Node<T> {
value: T,
left: Link<T>,
right: Link<T>
}
pub struct BinaryTree<T> {
root: Link<T>
}
impl<T> BinaryTree<T> {
pub fn new() -> Self {
BinaryTree { root: None }
}
// Get a reference to the link at which "key" is or should be located
fn locate<K>(&self, key: &K) -> &Link<T> where T: Indexed<K>, K: Ord {
let mut anchor = &self.root;
while let Some(ref node) = *anchor {
match node.value.key().cmp(&key) {
Less => anchor = &node.left,
Greater => anchor = &node.right,
Equal => return anchor
}
}
// No such entry, anchor is pointing to the insert position of value
anchor
}
// Like locate(), but returns a &mut for insertion and deletion
fn locate_mut<K>(&mut self, key: &K) -> &mut Link<T> where T: Indexed<K>, K: Ord {
let mut anchor = &mut self.root;
loop {
// Not as simple as locate(): The binding of anchor must be removed before
// destructuring and re-assigning it in order to avoid duplicate &muts
match {anchor} {
&mut Some(ref mut node) if key != node.value.key() => {
anchor = if key < node.value.key() { &mut node.left } else { &mut node.right }
},
// Either &mut Some(node) with node.value == value or &mut None if value was
// not found
other => return other
}
}
}
pub fn lookup<K>(&self, key: &K) -> Option<&T> where T: Indexed<K>, K: Ord {
self.locate(key).as_ref().map(|node| &node.value)
}
pub fn insert(&mut self, value: T) -> bool where T: Ord {
let anchor = self.locate_mut(&value);
match *anchor {
Some(_) => false,
None => {
*anchor = Some(Box::new(Node { value: value, left: None, right: None }));
true
}
}
}
pub fn delete<K>(&mut self, key: &K) where T: Indexed<K>, K: Ord {
delete_node(self.locate_mut(key));
}
pub fn iter(&self) -> Iter<T> {
Iter { current: &self.root, stack: Vec::new() }
}
}
// Returns the next in-order successor in a subtree
fn successor<T>(mut next: &mut Link<T>) -> &mut Link<T> {
loop {
match {next} {
&mut Some(ref mut node) if node.left.is_some() => next = &mut node.left,
other => {
debug_assert!(other.is_some());
return other;
}
}
}
}
// Removes a node, either by simply discarding it if it is a leaf, or by swapping it with
// its inorder successor (which, in this case, is always in a leaf) and then deleting the leaf.
fn delete_node<T>(link: &mut Link<T>) {
if let Some(mut boxed_node) = link.take() {
match (boxed_node.left.take(), boxed_node.right.take()) {
(None, None) => (),
(Some(left), None) => *link = Some(left),
(None, Some(right)) => *link = Some(right),
(Some(left), Some(right)) => {
// take() followed by re-assignment looks like an awful hackjob, but appears
// to be the only way to satisfy all cases in the match
{
let node = &mut *boxed_node; // unbox
node.left = Some(left);
node.right = Some(right);
let next = successor(&mut node.right);
swap(&mut node.value, &mut next.as_mut().unwrap().value);
delete_node(next);
}
*link = Some(boxed_node);
}
}
}
}
// Allow iterating over &tree
impl<'a, T: 'a> IntoIterator for &'a BinaryTree<T> {
type Item = &'a T;
type IntoIter = Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
pub struct Iter<'a, T: 'a> {
current: &'a Link<T>,
stack: Vec<&'a Node<T>>
}
impl<'a, T: 'a> Iterator for Iter<'a, T> {
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
while let Some(ref node) = *self.current {
self.stack.push(node);
self.current = &node.left;
}
self.stack.pop().map(|node| {
self.current = &node.right;
&node.value
})
}
}
An example using the above code to represent a set:
fn main() {
let mut set = BinaryTree::new();
for value in &[100, 5, 1, 10, -1] {
set.insert(*value);
}
for value in &[5, 1, 151] {
set.delete(value);
}
for value in &set {
println!("{}", value);
}
}
And a map with a custom Entry
type:
#[derive(Debug, Copy, Clone)]
pub struct Entry<K, V>(K, V);
// Make Entry<K, V> indexable and ordered by K
impl<K, V> Indexed<K> for Entry<K, V> where K: Ord {
fn key(&self) -> &K {
&self.0
}
}
impl<K, V> PartialEq for Entry<K, V> where K: PartialEq {
fn eq(&self, other: &Self) -> bool {
self.0.eq(&other.0)
}
}
impl<K, V> Eq for Entry<K, V> where K: Eq {
}
impl<K, V> PartialOrd for Entry<K, V> where K: PartialOrd {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.0.partial_cmp(&other.0)
}
}
impl<K, V> Ord for Entry<K, V> where K: Ord {
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
// Example
fn main() {
let mut map = BinaryTree::new();
for entry in &[Entry("France", "Paris"), Entry("Germany", "Berlin"), Entry("Italy", "Rome")] {
map.insert(*entry);
}
for &Entry(key, value) in &map {
println!("{} => {}", key, value);
}
}
1 Answer 1
Examples have first-class support in Cargo. Place them in a directory
examples
next tosrc
. They can then be run withcargo run --example foo
. They will need to import the crate in order to use it.Prefer to use
module::function
instead of importing a free function directly. This helps the developer track where they come from. The only exception I make is formin
andmax
.When
where
clauses are used, they go on a separate line from the argument and return types, and the opening curly brace does as well. When there are multiple clauses, each goes on a separate line.Use trailing commas on arrays and structures. This reduces the lines of diff when a new struct member is added.
Evaluate comments for what consumers of a function need to know versus what the implementor needs to know. Use Rustdoc comments (
///
) when talking to the consumer. It's reasonable to add Rustdoc for non-public methods, so the consumer may be another developer of the library.Remove useless documentation like "... like foo, but mutable". Don't repeat information that is described in the function name or type. Document information that cannot be captured that way.
Note that Rustdoc uses Markdown; use Markdown syntax to refer to variables as
code
, for example.Instead of adding comments for each of the "other" cases, write them out. Generally, annotating something in code is less brittle than writing it in comments, as the compiler can check the former and not the latter.
Instead of a debug assertion, add the explicit patterns it cannot be and use something like
unimplemented!
.Prefer regular assertions instead of debug assertions in general; there's no worse time for invalid data to occur than in production. Only switch to debug assertions if profiling shows that the assertion is adding significant overhead.
Instead of dereferencing a value at every use in a loop, pattern-match to
&foo
, if the type isCopy
.All code should have some tests... doubly so for data structures that are intended to be used pervasively. The examples also require a human to look at the output and decide if they are correct.
For code like this, I'd advocate looking into QuickCheck. Data structures have certain invariants that should be upheld at all times, and generative testing helps tease those out.
I'd go ahead and make a
Node::new
to avoid needing to set left and right toNone
at the call site.Check out the
|
and@
aspects of pattern matching. These are helpful to coalesce duplicate cases (like left/right symmetry) and to avoid unwrapping and re-wrapping types likeOption
.I might prefer
while let Some(node) = self.current.as_ref()
versuswhile let Some(ref node) = *self.current
, but I can't explain why.The comment about "take & re-assignment" isn't near a
take
, so it's unclear to which of the 3takes
in that function it's referring to.The comment about "unbox" doesn't say why such a thing is needed. It is only slightly better than
1 + 1 // adds numbers
."Unbox" usually means to remove a value from a box and free the box (
Box<T> -> T
).Clippy noticed that
node.value.key().cmp(&key)
is redundant -node.value.key().cmp(key)
is fine becausekey
is already a reference.
src/lib.rs
use std::cmp::Ordering::{Less, Greater, Equal};
use std::mem;
// A type implementing Indexed<K> that is used as value in a BinaryTree may be indexed by K,
// that is, lookup functions can take a key: K instead of the full value. This is necessary for
// implementing associative containers.
pub trait Indexed<K: ?Sized> {
fn key(&self) -> &K;
}
impl<T> Indexed<T> for T
where T: Ord
{
fn key(&self) -> &T { self }
}
type Link<T> = Option<Box<Node<T>>>;
struct Node<T> {
value: T,
left: Link<T>,
right: Link<T>,
}
impl<T> Node<T> {
fn new(value: T) -> Self {
Node { value: value, left: None, right: None }
}
}
pub struct BinaryTree<T> {
root: Link<T>,
}
impl<T> BinaryTree<T> {
pub fn new() -> Self {
BinaryTree { root: None }
}
// Get a reference to the link at which `key` is or should be located
fn locate<K>(&self, key: &K) -> &Link<T>
where T: Indexed<K>,
K: Ord
{
let mut anchor = &self.root;
while let Some(ref node) = *anchor {
match node.value.key().cmp(key) {
Less => anchor = &node.left,
Greater => anchor = &node.right,
Equal => return anchor,
}
}
// No such entry, anchor is pointing to the insert position of value
anchor
}
fn locate_mut<K>(&mut self, key: &K) -> &mut Link<T>
where T: Indexed<K>,
K: Ord
{
let mut anchor = &mut self.root;
loop {
// Not as simple as `locate`: The binding of `anchor` must
// be removed before destructuring and re-assigning it in
// order to avoid duplicate &muts
match {anchor} {
&mut Some(ref mut node) if key != node.value.key() => {
anchor = if key < node.value.key() {
&mut node.left
} else {
&mut node.right
}
},
other @ &mut Some(_) |
other @ &mut None => return other
}
}
}
pub fn lookup<K>(&self, key: &K) -> Option<&T>
where T: Indexed<K>,
K: Ord
{
self.locate(key).as_ref().map(|node| &node.value)
}
pub fn insert(&mut self, value: T) -> bool
where T: Ord
{
let anchor = self.locate_mut(&value);
match *anchor {
Some(_) => false,
None => {
*anchor = Some(Box::new(Node::new(value)));
true
}
}
}
pub fn delete<K>(&mut self, key: &K)
where T: Indexed<K>,
K: Ord
{
delete_node(self.locate_mut(key));
}
pub fn iter(&self) -> Iter<T> {
Iter { current: &self.root, stack: Vec::new() }
}
}
// Returns the next in-order successor in a subtree
fn successor<T>(mut next: &mut Link<T>) -> &mut Link<T> {
loop {
match {next} {
&mut Some(ref mut node) if node.left.is_some() => next = &mut node.left,
other @ &mut Some(_) => return other,
_ => unreachable!(),
}
}
}
// Removes a node by discarding it if it is a leaf, or by swapping it
// with its inorder successor (which, in this case, is always in a
// leaf) and then deleting the leaf.
fn delete_node<T>(link: &mut Link<T>) {
if let Some(mut boxed_node) = link.take() {
match (boxed_node.left.take(), boxed_node.right.take()) {
(None, None) => (),
(leaf @ Some(_), None) |
(None, leaf @ Some(_)) => *link = leaf,
(left, right) => {
// take() followed by re-assignment looks like an awful hackjob, but appears
// to be the only way to satisfy all cases in the match
boxed_node.left = left;
boxed_node.right = right;
{
let node = &mut *boxed_node; // unbox
let next = successor(&mut node.right);
mem::swap(&mut node.value, &mut next.as_mut().unwrap().value);
delete_node(next);
}
*link = Some(boxed_node);
}
}
}
}
impl<'a, T: 'a> IntoIterator for &'a BinaryTree<T> {
type Item = &'a T;
type IntoIter = Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
pub struct Iter<'a, T: 'a> {
current: &'a Link<T>,
stack: Vec<&'a Node<T>>
}
impl<'a, T: 'a> Iterator for Iter<'a, T> {
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
while let Some(node) = self.current.as_ref() {
self.stack.push(node);
self.current = &node.left;
}
self.stack.pop().map(|node| {
self.current = &node.right;
&node.value
})
}
}
examples/set.rs
extern crate bt;
use bt::BinaryTree;
fn main() {
let mut set = BinaryTree::new();
for &value in &[100, 5, 1, 10, -1] {
set.insert(value);
}
for value in &[5, 1, 151] {
set.delete(value);
}
for value in &set {
println!("{}", value);
}
}
examples/map.rs
extern crate bt;
use bt::{BinaryTree, Indexed};
use std::cmp::Ordering;
#[derive(Debug, Copy, Clone)]
pub struct Entry<K, V>(K, V);
impl<K, V> Indexed<K> for Entry<K, V>
where K: Ord
{
fn key(&self) -> &K {
&self.0
}
}
impl<K, V> PartialEq for Entry<K, V>
where K: PartialEq
{
fn eq(&self, other: &Self) -> bool {
self.0.eq(&other.0)
}
}
impl<K, V> Eq for Entry<K, V>
where K: Eq
{}
impl<K, V> PartialOrd for Entry<K, V>
where K: PartialOrd
{
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.0.partial_cmp(&other.0)
}
}
impl<K, V> Ord for Entry<K, V>
where K: Ord
{
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
fn main() {
let mut map = BinaryTree::new();
for &entry in &[Entry("France", "Paris"), Entry("Germany", "Berlin"), Entry("Italy", "Rome")] {
map.insert(entry);
}
for &Entry(key, value) in &map {
println!("{} => {}", key, value);
}
}
The benefit of the Indexed
trait is unclear to me; why not just require that the key implements Ord
?
-
1\$\begingroup\$ Thank you for the comprehensive answer.
Indexed
allows lookups with types different than the value type stored in a node, because unfortuanely,Ord
unlikePartialOrd
does not allow a second type to be specified. \$\endgroup\$Fabian Knorr– Fabian Knorr2016年07月07日 07:00:49 +00:00Commented Jul 7, 2016 at 7:00 -
\$\begingroup\$ @FabianKnorr Have you seen how
HashMap::get
allows different keys? I believe you could also requireT: PartialOrd<K> + Ord
.Ord
just means that the result of comparing two things is transitive and all that. \$\endgroup\$Shepmaster– Shepmaster2016年07月07日 16:04:32 +00:00Commented Jul 7, 2016 at 16:04
{anchor}
instead ofanchor
infn locate_mut
\$\endgroup\$anchor
is re-assigned, so it must be explicitly moved before reassignment by writing{anchor}
. Otherwise the borrow checker will complain about an aliased&mut
. \$\endgroup\$anchor= next_anchor; match anchor { ... next_anchor = x ... }
is it more explicit? \$\endgroup\$