3
\$\begingroup\$

I'm new to Rust and am learning by implementing my own version of cut. This is a snippet that parses the <list> of ranges required for the -f, -b, or -c options. The relevant section of the spec states:

The application shall ensure that the option-argument list (see options -b, -c, and -f below) is a -separated list or -separated list of positive numbers and ranges. Ranges can be in three forms. The first is two positive numbers separated by a (low- high), which represents all fields from the first number to the second number. The second is a positive number preceded by a (- high), which represents all fields from field number 1 to that number. The third is a positive number followed by a ( low-), which represents that number to the last field, inclusive. The elements in list can be repeated, can overlap, and can be specified in any order, but the bytes, characters, or fields selected shall be written in the order of the input data. If an element appears in the selection list more than once, it shall be written exactly once.

I'm interested in tips for writing more idiomatic Rust (especially the error handling), and any other hints for a new Rust programmer. Thanks!

use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::num::ParseIntError;
pub type Result<T> = std::result::Result<T, RangeError>;
#[derive(Debug, Eq, PartialEq)]
pub enum RangeError {
 MalformedRangSpec,
 Parse(ParseIntError),
}
impl Display for RangeError {
 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 use RangeError::*;
 let msg = match self {
 MalformedRangSpec => format!("Invalid range spec"),
 Parse(e) => e.to_string(),
 };
 write!(f, "Error: {}", msg)
 }
}
impl From<ParseIntError> for RangeError {
 fn from(e: ParseIntError) -> Self {
 RangeError::Parse(e)
 }
}
impl Error for RangeError {}
#[derive(Debug, Eq, PartialEq, Hash)]
pub enum Range {
 From(usize),
 To(usize),
 Inclusive(usize, usize),
 Singleton(usize),
}
#[derive(Debug, Eq, PartialEq)]
pub struct RangeSet {
 ranges: Vec<Range>,
}
impl RangeSet {
 pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet {
 RangeSet {
 ranges: Vec::from_iter(iter),
 }
 }
 pub fn from_spec<T: AsRef<str>>(spec: T) -> Result<RangeSet> {
 // "-5,10,14-17,20-"
 let tuples = spec
 .as_ref()
 .split(|c| c == ',' || c == ' ') // e.g. ["-5", "10", "14-17", "20-"]
 // [[None, Some(5)], [Some(10)], [Some(14), Some(17)], [Some(20), None]]
 .map(|element| {
 element
 .split('-') // e.g. first iter: ["", 5]
 .map(|bound| match bound {
 // e.g. [None, Some("5")]
 "" => Ok(None),
 s => {
 let n: usize = s.parse()?;
 Ok(Some(n))
 }
 })
 .collect::<Result<Vec<_>>>()
 })
 .collect::<Result<Vec<_>>>()?;
 let ranges: Vec<Range> = tuples
 .iter()
 .map(|range| match range.as_slice() {
 [Some(n)] => Ok(Range::Singleton(*n)),
 [Some(s), Some(e)] => Ok(Range::Inclusive(*s, *e)),
 [Some(s), None] => Ok(Range::From(*s)),
 [None, Some(e)] => Ok(Range::To(*e)),
 _ => Err(RangeError::MalformedRangSpec),
 })
 .collect::<Result<Vec<_>>>()?;
 Ok(RangeSet::from(ranges))
 }
 pub fn contains(&self, n: usize) -> bool {
 if n == 0 {
 // range defined to start at 1
 return false;
 }
 self.ranges.iter().any(|range| match range {
 Range::From(from) => (*from..).contains(&n),
 Range::To(to) => (1..=*to).contains(&n),
 Range::Inclusive(from, to) => (*from..=*to).contains(&n),
 Range::Singleton(s) => s == &n,
 })
 }
}
#[cfg(test)]
mod test {
 use super::*;
 #[test]
 fn contains() {
 let r = RangeSet::from(vec![
 Range::From(100),
 Range::Inclusive(50, 60),
 Range::Singleton(40),
 Range::To(10),
 ]);
 for n in 0..1000 {
 match n {
 1..=10 | 40..=40 | 50..=60 | 100..=1000 => {
 assert!(r.contains(n), "should contain {}", n)
 }
 _ => assert!(!r.contains(n), "shouldn't contain {}", n),
 }
 }
 }
 #[test]
 fn from_spec() {
 let r1 = RangeSet::from(vec![Range::Singleton(1)]);
 let r2 = RangeSet::from_spec("1");
 assert_eq!(Ok(r1), r2);
 let r1 = RangeSet::from(vec![
 Range::To(10),
 Range::Singleton(40),
 Range::Inclusive(50, 60),
 Range::From(100),
 ]);
 let r2 = RangeSet::from_spec("-10,40,50-60,100-");
 assert_eq!(Ok(r1), r2);
 }
 #[test]
 fn from_spec_bad() {
 assert!(RangeSet::from_spec("b").is_err());
 assert!(RangeSet::from_spec("4-5-6").is_err());
 }
}
asked Feb 27, 2021 at 7:53
\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

Nice use of the custom types RangeError, Range, and RangeSet. Some feedback:

  • The biggest problem with your code I see is that the RangeSet::from_spec function is quite gnarly. On inspection, my diagnosis of the problem is that you are implementing functionality in RangeSet that would be better implemented on Range. Here is a refactoring:

    impl FromStr for Range {
     type Err = RangeError;
     fn from_str(s: &str) -> Result<Range> {
     let bounds = s
     .split('-')
     .map(|bound| match bound {
     "" => Ok(None),
     s => Ok(Some(s.parse()?))
     })
     .collect::<Result<Vec<_>>>()?;
     match bounds.as_slice() {
     [Some(n)] => Ok(Range::Singleton(*n)),
     [Some(s), Some(e)] => Ok(Range::Inclusive(*s, *e)),
     [Some(s), None] => Ok(Range::From(*s)),
     [None, Some(e)] => Ok(Range::To(*e)),
     _ => Err(RangeError::MalformedRangSpec),
     }
     }
    }
    impl Range {
     pub fn contains(&self, n: usize) -> bool {
     if n == 0 {
     return false;
     }
     match self {
     Range::From(from) => (*from..).contains(&n),
     Range::To(to) => (1..=*to).contains(&n),
     Range::Inclusive(from, to) => (*from..=*to).contains(&n),
     Range::Singleton(s) => s == &n,
     }
     }
    }
    
  • In match statements, you can also clean up all of those extraneous *s by pattern matching on match *x instead of match x, here's how that looks:

    impl Range {
     pub fn contains(&self, n: usize) -> bool {
     if n == 0 {
     return false;
     }
     match *self {
     Range::From(from) => (from..).contains(&n),
     Range::To(to) => (1..=to).contains(&n),
     Range::Inclusive(from, to) => (from..=to).contains(&n),
     Range::Singleton(s) => s == n,
     }
     }
    }
    

    and similarly in fn from_str: match *bounds.as_slice() and [Some(n)] => Ok(Range::Singleton(n)) etc.

  • In the &[Some(s), Some(e)] => Ok(Range::Inclusive(s, e)) case, maybe you want to check that the range is nontrivial:

     [Some(s), Some(e)] => {
     if s < e {
     Ok(Range::Inclusive(s, e))
     } else {
     Err(RangeError::MalformedRangSpec)
     }
     },
    
  • With these implementations on Range, RangeSet becomes much simpler:

    impl RangeSet {
     pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet {
     RangeSet {
     ranges: Vec::from_iter(iter),
     }
     }
     pub fn from_spec<T: AsRef<str>>(spec: T) -> RangeResult<RangeSet> {
     // "-5,10,14-17,20-"
     let ranges = spec
     .as_ref()
     .split(|c| c == ',' || c == ' ')
     // e.g. ["-5", "10", "14-17", "20-"]
     .map(|element| element.parse())
     .collect::<RangeResult<Vec<_>>>()?;
     Ok(RangeSet::from(ranges))
     }
     pub fn contains(&self, n: usize) -> bool {
     self.ranges.iter().any(|range| range.contains(n))
     }
    }
    
  • Your implementation currently lacks a CLI. For that, you will probably want to use the structopt crate. It is worth considering that this may make some of your functionality, in particular RangeSet::from_spec unnecessary. In particular, if you define a command-line argument of type Vec<Range> StructOpt would automatically determine a reasonable way to parse a string into a vector of ranges (this relies on Range implementing FromStr which we did above).

  • The RangeError implementation is an idiomatic way of handling errors and looks good. However, I personally would prefer not to redefine standard library types like pub type Result<T> = std::result::Result<T, RangeError>, since it leads to non-standard function signatures that are not immediately readable. The error type is part of the behavior of a function, so better to explicitly return Result<T, RangeError> in your functions. Alternatively you could just use a new name:

    pub type RangeResult<T> = Result<Range, RangeError>
    

    (FWIW, I also think std::fmt::Result and std::io::Result<T> were mistakes.)

Final code:

use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::num::ParseIntError;
use std::str::FromStr;
pub type RangeResult<T> = std::result::Result<T, RangeError>;
#[derive(Debug, Eq, PartialEq)]
pub enum RangeError {
 MalformedRangSpec,
 Parse(ParseIntError),
}
impl Display for RangeError {
 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 use RangeError::*;
 let msg = match self {
 MalformedRangSpec => "Invalid range spec".to_string(),
 Parse(e) => e.to_string(),
 };
 write!(f, "Error: {}", msg)
 }
}
impl From<ParseIntError> for RangeError {
 fn from(e: ParseIntError) -> Self {
 RangeError::Parse(e)
 }
}
impl Error for RangeError {}
#[derive(Debug, Eq, PartialEq, Hash)]
pub enum Range {
 From(usize),
 To(usize),
 Inclusive(usize, usize),
 Singleton(usize),
}
// Added
impl FromStr for Range {
 type Err = RangeError;
 fn from_str(s: &str) -> RangeResult<Range> {
 let bounds = s
 .split('-')
 .map(|bound| match bound {
 "" => Ok(None),
 s => Ok(Some(s.parse()?))
 })
 .collect::<RangeResult<Vec<_>>>()?;
 match *bounds.as_slice() {
 [Some(n)] => Ok(Range::Singleton(n)),
 [Some(s), Some(e)] => {
 if s < e {
 Ok(Range::Inclusive(s, e))
 } else {
 Err(RangeError::MalformedRangSpec)
 }
 },
 [Some(s), None] => Ok(Range::From(s)),
 [None, Some(e)] => Ok(Range::To(e)),
 _ => Err(RangeError::MalformedRangSpec),
 }
 }
}
impl Range {
 pub fn contains(&self, n: usize) -> bool {
 if n == 0 {
 return false;
 }
 match *self {
 Range::From(from) => (from..).contains(&n),
 Range::To(to) => (1..=to).contains(&n),
 Range::Inclusive(from, to) => (from..=to).contains(&n),
 Range::Singleton(s) => s == n,
 }
 }
}
#[derive(Debug, Eq, PartialEq)]
pub struct RangeSet {
 ranges: Vec<Range>,
}
impl RangeSet {
 pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet {
 RangeSet {
 ranges: Vec::from_iter(iter),
 }
 }
 pub fn from_spec<T: AsRef<str>>(spec: T) -> RangeResult<RangeSet> {
 // "-5,10,14-17,20-"
 let ranges = spec
 .as_ref()
 .split(|c| c == ',' || c == ' ')
 // e.g. ["-5", "10", "14-17", "20-"]
 .map(|element| element.parse())
 .collect::<RangeResult<Vec<_>>>()?;
 Ok(RangeSet::from(ranges))
 }
 pub fn contains(&self, n: usize) -> bool {
 self.ranges.iter().any(|range| range.contains(n))
 }
}
#[cfg(test)]
mod test {
 use super::*;
 #[test]
 fn contains() {
 let r = RangeSet::from(vec![
 Range::From(100),
 Range::Inclusive(50, 60),
 Range::Singleton(40),
 Range::To(10),
 ]);
 for n in 0..1000 {
 match n {
 1..=10 | 40..=40 | 50..=60 | 100..=1000 => {
 assert!(r.contains(n), "should contain {}", n)
 }
 _ => assert!(!r.contains(n), "shouldn't contain {}", n),
 }
 }
 }
 #[test]
 fn from_spec() {
 let r1 = RangeSet::from(vec![Range::Singleton(1)]);
 let r2 = RangeSet::from_spec("1");
 assert_eq!(Ok(r1), r2);
 let r1 = RangeSet::from(vec![
 Range::To(10),
 Range::Singleton(40),
 Range::Inclusive(50, 60),
 Range::From(100),
 ]);
 let r2 = RangeSet::from_spec("-10,40,50-60,100-");
 assert_eq!(Ok(r1), r2);
 }
 #[test]
 fn from_spec_bad() {
 assert!(RangeSet::from_spec("b").is_err());
 assert!(RangeSet::from_spec("4-5-6").is_err());
 }
}
answered Feb 27, 2021 at 14:53
\$\endgroup\$
1
  • 1
    \$\begingroup\$ Thanks for insight! "(FWIW, I also think std::fmt::Result and std::io::Result<T> were mistakes.)" I thought this was confusing too but it seemed to be the convention. I like your suggestion better. \$\endgroup\$ Commented Feb 28, 2021 at 0:11

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.