I'm new to Rust and am learning by implementing my own version of cut
. This is a snippet that parses the <list>
of ranges required for the -f
, -b
, or -c
options. The relevant section of the spec states:
The application shall ensure that the option-argument list (see options -b, -c, and -f below) is a -separated list or -separated list of positive numbers and ranges. Ranges can be in three forms. The first is two positive numbers separated by a (low- high), which represents all fields from the first number to the second number. The second is a positive number preceded by a (- high), which represents all fields from field number 1 to that number. The third is a positive number followed by a ( low-), which represents that number to the last field, inclusive. The elements in list can be repeated, can overlap, and can be specified in any order, but the bytes, characters, or fields selected shall be written in the order of the input data. If an element appears in the selection list more than once, it shall be written exactly once.
I'm interested in tips for writing more idiomatic Rust (especially the error handling), and any other hints for a new Rust programmer. Thanks!
use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::num::ParseIntError;
pub type Result<T> = std::result::Result<T, RangeError>;
#[derive(Debug, Eq, PartialEq)]
pub enum RangeError {
MalformedRangSpec,
Parse(ParseIntError),
}
impl Display for RangeError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use RangeError::*;
let msg = match self {
MalformedRangSpec => format!("Invalid range spec"),
Parse(e) => e.to_string(),
};
write!(f, "Error: {}", msg)
}
}
impl From<ParseIntError> for RangeError {
fn from(e: ParseIntError) -> Self {
RangeError::Parse(e)
}
}
impl Error for RangeError {}
#[derive(Debug, Eq, PartialEq, Hash)]
pub enum Range {
From(usize),
To(usize),
Inclusive(usize, usize),
Singleton(usize),
}
#[derive(Debug, Eq, PartialEq)]
pub struct RangeSet {
ranges: Vec<Range>,
}
impl RangeSet {
pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet {
RangeSet {
ranges: Vec::from_iter(iter),
}
}
pub fn from_spec<T: AsRef<str>>(spec: T) -> Result<RangeSet> {
// "-5,10,14-17,20-"
let tuples = spec
.as_ref()
.split(|c| c == ',' || c == ' ') // e.g. ["-5", "10", "14-17", "20-"]
// [[None, Some(5)], [Some(10)], [Some(14), Some(17)], [Some(20), None]]
.map(|element| {
element
.split('-') // e.g. first iter: ["", 5]
.map(|bound| match bound {
// e.g. [None, Some("5")]
"" => Ok(None),
s => {
let n: usize = s.parse()?;
Ok(Some(n))
}
})
.collect::<Result<Vec<_>>>()
})
.collect::<Result<Vec<_>>>()?;
let ranges: Vec<Range> = tuples
.iter()
.map(|range| match range.as_slice() {
[Some(n)] => Ok(Range::Singleton(*n)),
[Some(s), Some(e)] => Ok(Range::Inclusive(*s, *e)),
[Some(s), None] => Ok(Range::From(*s)),
[None, Some(e)] => Ok(Range::To(*e)),
_ => Err(RangeError::MalformedRangSpec),
})
.collect::<Result<Vec<_>>>()?;
Ok(RangeSet::from(ranges))
}
pub fn contains(&self, n: usize) -> bool {
if n == 0 {
// range defined to start at 1
return false;
}
self.ranges.iter().any(|range| match range {
Range::From(from) => (*from..).contains(&n),
Range::To(to) => (1..=*to).contains(&n),
Range::Inclusive(from, to) => (*from..=*to).contains(&n),
Range::Singleton(s) => s == &n,
})
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn contains() {
let r = RangeSet::from(vec![
Range::From(100),
Range::Inclusive(50, 60),
Range::Singleton(40),
Range::To(10),
]);
for n in 0..1000 {
match n {
1..=10 | 40..=40 | 50..=60 | 100..=1000 => {
assert!(r.contains(n), "should contain {}", n)
}
_ => assert!(!r.contains(n), "shouldn't contain {}", n),
}
}
}
#[test]
fn from_spec() {
let r1 = RangeSet::from(vec![Range::Singleton(1)]);
let r2 = RangeSet::from_spec("1");
assert_eq!(Ok(r1), r2);
let r1 = RangeSet::from(vec![
Range::To(10),
Range::Singleton(40),
Range::Inclusive(50, 60),
Range::From(100),
]);
let r2 = RangeSet::from_spec("-10,40,50-60,100-");
assert_eq!(Ok(r1), r2);
}
#[test]
fn from_spec_bad() {
assert!(RangeSet::from_spec("b").is_err());
assert!(RangeSet::from_spec("4-5-6").is_err());
}
}
1 Answer 1
Nice use of the custom types RangeError
, Range
, and RangeSet
. Some feedback:
The biggest problem with your code I see is that the
RangeSet::from_spec
function is quite gnarly. On inspection, my diagnosis of the problem is that you are implementing functionality inRangeSet
that would be better implemented onRange
. Here is a refactoring:impl FromStr for Range { type Err = RangeError; fn from_str(s: &str) -> Result<Range> { let bounds = s .split('-') .map(|bound| match bound { "" => Ok(None), s => Ok(Some(s.parse()?)) }) .collect::<Result<Vec<_>>>()?; match bounds.as_slice() { [Some(n)] => Ok(Range::Singleton(*n)), [Some(s), Some(e)] => Ok(Range::Inclusive(*s, *e)), [Some(s), None] => Ok(Range::From(*s)), [None, Some(e)] => Ok(Range::To(*e)), _ => Err(RangeError::MalformedRangSpec), } } } impl Range { pub fn contains(&self, n: usize) -> bool { if n == 0 { return false; } match self { Range::From(from) => (*from..).contains(&n), Range::To(to) => (1..=*to).contains(&n), Range::Inclusive(from, to) => (*from..=*to).contains(&n), Range::Singleton(s) => s == &n, } } }
In
match
statements, you can also clean up all of those extraneous*
s by pattern matching onmatch *x
instead ofmatch x
, here's how that looks:impl Range { pub fn contains(&self, n: usize) -> bool { if n == 0 { return false; } match *self { Range::From(from) => (from..).contains(&n), Range::To(to) => (1..=to).contains(&n), Range::Inclusive(from, to) => (from..=to).contains(&n), Range::Singleton(s) => s == n, } } }
and similarly in
fn from_str
:match *bounds.as_slice()
and[Some(n)] => Ok(Range::Singleton(n))
etc.In the
&[Some(s), Some(e)] => Ok(Range::Inclusive(s, e))
case, maybe you want to check that the range is nontrivial:[Some(s), Some(e)] => { if s < e { Ok(Range::Inclusive(s, e)) } else { Err(RangeError::MalformedRangSpec) } },
With these implementations on
Range
,RangeSet
becomes much simpler:impl RangeSet { pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet { RangeSet { ranges: Vec::from_iter(iter), } } pub fn from_spec<T: AsRef<str>>(spec: T) -> RangeResult<RangeSet> { // "-5,10,14-17,20-" let ranges = spec .as_ref() .split(|c| c == ',' || c == ' ') // e.g. ["-5", "10", "14-17", "20-"] .map(|element| element.parse()) .collect::<RangeResult<Vec<_>>>()?; Ok(RangeSet::from(ranges)) } pub fn contains(&self, n: usize) -> bool { self.ranges.iter().any(|range| range.contains(n)) } }
Your implementation currently lacks a CLI. For that, you will probably want to use the structopt crate. It is worth considering that this may make some of your functionality, in particular
RangeSet::from_spec
unnecessary. In particular, if you define a command-line argument of typeVec<Range>
StructOpt would automatically determine a reasonable way to parse a string into a vector of ranges (this relies onRange
implementingFromStr
which we did above).The
RangeError
implementation is an idiomatic way of handling errors and looks good. However, I personally would prefer not to redefine standard library types likepub type Result<T> = std::result::Result<T, RangeError>
, since it leads to non-standard function signatures that are not immediately readable. The error type is part of the behavior of a function, so better to explicitly returnResult<T, RangeError>
in your functions. Alternatively you could just use a new name:pub type RangeResult<T> = Result<Range, RangeError>
(FWIW, I also think
std::fmt::Result
andstd::io::Result<T>
were mistakes.)
Final code:
use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::num::ParseIntError;
use std::str::FromStr;
pub type RangeResult<T> = std::result::Result<T, RangeError>;
#[derive(Debug, Eq, PartialEq)]
pub enum RangeError {
MalformedRangSpec,
Parse(ParseIntError),
}
impl Display for RangeError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use RangeError::*;
let msg = match self {
MalformedRangSpec => "Invalid range spec".to_string(),
Parse(e) => e.to_string(),
};
write!(f, "Error: {}", msg)
}
}
impl From<ParseIntError> for RangeError {
fn from(e: ParseIntError) -> Self {
RangeError::Parse(e)
}
}
impl Error for RangeError {}
#[derive(Debug, Eq, PartialEq, Hash)]
pub enum Range {
From(usize),
To(usize),
Inclusive(usize, usize),
Singleton(usize),
}
// Added
impl FromStr for Range {
type Err = RangeError;
fn from_str(s: &str) -> RangeResult<Range> {
let bounds = s
.split('-')
.map(|bound| match bound {
"" => Ok(None),
s => Ok(Some(s.parse()?))
})
.collect::<RangeResult<Vec<_>>>()?;
match *bounds.as_slice() {
[Some(n)] => Ok(Range::Singleton(n)),
[Some(s), Some(e)] => {
if s < e {
Ok(Range::Inclusive(s, e))
} else {
Err(RangeError::MalformedRangSpec)
}
},
[Some(s), None] => Ok(Range::From(s)),
[None, Some(e)] => Ok(Range::To(e)),
_ => Err(RangeError::MalformedRangSpec),
}
}
}
impl Range {
pub fn contains(&self, n: usize) -> bool {
if n == 0 {
return false;
}
match *self {
Range::From(from) => (from..).contains(&n),
Range::To(to) => (1..=to).contains(&n),
Range::Inclusive(from, to) => (from..=to).contains(&n),
Range::Singleton(s) => s == n,
}
}
}
#[derive(Debug, Eq, PartialEq)]
pub struct RangeSet {
ranges: Vec<Range>,
}
impl RangeSet {
pub fn from<I: IntoIterator<Item = Range>>(iter: I) -> RangeSet {
RangeSet {
ranges: Vec::from_iter(iter),
}
}
pub fn from_spec<T: AsRef<str>>(spec: T) -> RangeResult<RangeSet> {
// "-5,10,14-17,20-"
let ranges = spec
.as_ref()
.split(|c| c == ',' || c == ' ')
// e.g. ["-5", "10", "14-17", "20-"]
.map(|element| element.parse())
.collect::<RangeResult<Vec<_>>>()?;
Ok(RangeSet::from(ranges))
}
pub fn contains(&self, n: usize) -> bool {
self.ranges.iter().any(|range| range.contains(n))
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn contains() {
let r = RangeSet::from(vec![
Range::From(100),
Range::Inclusive(50, 60),
Range::Singleton(40),
Range::To(10),
]);
for n in 0..1000 {
match n {
1..=10 | 40..=40 | 50..=60 | 100..=1000 => {
assert!(r.contains(n), "should contain {}", n)
}
_ => assert!(!r.contains(n), "shouldn't contain {}", n),
}
}
}
#[test]
fn from_spec() {
let r1 = RangeSet::from(vec![Range::Singleton(1)]);
let r2 = RangeSet::from_spec("1");
assert_eq!(Ok(r1), r2);
let r1 = RangeSet::from(vec![
Range::To(10),
Range::Singleton(40),
Range::Inclusive(50, 60),
Range::From(100),
]);
let r2 = RangeSet::from_spec("-10,40,50-60,100-");
assert_eq!(Ok(r1), r2);
}
#[test]
fn from_spec_bad() {
assert!(RangeSet::from_spec("b").is_err());
assert!(RangeSet::from_spec("4-5-6").is_err());
}
}
-
1\$\begingroup\$ Thanks for insight! "(FWIW, I also think std::fmt::Result and std::io::Result<T> were mistakes.)" I thought this was confusing too but it seemed to be the convention. I like your suggestion better. \$\endgroup\$slebaron– slebaron2021年02月28日 00:11:18 +00:00Commented Feb 28, 2021 at 0:11