This is my first ever program written in Rust (after reading the book). It is a small tool to sort all lines from stdin
according to two criteria. The first one is the time when the given log line was generated - in my case the files contain logs from many applications which all use the same time source. The second is related to the fact that each log is assigned a consecutive value stored as one byte.
All of the code is on bitbucket as a cargo project. The two relevant files are lib.rs:
extern crate time;
extern crate regex;
use std::result;
use std::cmp::Ordering;
/// A single log line together with the two values it is ordered by.
#[derive(Debug, Clone)]
pub struct Log {
/// Timestamp parsed from the line's `time` capture group.
time: time::Tm,
/// One-byte counter, parsed as hexadecimal from the `counter` capture group.
counter: u8,
/// The complete, unmodified text of the input line.
pub line: String,
}
/// Two logs are equal when both their timestamp and counter match;
/// the text of the line is deliberately not part of the comparison.
impl PartialEq for Log {
    fn eq(&self, other: &Log) -> bool {
        (&self.time, self.counter) == (&other.time, other.counter)
    }
}
// Marker impl: equality on `Log` (timestamp and counter) is a full equivalence relation.
impl Eq for Log {}
/// Partial ordering always succeeds and simply defers to the total order from `Ord`.
impl PartialOrd for Log {
    fn partial_cmp(&self, other: &Log) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Orders logs by timestamp first; the counter breaks ties between equal timestamps.
///
/// NOTE(review): the timestamp comparison is delegated to `time::Tm`'s own `Ord`.
/// The review below reports `log_relations` failing on `expected_log > smaller_time_log`,
/// so confirm that ordering behaves as expected for year-less timestamps.
impl Ord for Log {
    fn cmp(&self, other: &Log) -> Ordering {
        match self.time.cmp(&other.time) {
            Ordering::Equal => self.counter.cmp(&other.counter),
            by_time => by_time,
        }
    }
}
/// Parser configuration: a list of `(line regex, time format strings)` pairs.
/// `Parser` tries the pairs in order and uses the first one that yields a `Log`.
pub struct Config(pub Vec<(regex::Regex, Vec<String>)>);
/// Turns raw text lines into `Log` values according to a `Config`.
pub struct Parser {
// Regex / time-format pairs tried in order for every input line.
config: Config
}
impl Parser {
pub fn default() -> Parser {
let main_regex = regex::Regex::new(r"\b(?P<counter>.{2}) \S+ <(?P<time>[^>]+)> ").unwrap();
let time_strings = vec!["%m.%d %H:%M:%S.%f".to_string(),"%Y-%m-%dT%H:%M:%S.%f".to_string()];
Parser{config: Config(vec![(main_regex, time_strings)])}
}
pub fn new(config: Config) -> Parser {
Parser{config: config}
}
pub fn parse(&self, input: &str) -> result::Result<Log, String> {
match self.parse_line(input) {
Some(l) => return Ok(l),
None => return Err(input.to_string())
}
}
fn parse_line(&self, input: &str) -> Option<Log> {
for pair in &self.config.0 {
let cap = match pair.0.captures(&input) {
None => continue,
Some(c) => c,
};
let counter = match cap.name("counter") {
None => continue,
Some(m) => match u8::from_str_radix(m, 16) {
Err(_) => continue,
Ok(c) => c,
}
};
let time = match self.parse_time(&pair.1, cap.name("time")) {
None => continue,
Some(t) => t
};
return Some(Log{
time: time,
counter: counter,
line: input.to_string()})
}
None
}
fn parse_time(&self, time_strings: &Vec<String>, capture: Option<&str>) -> Option<time::Tm> {
let m = match capture {
None => return None,
Some(m) => m
};
for s in time_strings {
if let Ok(mut t) = time::strptime(&m, s) {
if t.tm_year != 0 {
t.tm_year += 1900
}
return Some(t)
}
}
None
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use time;

    /// Timestamp shared by the parsing tests: month index 9, day 19,
    /// 13:35:36.981305, with no year set.
    fn expected_time() -> time::Tm {
        time::Tm {
            tm_sec: 36, tm_min: 35, tm_hour: 13,
            tm_mday: 19, tm_mon: 9, tm_year: 0,
            tm_wday: 0, tm_yday: 0, tm_isdst: 0,
            tm_utcoff: 0, tm_nsec: 981305000,
        }
    }

    /// Line with a leading sequence number, receive time and source address.
    #[test]
    fn parser_parse_should_handle_first_format() {
        let input = "000001 10.19 13:35:17.416 [127.0.0.1] 12 BOARD-1234-3-AppName <10.19 13:35:36.981305> 270 DBG/ABC/XYZ, something something".to_string();
        let log = Parser::default().parse(&input).unwrap();
        let expected_log = Log {
            time: expected_time(),
            counter: 0x12,
            line: input.clone(),
        };
        assert_eq!(expected_log, log);
    }

    /// Line that starts directly with the counter.
    #[test]
    fn parser_parse_should_handle_second_format() {
        let input = "12 BOARD-1234-3-AppName <10.19 13:35:36.981305> 270 INF/ABC/XYZ, something something something".to_string();
        let log = Parser::default().parse(&input).unwrap();
        let expected_log = Log {
            time: expected_time(),
            counter: 0x12,
            line: input.clone(),
        };
        assert_eq!(expected_log, log);
    }

    /// Line whose timestamp uses the `%Y-%m-%dT%H:%M:%S.%f` format, so the year is set.
    #[test]
    fn parser_parse_should_handle_third_format() {
        let input = "123456 02.11 17:20:30.92 [192.168.1.1] 57 BOARD-1234-3-AppName <2015-10-19T13:35:36.981305Z> 270-ThreadName DBG/ABC/File.cpp#42 something something something".to_string();
        let log = Parser::default().parse(&input).unwrap();
        let mut expected_time = expected_time();
        expected_time.tm_year = 2015;
        let expected_log = Log {
            time: expected_time,
            counter: 0x57,
            line: input.clone(),
        };
        assert_eq!(expected_log, log);
    }

    /// Equality and ordering depend on time and counter only, never on the line text.
    #[test]
    fn log_relations() {
        let mut smaller_time = expected_time();
        smaller_time.tm_mon -= 1;
        let smaller_time = smaller_time;
        let expected_log = Log { time: expected_time(), counter: 0x10, line: "".to_string() };
        let mut expected_eq_log = expected_log.clone();
        expected_eq_log.line = "test".to_string();
        let expected_eq_log = expected_eq_log;
        let smaller_time_log = Log { time: smaller_time, counter: 0x10, line: "".to_string() };
        let smaller_counter_log = Log { time: expected_time(), counter: 0x9, line: "".to_string() };
        assert_eq!(expected_log, expected_log);
        assert_eq!(expected_log, expected_eq_log);
        assert!(expected_log != smaller_time_log);
        assert!(expected_log != smaller_counter_log);
        assert!(expected_log > smaller_time_log);
        assert!(expected_log > smaller_counter_log);
        assert!(!(expected_log > expected_eq_log));
        assert!(!(expected_log < expected_eq_log));
    }
}
and main.rs:
extern crate logparse;
extern crate toml;
extern crate regex;
#[macro_use]
extern crate log;
extern crate env_logger;
use std::io::{BufRead,Read};
use std::vec::Vec;
use std::fs::File;
/// Loads the parser configuration from `.sort_logs` in the user's home directory.
///
/// Every failure (no home directory, unreadable file, invalid TOML, missing
/// `entry` array) is logged as a warning and reported as `None`. Entries that
/// are not tables, or that `parse_entry` rejects, are skipped.
fn parse_config() -> Option<logparse::Config> {
    let mut config_path = match std::env::home_dir() {
        Some(home) => home,
        None => {
            warn!("Home dir not set");
            return None;
        }
    };
    config_path.push(".sort_logs");

    let mut file = match File::open(&config_path) {
        Ok(file) => file,
        Err(e) => {
            warn!("Failed to open {:?}: {}", &config_path, e);
            return None;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        warn!("Failed to read {:?}: {}", &config_path, e);
        return None;
    }

    let mut toml_parser = toml::Parser::new(&contents);
    let document = match toml_parser.parse() {
        Some(document) => document,
        None => {
            warn!("Failed to parse {:?}", &config_path);
            for error in toml_parser.errors {
                warn!("\terror: {:?}", error);
            }
            return None;
        }
    };

    let entries = match document.get("entry") {
        Some(&toml::Value::Array(ref entries)) => entries,
        _ => {
            warn!("Expected array of 'entry'.");
            return None;
        }
    };

    let pairs = entries.iter()
        .filter_map(|entry| match *entry {
            toml::Value::Table(ref table) => parse_entry(table),
            _ => None,
        })
        .collect();
    Some(logparse::Config(pairs))
}
/// Converts one `entry` table into a `(regex, time formats)` pair.
///
/// Returns `None` when `regex` is missing, not a string, or fails to compile
/// (the latter is logged), or when `times` is missing or not an array.
/// Non-string items inside `times` are ignored.
fn parse_entry(entry: &toml::Table) -> Option<(regex::Regex, Vec<String>)> {
    let pattern = match entry.get("regex") {
        Some(&toml::Value::String(ref pattern)) => pattern,
        _ => return None,
    };
    let regex = match regex::Regex::new(pattern) {
        Ok(compiled) => compiled,
        Err(e) => {
            warn!("Failed to parse {}: {:?}", pattern, e);
            return None;
        }
    };

    let formats = match entry.get("times") {
        Some(&toml::Value::Array(ref values)) => {
            values.iter()
                .filter_map(|value| match *value {
                    toml::Value::String(ref format) => Some(format.clone()),
                    _ => None,
                })
                .collect()
        }
        _ => return None,
    };

    Some((regex, formats))
}
/// Reads log lines from stdin, sorts the parseable ones and prints them.
///
/// Lines that cannot be read are skipped silently; lines that cannot be
/// parsed are reported with a warning and dropped.
fn main() {
    env_logger::init().unwrap();

    // Fall back to the built-in configuration when no usable config file exists.
    let parser = parse_config()
        .map(logparse::Parser::new)
        .unwrap_or_else(logparse::Parser::default);

    let mut sorted: Vec<logparse::Log> = Vec::new();
    for line in std::io::BufReader::new(std::io::stdin()).lines() {
        let text = match line {
            Ok(text) => text,
            Err(_) => continue,
        };
        match parser.parse(&text) {
            Ok(entry) => sorted.push(entry),
            Err(unparsed) => {
                warn!("Failed to parse line: {}", unparsed);
            }
        }
    }

    sorted.sort();
    for entry in &sorted {
        println!("{}", entry.line);
    }
}
1 Answer 1
To start with, your tests fail:
---- tests::log_relations stdout ----
thread 'tests::log_relations' panicked at 'assertion failed: expected_log > smaller_time_log', src/lib.rs:169
Thoughts on main
- All the boilerplate around warning during log parsing drives me crazy for whatever reason. If you want to keep it like that, maybe introduce a macro to reduce the duplication.
  However, I think it's better to just report an error. You can log the warning at a higher level.
- Instead of matching on `Some`/`None`, check out all the methods on `Option` and `Result` such as `map`, `map_or`, `map_or_else`, `and_then`, etc.
- `toml::Value` has `as_*` methods that convert to a specific type, or a `None`.
- There's no need to cram running the parser into the `filter_map`, it can be chained afterward.
- Could choose to avoid the temp variable when creating the `sorted` array, just do it all in chained methods.
- No need to specify the type of `sorted` when declaring a `Vec`. Type inference can handle it.
- Fairly rare to need to `push` to a `Vec`. Usually `collect`ing an iterator is more common. `filter_map`, `map_err`, and `ok` can help.
extern crate logparse;
extern crate toml;
extern crate regex;
#[macro_use]
extern crate log;
extern crate env_logger;
#[macro_use]
extern crate quick_error;
use std::io::{self, BufRead, Read};
use std::vec::Vec;
use std::fs::File;
use std::path::PathBuf;
// Errors that can occur while locating, reading and interpreting the
// configuration file. Generated with the `quick_error!` macro.
quick_error! {
#[derive(Debug)]
pub enum ConfigError {
// The user's home directory could not be determined.
HomeDirNotSet {
description("Home directory not set")
}
// Opening the configuration file at `path` failed with `err`.
UnableToOpen(err: io::Error, path: PathBuf) {
description("Unable to open configuration")
display("Unable to open configuration from {}: {}", path.display(), err)
cause(err)
}
// Reading the opened configuration file at `path` failed with `err`.
UnableToRead(err: io::Error, path: PathBuf) {
description("Unable to read configuration")
display("Unable to read configuration from {}: {}", path.display(), err)
cause(err)
}
// The file at `path` is not valid TOML; `errors` holds the parser's diagnostics.
UnableToParse(errors: Vec<toml::ParserError>, path: PathBuf) {
description("Unable to parse configuration")
display("Unable to parse configuration from {}: {:?}", path.display(), errors)
}
// The TOML document at `path` has no `entry` array.
ExpectedEntries(path: PathBuf) {
description("Expected array of 'entry'")
}
}
}
fn parse_config() -> Result<logparse::Config, ConfigError> {
let mut path = try!(std::env::home_dir().ok_or(ConfigError::HomeDirNotSet));
path.push(".sort_logs");
let mut f = try!(File::open(&path).map_err(|e| ConfigError::UnableToOpen(e, path.clone())));
let mut buf = String::new();
try!(f.read_to_string(&mut buf).map_err(|e| ConfigError::UnableToRead(e, path.clone())));
let mut parser = toml::Parser::new(&buf);
let parse_result = parser.parse();
let parse_errors = parser.errors;
let toml = try!(parse_result.ok_or_else(|| ConfigError::UnableToParse(parse_errors, path.clone())));
let entries = try!(toml
.get("entry")
.and_then(toml::Value::as_slice)
.ok_or_else(|| ConfigError::ExpectedEntries(path.clone())));
let mut config = logparse::Config(vec![]);
for entry in entries {
if let &toml::Value::Table(ref table) = entry {
if let Some(t) = parse_entry(&table) {
config.0.push(t);
}
}
}
Ok(config)
}
fn parse_entry(entry: &toml::Table) -> Option<(regex::Regex, Vec<String>)> {
let regex = match entry.get("regex").and_then(toml::Value::as_str) {
Some(ref s) => {
match regex::Regex::new(&s) {
Err(e) => {
warn!("Failed to parse {}: {:?}", s, e);
return None;
},
Ok(re) => re
}
},
None => return None,
};
let times_strings = match entry.get("times").and_then(toml::Value::as_slice) {
Some(ref strings) => {
strings
.iter()
.filter_map(|s| s.as_str())
.map(str::to_owned)
.collect()
},
None => return None
};
Some((regex, times_strings))
}
/// Reads log lines from stdin, sorts the parseable ones and prints them.
///
/// A configuration problem is logged and the default parser is used instead.
/// Unreadable lines are skipped silently; unparseable lines are logged and dropped.
fn main() {
    env_logger::init().unwrap();

    let parser = match parse_config() {
        Ok(config) => logparse::Parser::new(config),
        Err(e) => {
            warn!("Unable to parse configuration: {}", e);
            logparse::Parser::default()
        }
    };

    let reader = std::io::BufReader::new(std::io::stdin());
    let mut sorted = Vec::new();
    for line in reader.lines().filter_map(|l| l.ok()) {
        match parser.parse(&line) {
            Ok(log) => sorted.push(log),
            Err(e) => {
                warn!("Failed to parse line: {}", e);
            }
        }
    }

    sorted.sort();
    for log in sorted {
        println!("{}", log.line);
    }
}
Thoughts on lib
- `Log` would be better named as `LogLine`.
- I like to define a `key` method to make implementing `PartialEq`, `Hash`, and `Ord` easy to keep consistent.
- Don't make your own `default` function, use `Default`!
- Use `expect` instead of `unwrap`. The first time it fails, you will be happy you did.
- Use `ok_or_else` instead of matching on an `Option`.
- You can destructure in closure arguments. This is useful for tuples because you can say something like `|(regex, formats)|`.
- Sometimes, introducing a new type is the right thing to do. This allows for smaller functions.
- Be wary of publicly exposing tuples. They are rarely a good abstraction.
- Once we've introduced a struct to replace the tuple, I don't see the benefit of `Config`.
- Note that helper functions don't have to be methods (accepting `self`).
- Basically never accept `&Vec<T>` or `&String`, use `&[T]` or `&str` instead.
- Don't accept an `Option` if you can't handle it. Let the caller deal with it.
- Perform more conversion to iterators. Getting the "first" thing is just `next`.
- Extracting a helper function makes `parse_time` a clean read.
- No need to `to_string` the `input` when writing tests.
extern crate time;
extern crate regex;
use std::result;
use std::cmp::Ordering;
/// A single log line together with the two values it is ordered by.
#[derive(Debug, Clone)]
pub struct LogLine {
/// Timestamp parsed from the line's `time` capture group.
time: time::Tm,
/// One-byte counter, parsed as hexadecimal from the `counter` capture group.
counter: u8,
/// The complete, unmodified text of the input line.
pub line: String,
}
impl LogLine {
    /// The `(time, counter)` pair that equality and ordering are based on.
    fn key(&self) -> (time::Tm, u8) {
        let time = self.time;
        let counter = self.counter;
        (time, counter)
    }
}
/// Lines are equal when their sort keys are equal; the text is not compared.
impl PartialEq for LogLine {
    fn eq(&self, other: &LogLine) -> bool {
        let (mine, theirs) = (self.key(), other.key());
        mine == theirs
    }
}
// Marker impl: key-based equality on `LogLine` is a full equivalence relation.
impl Eq for LogLine {}
/// Partial ordering always succeeds and simply defers to the total order from `Ord`.
impl PartialOrd for LogLine {
    fn partial_cmp(&self, other: &LogLine) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Total order over the `(time, counter)` key: time first, counter as tie-breaker.
impl Ord for LogLine {
    fn cmp(&self, other: &LogLine) -> Ordering {
        let (mine, theirs) = (self.key(), other.key());
        Ord::cmp(&mine, &theirs)
    }
}
/// Turns raw text lines into `LogLine` values.
pub struct Parser {
// Line configurations, tried in order for every input line.
config: Vec<LineConfiguration>,
}
impl Default for Parser {
fn default() -> Parser {
let main_regex = regex::Regex::new(r"\b(?P<counter>.{2}) \S+ <(?P<time>[^>]+)> ")
.expect("Unable to build default regex");
let time_strings = vec!["%m.%d %H:%M:%S.%f".to_string(),
"%Y-%m-%dT%H:%M:%S.%f".to_string()];
Parser { config: vec![LineConfiguration::new(main_regex, time_strings)] }
}
}
impl Parser {
    /// Builds a parser from caller-supplied line configurations.
    pub fn new(config: Vec<LineConfiguration>) -> Parser {
        Parser { config: config }
    }

    /// Parses one line, returning `Err` with a copy of the input when no
    /// configuration produced a result.
    pub fn parse(&self, input: &str) -> result::Result<LogLine, String> {
        match self.parse_line(input) {
            Some(parsed) => Ok(parsed),
            None => Err(input.to_string()),
        }
    }

    /// Returns the result of the first configuration that can parse `input`.
    fn parse_line(&self, input: &str) -> Option<LogLine> {
        for candidate in &self.config {
            if let Some(parsed) = candidate.parse(input) {
                return Some(parsed);
            }
        }
        None
    }
}
/// One way of recognising a log line: a regex plus the time formats to try on its `time` capture.
pub struct LineConfiguration {
// Pattern providing the `counter` and `time` named capture groups.
regex: regex::Regex,
// `strptime` format strings, tried in order.
formats: Vec<String>,
}
impl LineConfiguration {
pub fn new(regex: regex::Regex, formats: Vec<String>) -> Self {
LineConfiguration {
regex: regex,
formats: formats,
}
}
fn parse(&self, input: &str) -> Option<LogLine> {
self.regex.captures(&input).and_then(|cap| {
let counter = cap.name("counter").and_then(|c| u8::from_str_radix(c, 16).ok());
let time = cap.name("time").and_then(|t| self.parse_time(t));
match (counter, time) {
(Some(counter), Some(time)) => {
Some(LogLine {
time: time,
counter: counter,
line: input.to_string(),
})
}
_ => None,
}
})
}
fn parse_time(&self, capture: &str) -> Option<time::Tm> {
self.formats
.iter()
.filter_map(|fmt| time::strptime(&capture, fmt).ok())
.next()
.map(reset_year)
}
}
/// Shifts a non-zero `tm_year` by 1900 and leaves a zero year untouched.
fn reset_year(t: time::Tm) -> time::Tm {
    match t.tm_year {
        0 => t,
        year => time::Tm { tm_year: year + 1900, ..t },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use time;

    /// Timestamp shared by the parsing tests: month index 9, day 19,
    /// 13:35:36.981305, with no year set.
    fn expected_time() -> time::Tm {
        time::Tm {
            tm_sec: 36,
            tm_min: 35,
            tm_hour: 13,
            tm_mday: 19,
            tm_mon: 9,
            tm_year: 0,
            tm_wday: 0,
            tm_yday: 0,
            tm_isdst: 0,
            tm_utcoff: 0,
            tm_nsec: 981305000,
        }
    }

    /// Line with a leading sequence number, receive time and source address.
    #[test]
    fn parser_parse_should_handle_first_format() {
        let input = "000001 10.19 13:35:17.416 [127.0.0.1] 12 BOARD-1234-3-AppName <10.19 \
                     13:35:36.981305> 270 DBG/ABC/XYZ, something something";
        let log = Parser::default().parse(&input).unwrap();
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x12,
            line: input.to_string(),
        };
        assert_eq!(expected_log, log);
    }

    /// Line that starts directly with the counter.
    #[test]
    fn parser_parse_should_handle_second_format() {
        let input = "12 BOARD-1234-3-AppName <10.19 13:35:36.981305> 270 INF/ABC/XYZ, something \
                     something something";
        let log = Parser::default().parse(&input).unwrap();
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x12,
            line: input.to_string(),
        };
        assert_eq!(expected_log, log);
    }

    /// Line whose timestamp uses the `%Y-%m-%dT%H:%M:%S.%f` format, so the year is set.
    #[test]
    fn parser_parse_should_handle_third_format() {
        let input = "123456 02.11 17:20:30.92 [192.168.1.1] 57 BOARD-1234-3-AppName \
                     <2015-10-19T13:35:36.981305Z> 270-ThreadName DBG/ABC/File.cpp#42 something \
                     something something";
        let log = Parser::default().parse(&input).unwrap();
        let mut expected_time = expected_time();
        expected_time.tm_year = 2015;
        let expected_log = LogLine {
            time: expected_time,
            counter: 0x57,
            line: input.to_string(),
        };
        assert_eq!(expected_log, log);
    }

    /// Equality and ordering depend on time and counter only, never on the line text.
    #[test]
    fn log_relations() {
        let mut smaller_time = expected_time();
        smaller_time.tm_mon -= 1;
        let smaller_time = smaller_time;
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x10,
            line: "".to_string(),
        };
        let mut expected_eq_log = expected_log.clone();
        expected_eq_log.line = "test".to_string();
        let expected_eq_log = expected_eq_log;
        let smaller_time_log = LogLine {
            time: smaller_time,
            counter: 0x10,
            line: "".to_string(),
        };
        let smaller_counter_log = LogLine {
            time: expected_time(),
            counter: 0x9,
            line: "".to_string(),
        };
        assert_eq!(expected_log, expected_log);
        assert_eq!(expected_log, expected_eq_log);
        assert!(expected_log != smaller_time_log);
        assert!(expected_log != smaller_counter_log);
        assert!(expected_log > smaller_time_log);
        assert!(expected_log > smaller_counter_log);
        assert!(!(expected_log > expected_eq_log));
        assert!(!(expected_log < expected_eq_log));
    }
}
Ready for crazy mode? I didn't really like how there was allocation all throughout. When iterating over the input, you already have a `String` from `Lines`, so doubly-allocating it is inefficient. Additionally, in the tests you had to allocate just to make comparisons.
The route I chose was to parameterize `LogLine` over a generic `L`. This allows the caller to pass in a `&str` or a `String` and that's what will be used throughout. You'll note that I did some nasty conversion to a `LogLine<()>` to drop the reference (ending the borrow) before I could put the `String` back in.
Important changes:
lib.rs
extern crate time;
extern crate regex;
use std::cmp::Ordering;
/// Sort metadata for a log line plus a caller-chosen representation `L` of the line itself.
#[derive(Debug, Clone)]
pub struct LogLine<L> {
/// Timestamp parsed from the line's `time` capture group.
time: time::Tm,
/// One-byte counter, parsed as hexadecimal from the `counter` capture group.
counter: u8,
/// The line payload; whatever type the caller handed to the parser.
pub line: L,
}
impl<L> LogLine<L> {
    /// Replaces the line payload with `f(line)` while keeping time and counter.
    pub fn map<F, T>(self, f: F) -> LogLine<T>
        where F: FnOnce(L) -> T
    {
        let LogLine { time, counter, line } = self;
        LogLine {
            time: time,
            counter: counter,
            line: f(line),
        }
    }

    /// The `(time, counter)` pair that equality and ordering are based on.
    fn key(&self) -> (time::Tm, u8) {
        let time = self.time;
        let counter = self.counter;
        (time, counter)
    }
}
/// Lines are equal when their sort keys are equal; the payload is not compared.
impl<L> PartialEq for LogLine<L> {
    fn eq(&self, other: &LogLine<L>) -> bool {
        let (mine, theirs) = (self.key(), other.key());
        mine == theirs
    }
}
// Marker impl: key-based equality on `LogLine<L>` is a full equivalence relation.
impl<L> Eq for LogLine<L> {}
/// Partial ordering always succeeds and simply defers to the total order from `Ord`.
impl<L> PartialOrd for LogLine<L> {
    fn partial_cmp(&self, other: &LogLine<L>) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Total order over the `(time, counter)` key: time first, counter as tie-breaker.
impl<L> Ord for LogLine<L> {
    fn cmp(&self, other: &LogLine<L>) -> Ordering {
        let (mine, theirs) = (self.key(), other.key());
        Ord::cmp(&mine, &theirs)
    }
}
/// Turns raw text lines into `LogLine` values.
pub struct Parser {
// Line configurations, tried in order for every input line.
config: Vec<LineConfiguration>,
}
impl Default for Parser {
fn default() -> Parser {
let main_regex = regex::Regex::new(r"\b(?P<counter>.{2}) \S+ <(?P<time>[^>]+)> ")
.expect("Unable to build default regex");
let time_strings = vec!["%m.%d %H:%M:%S.%f".to_string(),
"%Y-%m-%dT%H:%M:%S.%f".to_string()];
Parser { config: vec![LineConfiguration::new(main_regex, time_strings)] }
}
}
impl Parser {
    /// Builds a parser from caller-supplied line configurations.
    pub fn new(config: Vec<LineConfiguration>) -> Parser {
        Parser { config: config }
    }

    /// Parses `input` and, on success, returns the metadata with `input`
    /// itself stored as the line payload (no copy of the text is made here).
    pub fn parse<L>(&self, input: L) -> Option<LogLine<L>>
        where L: AsRef<str>
    {
        // The payload-free result holds no borrow of `input`, so `input`
        // can be moved into the final value afterwards.
        let parsed = self.parse_line(input.as_ref());
        parsed.map(|meta| meta.map(|_| input))
    }

    /// Returns the result of the first configuration that can parse `input`.
    fn parse_line(&self, input: &str) -> Option<LogLine<()>> {
        for candidate in &self.config {
            if let Some(parsed) = candidate.parse(input) {
                return Some(parsed);
            }
        }
        None
    }
}
/// One way of recognising a log line: a regex plus the time formats to try on its `time` capture.
pub struct LineConfiguration {
// Pattern providing the `counter` and `time` named capture groups.
regex: regex::Regex,
// `strptime` format strings, tried in order.
formats: Vec<String>,
}
impl LineConfiguration {
pub fn new(regex: regex::Regex, formats: Vec<String>) -> Self {
LineConfiguration {
regex: regex,
formats: formats,
}
}
fn parse(&self, input: &str) -> Option<LogLine<()>> {
self.regex.captures(input).and_then(|cap| {
let counter = cap.name("counter").and_then(|c| u8::from_str_radix(c, 16).ok());
let time = cap.name("time").and_then(|t| self.parse_time(t));
match (counter, time) {
(Some(counter), Some(time)) => {
Some(LogLine {
time: time,
counter: counter,
line: (),
})
}
_ => None,
}
})
}
fn parse_time(&self, capture: &str) -> Option<time::Tm> {
self.formats
.iter()
.filter_map(|fmt| time::strptime(&capture, fmt).ok())
.next()
.map(reset_year)
}
}
/// Shifts a non-zero `tm_year` by 1900 and leaves a zero year untouched.
fn reset_year(t: time::Tm) -> time::Tm {
    match t.tm_year {
        0 => t,
        year => time::Tm { tm_year: year + 1900, ..t },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use time;

    /// Timestamp shared by the parsing tests: month index 9, day 19,
    /// 13:35:36.981305, with no year set.
    fn expected_time() -> time::Tm {
        time::Tm {
            tm_sec: 36,
            tm_min: 35,
            tm_hour: 13,
            tm_mday: 19,
            tm_mon: 9,
            tm_year: 0,
            tm_wday: 0,
            tm_yday: 0,
            tm_isdst: 0,
            tm_utcoff: 0,
            tm_nsec: 981305000,
        }
    }

    /// Line with a leading sequence number, receive time and source address.
    #[test]
    fn parser_parse_should_handle_first_format() {
        let input = "000001 10.19 13:35:17.416 [127.0.0.1] 12 BOARD-1234-3-AppName <10.19 \
                     13:35:36.981305> 270 DBG/ABC/XYZ, something something";
        let log = Parser::default().parse(input).unwrap();
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x12,
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// Line that starts directly with the counter.
    #[test]
    fn parser_parse_should_handle_second_format() {
        let input = "12 BOARD-1234-3-AppName <10.19 13:35:36.981305> 270 INF/ABC/XYZ, something \
                     something something";
        let log = Parser::default().parse(input).unwrap();
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x12,
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// Line whose timestamp uses the `%Y-%m-%dT%H:%M:%S.%f` format, so the year is set.
    #[test]
    fn parser_parse_should_handle_third_format() {
        let input = "123456 02.11 17:20:30.92 [192.168.1.1] 57 BOARD-1234-3-AppName \
                     <2015-10-19T13:35:36.981305Z> 270-ThreadName DBG/ABC/File.cpp#42 something \
                     something something";
        let log = Parser::default().parse(input).unwrap();
        let mut expected_time = expected_time();
        expected_time.tm_year = 2015;
        let expected_log = LogLine {
            time: expected_time,
            counter: 0x57,
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// Equality and ordering depend on time and counter only, never on the payload.
    #[test]
    fn log_relations() {
        let mut smaller_time = expected_time();
        smaller_time.tm_mon -= 1;
        let smaller_time = smaller_time;
        let expected_log = LogLine {
            time: expected_time(),
            counter: 0x10,
            line: "",
        };
        let mut expected_eq_log = expected_log.clone();
        expected_eq_log.line = "test";
        let expected_eq_log = expected_eq_log;
        let smaller_time_log = LogLine {
            time: smaller_time,
            counter: 0x10,
            line: "",
        };
        let smaller_counter_log = LogLine {
            time: expected_time(),
            counter: 0x9,
            line: "",
        };
        assert_eq!(expected_log, expected_log);
        assert_eq!(expected_log, expected_eq_log);
        assert!(expected_log != smaller_time_log);
        assert!(expected_log != smaller_counter_log);
        assert!(expected_log > smaller_time_log);
        assert!(expected_log > smaller_counter_log);
        assert!(!(expected_log > expected_eq_log));
        assert!(!(expected_log < expected_eq_log));
    }
}
main.rs
extern crate logparse;
extern crate toml;
extern crate regex;
#[macro_use]
extern crate log;
extern crate env_logger;
#[macro_use]
extern crate quick_error;
use std::io::{self, BufRead, Read};
use std::vec::Vec;
use std::fs::File;
use std::path::PathBuf;
// Errors that can occur while locating, reading and interpreting the
// configuration file. Generated with the `quick_error!` macro.
quick_error! {
#[derive(Debug)]
pub enum ConfigError {
// The user's home directory could not be determined.
HomeDirNotSet {
description("Home directory not set")
}
// Opening the configuration file at `path` failed with `err`.
UnableToOpen(err: io::Error, path: PathBuf) {
description("Unable to open configuration")
display("Unable to open configuration from {}: {}", path.display(), err)
cause(err)
}
// Reading the opened configuration file at `path` failed with `err`.
UnableToRead(err: io::Error, path: PathBuf) {
description("Unable to read configuration")
display("Unable to read configuration from {}: {}", path.display(), err)
cause(err)
}
// The file at `path` is not valid TOML; `errors` holds the parser's diagnostics.
UnableToParse(errors: Vec<toml::ParserError>, path: PathBuf) {
description("Unable to parse configuration")
display("Unable to parse configuration from {}: {:?}", path.display(), errors)
}
// The TOML document at `path` has no `entry` array.
ExpectedEntries(path: PathBuf) {
description("Expected array of 'entry'")
}
}
}
fn parse_config() -> Result<Vec<logparse::LineConfiguration>, ConfigError> {
let mut path = try!(std::env::home_dir().ok_or(ConfigError::HomeDirNotSet));
path.push(".sort_logs");
let mut f = try!(File::open(&path).map_err(|e| ConfigError::UnableToOpen(e, path.clone())));
let mut buf = String::new();
try!(f.read_to_string(&mut buf).map_err(|e| ConfigError::UnableToRead(e, path.clone())));
let mut parser = toml::Parser::new(&buf);
let parse_result = parser.parse();
let parse_errors = parser.errors;
let toml = try!(parse_result.ok_or_else(|| ConfigError::UnableToParse(parse_errors, path.clone())));
let entries = try!(toml
.get("entry")
.and_then(toml::Value::as_slice)
.ok_or_else(|| ConfigError::ExpectedEntries(path.clone())));
let mut config = vec![];
for entry in entries {
if let &toml::Value::Table(ref table) = entry {
if let Some((regex, formats)) = parse_entry(&table) {
config.push(logparse::LineConfiguration::new(regex, formats));
}
}
}
Ok(config)
}
fn parse_entry(entry: &toml::Table) -> Option<(regex::Regex, Vec<String>)> {
let regex = match entry.get("regex").and_then(toml::Value::as_str) {
Some(ref s) => {
match regex::Regex::new(&s) {
Err(e) => {
warn!("Failed to parse {}: {:?}", s, e);
return None;
},
Ok(re) => re
}
},
None => return None,
};
let times_strings = match entry.get("times").and_then(toml::Value::as_slice) {
Some(ref strings) => {
strings
.iter()
.filter_map(|s| s.as_str())
.map(str::to_owned)
.collect()
},
None => return None
};
Some((regex, times_strings))
}
/// Reads log lines from stdin, sorts the parseable ones and prints them.
///
/// A configuration problem is logged and the default parser is used instead.
/// Unreadable lines are skipped silently; unparseable lines are logged and dropped.
fn main() {
    env_logger::init().unwrap();

    let parser = match parse_config() {
        Ok(config) => logparse::Parser::new(config),
        Err(e) => {
            warn!("Unable to parse configuration: {}", e);
            logparse::Parser::default()
        }
    };

    let reader = std::io::BufReader::new(std::io::stdin());
    let mut sorted = Vec::new();
    for line in reader.lines().filter_map(|line| line.ok()) {
        // Parse against a borrow, then strip the borrowed payload so the
        // owned `line` can be moved into the stored value.
        let parsed = parser.parse(&line).map(|log_line| log_line.map(|_| ()));
        match parsed {
            Some(meta) => sorted.push(meta.map(|_| line)),
            None => {
                warn!("Failed to parse line: {}", line);
            }
        }
    }

    sorted.sort();
    for log in sorted {
        println!("{}", log.line);
    }
}
Alternatively, you could consider splitting the meta info from the text. I think this looks quite clean:
extern crate time;
extern crate regex;
/// Sort metadata of a log line. The derived ordering compares `time` first, then `counter`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct LogInfo {
/// Timestamp parsed from the line's `time` capture group.
time: time::Tm,
/// One-byte counter, parsed as hexadecimal from the `counter` capture group.
counter: u8,
}
/// Sort metadata paired with a caller-chosen representation `L` of the line.
/// The derived comparisons consider `info` first and then `line`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct LogLine<L> {
/// Time and counter extracted from the line.
pub info: LogInfo,
/// The line payload; whatever type the caller handed to the parser.
pub line: L,
}
impl<L> LogLine<L> {
    /// Replaces the line payload with `f(line)` while keeping the metadata.
    pub fn map<F, T>(self, f: F) -> LogLine<T>
        where F: FnOnce(L) -> T
    {
        let LogLine { info, line } = self;
        LogLine {
            info: info,
            line: f(line),
        }
    }
}
/// Turns raw text lines into `LogLine` values.
pub struct Parser {
// Line configurations, tried in order for every input line.
config: Vec<LineConfiguration>,
}
impl Default for Parser {
fn default() -> Parser {
let main_regex = regex::Regex::new(r"\b(?P<counter>.{2}) \S+ <(?P<time>[^>]+)> ")
.expect("Unable to build default regex");
let time_strings = vec!["%m.%d %H:%M:%S.%f".to_string(),
"%Y-%m-%dT%H:%M:%S.%f".to_string()];
Parser { config: vec![LineConfiguration::new(main_regex, time_strings)] }
}
}
impl Parser {
    /// Builds a parser from caller-supplied line configurations.
    pub fn new(config: Vec<LineConfiguration>) -> Parser {
        Parser { config: config }
    }

    /// Parses `input` and, on success, returns its metadata together with
    /// `input` itself as the line payload.
    pub fn parse<L>(&self, input: L) -> Option<LogLine<L>>
        where L: AsRef<str>
    {
        let info = self.parse_line(input.as_ref());
        info.map(|info| {
            LogLine {
                info: info,
                line: input,
            }
        })
    }

    /// Returns the metadata from the first configuration that can parse `input`.
    fn parse_line(&self, input: &str) -> Option<LogInfo> {
        for candidate in &self.config {
            if let Some(info) = candidate.parse(input) {
                return Some(info);
            }
        }
        None
    }
}
/// One way of recognising a log line: a regex plus the time formats to try on its `time` capture.
pub struct LineConfiguration {
// Pattern providing the `counter` and `time` named capture groups.
regex: regex::Regex,
// `strptime` format strings, tried in order.
formats: Vec<String>,
}
impl LineConfiguration {
pub fn new(regex: regex::Regex, formats: Vec<String>) -> Self {
LineConfiguration {
regex: regex,
formats: formats,
}
}
fn parse(&self, input: &str) -> Option<LogInfo> {
self.regex.captures(input).and_then(|cap| {
let counter = cap.name("counter").and_then(|c| u8::from_str_radix(c, 16).ok());
let time = cap.name("time").and_then(|t| self.parse_time(t));
match (counter, time) {
(Some(counter), Some(time)) => {
Some(LogInfo {
time: time,
counter: counter,
})
}
_ => None,
}
})
}
fn parse_time(&self, capture: &str) -> Option<time::Tm> {
self.formats
.iter()
.filter_map(|fmt| time::strptime(&capture, fmt).ok())
.next()
.map(reset_year)
}
}
/// Shifts a non-zero `tm_year` by 1900 and leaves a zero year untouched.
fn reset_year(t: time::Tm) -> time::Tm {
    match t.tm_year {
        0 => t,
        year => time::Tm { tm_year: year + 1900, ..t },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use time;

    /// Timestamp shared by the parsing tests: month index 9, day 19,
    /// 13:35:36.981305, with no year set.
    fn expected_time() -> time::Tm {
        time::Tm {
            tm_sec: 36,
            tm_min: 35,
            tm_hour: 13,
            tm_mday: 19,
            tm_mon: 9,
            tm_year: 0,
            tm_wday: 0,
            tm_yday: 0,
            tm_isdst: 0,
            tm_utcoff: 0,
            tm_nsec: 981305000,
        }
    }

    /// Line with a leading sequence number, receive time and source address.
    #[test]
    fn parser_parse_should_handle_first_format() {
        let input = "000001 10.19 13:35:17.416 [127.0.0.1] 12 BOARD-1234-3-AppName <10.19 \
                     13:35:36.981305> 270 DBG/ABC/XYZ, something something";
        let log = Parser::default().parse(input).unwrap();
        let expected_log = LogLine {
            info: LogInfo {
                time: expected_time(),
                counter: 0x12,
            },
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// Line that starts directly with the counter.
    #[test]
    fn parser_parse_should_handle_second_format() {
        let input = "12 BOARD-1234-3-AppName <10.19 13:35:36.981305> 270 INF/ABC/XYZ, something \
                     something something";
        let log = Parser::default().parse(input).unwrap();
        let expected_log = LogLine {
            info: LogInfo {
                time: expected_time(),
                counter: 0x12,
            },
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// Line whose timestamp uses the `%Y-%m-%dT%H:%M:%S.%f` format, so the year is set.
    #[test]
    fn parser_parse_should_handle_third_format() {
        let input = "123456 02.11 17:20:30.92 [192.168.1.1] 57 BOARD-1234-3-AppName \
                     <2015-10-19T13:35:36.981305Z> 270-ThreadName DBG/ABC/File.cpp#42 something \
                     something something";
        let log = Parser::default().parse(input).unwrap();
        let mut expected_time = expected_time();
        expected_time.tm_year = 2015;
        let expected_log = LogLine {
            info: LogInfo {
                time: expected_time,
                counter: 0x57,
            },
            line: input,
        };
        assert_eq!(expected_log, log);
    }

    /// `LogInfo` carries the sort relations; whole-`LogLine` comparisons also see the payload.
    #[test]
    fn log_relations() {
        let mut smaller_time = expected_time();
        smaller_time.tm_mon -= 1;
        let smaller_time = smaller_time;
        let expected_log = LogLine {
            info: LogInfo {
                time: expected_time(),
                counter: 0x10,
            },
            line: "",
        };
        let mut expected_eq_log = expected_log.clone();
        expected_eq_log.line = "test";
        let expected_eq_log = expected_eq_log;
        let smaller_time_log = LogLine {
            info: LogInfo {
                time: smaller_time,
                counter: 0x10,
            },
            line: "",
        };
        let smaller_counter_log = LogLine {
            info: LogInfo {
                time: expected_time(),
                counter: 0x9,
            },
            line: "",
        };
        assert_eq!(expected_log, expected_log);
        assert_eq!(expected_log.info, expected_eq_log.info);
        assert!(expected_log != smaller_time_log);
        assert!(expected_log != smaller_counter_log);
        assert!(expected_log.info > smaller_time_log.info);
        assert!(expected_log.info > smaller_counter_log.info);
        assert!(!(expected_log.info > expected_eq_log.info));
        assert!(!(expected_log.info < expected_eq_log.info));
    }
}