Lexer written in Rust

Question 1

So I ported a lexer I wrote in C++ over to rust, as I'm starting to learn rust. Since I'm very new though, I don't know any Idioms and good practices in rust. So if anyone could point out some (probably obvious) issues I'd be very thankful.

This is my code:

use std::cmp::PartialEq;
use std::fmt::Debug;
#[derive(Debug, PartialEq)]
pub enum Token {
 EOF,
 Ident(String),
 Str(String),
 FNum(f64),
 INum(u64),
 Assign,
 BLsh,
 BRsh,
 BURsh,
 If,
 Else,
 Elif,
 Loop,
 Stop,
 Skip,
 Yes,
 No,
 Nope,
 Fun,
 Ret,
 And,
 Not,
 Or,
 LBrack,
 LBrace,
 LPar,
 RBrack,
 RBrace,
 RPar,
 Semi,
 Comma,
 Get,
 Concat,
 IDiv,
 FDiv,
 Add,
 Minus,
 Mul,
 Mod,
 BXOr,
 BAnd,
 BOr,
 IE,
 EQ,
 NE,
 LT,
 LE,
 GT,
 GE,
}
macro_rules! peek_char {
 ($e:expr) => {
 match $e.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::EOF;
 }
 }
 };
}
pub fn get_token(iterator: &mut std::iter::Peekable<std::str::Chars>) -> Token {
 let mut next: char = peek_char!(iterator);
 let mut current_token: String = String::new();
 if next.is_whitespace() {
 loop {
 next = peek_char!(iterator);
 if !next.is_whitespace() {
 break;
 }
 iterator.next();
 }
 get_token(iterator)
 } else if next == '#' {
 loop {
 next = peek_char!(iterator);
 if next == '\n' {
 break;
 }
 iterator.next();
 }
 get_token(iterator)
 } else if next.is_alphabetic() {
 loop {
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 break;
 }
 };
 if !(next.is_alphanumeric() || next == '_') {
 break;
 }
 iterator.next();
 current_token.push(next);
 }
 match current_token.as_str() {
 "if" => Token::If,
 "else" => Token::Else,
 "elif" => Token::Elif,
 "loop" => Token::Loop,
 "stop" => Token::Stop,
 "skip" => Token::Skip,
 "yes" => Token::Yes,
 "no" => Token::No,
 "nope" => Token::Nope,
 "fun" => Token::Fun,
 "return" => Token::Ret,
 "and" => Token::And,
 "not" => Token::Not,
 "or" => Token::Or,
 _ => Token::Ident(current_token),
 }
 } else if next == '"' {
 iterator.next();
 loop {
 next = peek_char!(iterator);
 let to_add: char = if next == '\\' {
 iterator.next();
 next = peek_char!(iterator);
 match next {
 't' => '\t',
 'b' => '\x08',
 'n' => '\n',
 'r' => '\r',
 'f' => '\x0c',
 '"' => '"',
 '\\' => '\\',
 _ => panic!("unknown escaped character"),
 }
 } else if next == '"' {
 iterator.next();
 break;
 } else {
 next
 };
 iterator.next();
 current_token.push(to_add);
 }
 Token::Str(current_token)
 } else if next.is_digit(10) {
 loop {
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 break;
 }
 };
 if !(next.is_digit(10) || next == '.') {
 break;
 }
 iterator.next();
 if next == '.' && current_token.contains('.') {
 panic!("multiple decimal points in number");
 }
 current_token.push(next);
 }
 if current_token.contains('.') {
 Token::FNum(
 current_token
 .parse::<f64>()
 .expect("error reading float literal"),
 )
 } else {
 Token::INum(
 u64::from_str_radix(&current_token, 10).expect("error reading integer literal"),
 )
 }
 } else if next == '/' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::FDiv;
 }
 };
 if next == '/' {
 iterator.next();
 Token::IDiv
 } else {
 Token::FDiv
 }
 } else if next == '?' {
 iterator.next();
 next = peek_char!(iterator);
 iterator.next();
 if next == '=' {
 Token::IE
 } else {
 panic!("unknown character");
 }
 } else if next == '!' {
 iterator.next();
 next = peek_char!(iterator);
 iterator.next();
 if next == '=' {
 Token::NE
 } else {
 panic!("unknown character");
 }
 } else if next == '=' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::Assign;
 }
 };
 if next == '=' {
 iterator.next();
 Token::EQ
 } else {
 Token::Assign
 }
 } else if next == '<' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::LT;
 }
 };
 if next == '=' {
 iterator.next();
 Token::LE
 } else if next == '<' {
 iterator.next();
 Token::BLsh
 } else {
 Token::LT
 }
 } else if next == '>' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::GT;
 }
 };
 if next == '=' {
 iterator.next();
 Token::GE
 } else if next == '>' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::BRsh;
 }
 };
 if next == '>' {
 iterator.next();
 Token::BURsh
 } else {
 Token::BRsh
 }
 } else {
 Token::GT
 }
 } else {
 iterator.next();
 match next {
 '[' => Token::LBrack,
 '{' => Token::LBrace,
 '(' => Token::LPar,
 ']' => Token::RBrack,
 '}' => Token::RBrace,
 ')' => Token::RPar,
 ';' => Token::Semi,
 '.' => Token::Get,
 ',' => Token::Comma,
 '+' => Token::Add,
 '-' => Token::Minus,
 '*' => Token::Mul,
 '%' => Token::Mod,
 '$' => Token::Concat,
 '^' => Token::BXOr,
 '&' => Token::BAnd,
 '|' => Token::BOr,
 _ => panic!("unknown character"),
 }
 }
}

Question 2

A couple of general notes:

I’m not sure why you used a macro for peek_char!. I’m relatively certain this could have been a standard function.
Take some time to learn about slices.
You have a single, several hundred line function there. It’s hard to read and follow. Extract lots of well named functions to improve readability.
If you panic inside your function, there’s not really any way for the person calling your function to handle it. This should return a Result<Token, ParseError>.

RubberDuck RubberDuck 31.2k6 gold badges73 silver badges176 bronze badges · Accepted Answer · 2020-02-12 10:12:55Z

A couple of general notes:

I’m not sure why you used a macro for peek_char!. I’m relatively certain this could have been a standard function.
Take some time to learn about slices.
You have a single, several hundred line function there. It’s hard to read and follow. Extract lots of well named functions to improve readability.
If you panic inside your function, there’s not really any way for the person calling your function to handle it. This should return a Result<Token, ParseError>.

Stack Exchange Network

Lexer written in Rust

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Lexer written in Rust

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions