6
\$\begingroup\$

So I ported a lexer I wrote in C++ over to Rust, as I'm starting to learn Rust. Since I'm very new, though, I don't know any idioms or good practices in Rust. So if anyone could point out some (probably obvious) issues, I'd be very thankful.

This is my code:

use std::cmp::PartialEq;
use std::fmt::Debug;
/// A single lexical token produced by [`get_token`].
///
/// Each variant's documentation names the source text that `get_token`
/// turns into it. `Eq` is not derived because [`Token::FNum`] carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The input is exhausted.
    EOF,
    /// A word (alphabetic start, then alphanumerics or `_`) that is not a keyword.
    Ident(String),
    /// The contents of a `"…"` literal with escape sequences already resolved.
    Str(String),
    /// A numeric literal containing a decimal point.
    FNum(f64),
    /// A numeric literal without a decimal point.
    INum(u64),
    /// `=`
    Assign,
    /// `<<`
    BLsh,
    /// `>>`
    BRsh,
    /// `>>>`
    BURsh,
    /// Keyword `if`.
    If,
    /// Keyword `else`.
    Else,
    /// Keyword `elif`.
    Elif,
    /// Keyword `loop`.
    Loop,
    /// Keyword `stop`.
    Stop,
    /// Keyword `skip`.
    Skip,
    /// Keyword `yes`.
    Yes,
    /// Keyword `no`.
    No,
    /// Keyword `nope`.
    Nope,
    /// Keyword `fun`.
    Fun,
    /// Keyword `return`.
    Ret,
    /// Keyword `and`.
    And,
    /// Keyword `not`.
    Not,
    /// Keyword `or`.
    Or,
    /// `[`
    LBrack,
    /// `{`
    LBrace,
    /// `(`
    LPar,
    /// `]`
    RBrack,
    /// `}`
    RBrace,
    /// `)`
    RPar,
    /// `;`
    Semi,
    /// `,`
    Comma,
    /// `.`
    Get,
    /// `$`
    Concat,
    /// `//`
    IDiv,
    /// `/`
    FDiv,
    /// `+`
    Add,
    /// `-`
    Minus,
    /// `*`
    Mul,
    /// `%`
    Mod,
    /// `^`
    BXOr,
    /// `&`
    BAnd,
    /// `|`
    BOr,
    /// `?=`
    IE,
    /// `==`
    EQ,
    /// `!=`
    NE,
    /// `<`
    LT,
    /// `<=`
    LE,
    /// `>`
    GT,
    /// `>=`
    GE,
}
/// Evaluates to the next character of the peekable iterator `$e` without
/// consuming it.
///
/// If the iterator is exhausted, the macro makes the *enclosing function*
/// `return Token::EOF`, so it is only usable inside functions that return
/// `Token`. This hidden early return is the reason it is a macro.
macro_rules! peek_char {
    ($e:expr) => {
        if let Some(c) = $e.peek().cloned() {
            c
        } else {
            // Nothing left to look at: bail out of the calling function.
            return Token::EOF;
        }
    };
}
pub fn get_token(iterator: &mut std::iter::Peekable<std::str::Chars>) -> Token {
 let mut next: char = peek_char!(iterator);
 let mut current_token: String = String::new();
 if next.is_whitespace() {
 loop {
 next = peek_char!(iterator);
 if !next.is_whitespace() {
 break;
 }
 iterator.next();
 }
 get_token(iterator)
 } else if next == '#' {
 loop {
 next = peek_char!(iterator);
 if next == '\n' {
 break;
 }
 iterator.next();
 }
 get_token(iterator)
 } else if next.is_alphabetic() {
 loop {
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 break;
 }
 };
 if !(next.is_alphanumeric() || next == '_') {
 break;
 }
 iterator.next();
 current_token.push(next);
 }
 match current_token.as_str() {
 "if" => Token::If,
 "else" => Token::Else,
 "elif" => Token::Elif,
 "loop" => Token::Loop,
 "stop" => Token::Stop,
 "skip" => Token::Skip,
 "yes" => Token::Yes,
 "no" => Token::No,
 "nope" => Token::Nope,
 "fun" => Token::Fun,
 "return" => Token::Ret,
 "and" => Token::And,
 "not" => Token::Not,
 "or" => Token::Or,
 _ => Token::Ident(current_token),
 }
 } else if next == '"' {
 iterator.next();
 loop {
 next = peek_char!(iterator);
 let to_add: char = if next == '\\' {
 iterator.next();
 next = peek_char!(iterator);
 match next {
 't' => '\t',
 'b' => '\x08',
 'n' => '\n',
 'r' => '\r',
 'f' => '\x0c',
 '"' => '"',
 '\\' => '\\',
 _ => panic!("unknown escaped character"),
 }
 } else if next == '"' {
 iterator.next();
 break;
 } else {
 next
 };
 iterator.next();
 current_token.push(to_add);
 }
 Token::Str(current_token)
 } else if next.is_digit(10) {
 loop {
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 break;
 }
 };
 if !(next.is_digit(10) || next == '.') {
 break;
 }
 iterator.next();
 if next == '.' && current_token.contains('.') {
 panic!("multiple decimal points in number");
 }
 current_token.push(next);
 }
 if current_token.contains('.') {
 Token::FNum(
 current_token
 .parse::<f64>()
 .expect("error reading float literal"),
 )
 } else {
 Token::INum(
 u64::from_str_radix(&current_token, 10).expect("error reading integer literal"),
 )
 }
 } else if next == '/' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::FDiv;
 }
 };
 if next == '/' {
 iterator.next();
 Token::IDiv
 } else {
 Token::FDiv
 }
 } else if next == '?' {
 iterator.next();
 next = peek_char!(iterator);
 iterator.next();
 if next == '=' {
 Token::IE
 } else {
 panic!("unknown character");
 }
 } else if next == '!' {
 iterator.next();
 next = peek_char!(iterator);
 iterator.next();
 if next == '=' {
 Token::NE
 } else {
 panic!("unknown character");
 }
 } else if next == '=' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::Assign;
 }
 };
 if next == '=' {
 iterator.next();
 Token::EQ
 } else {
 Token::Assign
 }
 } else if next == '<' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::LT;
 }
 };
 if next == '=' {
 iterator.next();
 Token::LE
 } else if next == '<' {
 iterator.next();
 Token::BLsh
 } else {
 Token::LT
 }
 } else if next == '>' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::GT;
 }
 };
 if next == '=' {
 iterator.next();
 Token::GE
 } else if next == '>' {
 iterator.next();
 next = match iterator.peek().cloned() {
 Some(c) => c,
 None => {
 return Token::BRsh;
 }
 };
 if next == '>' {
 iterator.next();
 Token::BURsh
 } else {
 Token::BRsh
 }
 } else {
 Token::GT
 }
 } else {
 iterator.next();
 match next {
 '[' => Token::LBrack,
 '{' => Token::LBrace,
 '(' => Token::LPar,
 ']' => Token::RBrack,
 '}' => Token::RBrace,
 ')' => Token::RPar,
 ';' => Token::Semi,
 '.' => Token::Get,
 ',' => Token::Comma,
 '+' => Token::Add,
 '-' => Token::Minus,
 '*' => Token::Mul,
 '%' => Token::Mod,
 '$' => Token::Concat,
 '^' => Token::BXOr,
 '&' => Token::BAnd,
 '|' => Token::BOr,
 _ => panic!("unknown character"),
 }
 }
}
AlexV
7,353 • 2 gold badges • 24 silver badges • 47 bronze badges
asked Feb 10, 2020 at 23:36
\$\endgroup\$

1 Answer 1

4
\$\begingroup\$

A couple of general notes:

  • I’m not sure why you used a macro for peek_char!. I’m relatively certain this could have been a standard function.
  • Take some time to learn about slices.
  • You have a single, several hundred line function there. It’s hard to read and follow. Extract lots of well named functions to improve readability.
  • If you panic inside your function, there’s not really any way for the person calling your function to handle it. This should return a Result<Token, ParseError>.
answered Feb 12, 2020 at 10:12
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.