2
\$\begingroup\$

This is a recursive descent parser for simple language with following grammar:
PROGRAM <- {STATEMENT ';'}* RETURN_STMT ';'
STATEMENT <- NAME_BINDING | TYPE_DECLARATION
TYPE_DECLARATION <- named_param identifier
NAME_BINDING <- identifier '=' EXPRESSION
EXPRESSION <- (simple arithmetics with (), +-*/ and unary minus, arguments can be constants, named parameters or identifiers)
RETURN_STMT = 'return' EXPRESSION {',' EXPRESSION}* ';'

Module entry point is parseGrammar, resulting AST represented as Program

module LangParser(parseGrammar, Token(..), Result(..)) where
import Control.Applicative
import Control.Monad
data Token =
 Return |
 Simple Char |
 DecimalConst String |
 HexConst String |
 NamedParam String |
 Identifier String |
 LexError String |
 EOF
 deriving (Eq, Show)
data Op = ONop | OPlus | OMinus | OStar | OSlash | ONeg deriving (Show, Eq)
instance Ord Op where
 compare ONeg _ = GT
 compare _ ONeg = LT
 compare OStar _ = GT
 compare OSlash _ = GT
 compare OPlus b
 | b == OStar = LT
 | b == OSlash = LT
 | otherwise = GT
 compare OMinus b
 | b == OStar = LT
 | b == OSlash = LT
 | otherwise = GT
 compare _ _ = error "invalid comparison"
data Expr =
 EList Expr Expr |
 EAdd Expr Expr |
 ESub Expr Expr |
 EMul Expr Expr |
 EDiv Expr Expr |
 ENeg Expr |
 EIntConst Int |
 EVariable String |
 ENamedParam String
 deriving Show
data Stmt = 
 SReturn [Expr] |
 SDecl String String |
 SBinding String Expr
 deriving Show
type Program = [Stmt]
data ParserState = ParserState { 
 prog :: Program, 
 exprStack :: [Expr], 
 opStack :: [Op],
 stmtIdent :: String,
 stmtParam :: String, 
 rest :: [Token] 
 } deriving Show
data Result a = Error Token | State a deriving Show
instance Monad Result where
 (Error e) >>= _ = Error e
 (State a) >>= f = f a
 return = State
instance Functor Result where
 fmap _ (Error t) = Error t
 fmap f (State a) = State (f a)
instance Applicative Result where
 pure = return
 (<*>) = ap
instance Alternative Result where
 empty = Error EOF
 Error _ <|> p = p
 State x <|> _ = State x
epsilon :: ParserState -> Result ParserState
epsilon = State
term :: Token -> ParserState -> Result ParserState
term t state@ParserState {rest = (x:xs)}
 | t == x = State state{rest = xs}
 | otherwise = Error x 
term _ ParserState {rest = []} = Error EOF
termS :: Char -> ParserState -> Result ParserState
termS c = term (Simple c) 
termValue :: ParserState -> Result ParserState
termValue state@ParserState {rest = (DecimalConst x:xs)} = State state{exprStack = v : exprStack state, rest = xs}
 where v = EIntConst (read x)
termValue state@ParserState {rest = (HexConst x:xs)} = State state{exprStack = v : exprStack state, rest = xs}
 where v = EIntConst (read x)
termValue state@ParserState {rest = (NamedParam x:xs)} = State state{exprStack = ENamedParam x : exprStack state, rest = xs}
termValue state@ParserState {rest = (Identifier x:xs)} = State state{exprStack = EVariable x : exprStack state, rest = xs}
termValue ParserState {rest = (x:_)} = Error x
termValue ParserState {rest = []} = Error EOF
termIdent :: ParserState -> Result ParserState
termIdent state@ParserState {rest = (Identifier x:xs)} = State state{stmtIdent = x, rest = xs} 
termIdent ParserState {rest = (x:_)} = Error x
termIdent ParserState {rest = []} = Error EOF
foldExpr :: Op -> ParserState -> Result ParserState
foldExpr _ state@ParserState{opStack = []} = State state 
foldExpr stop state@ParserState{opStack = (op:_)} | op <= stop = State state
foldExpr stop state@ParserState{exprStack = (e:es), opStack = (ONeg:ops)} 
 = foldExpr stop state{ exprStack = ENeg e : es, opStack = ops } 
foldExpr stop state@ParserState{exprStack = (e1:e2:es), opStack = (op:ops)}
 = case op of
 OPlus -> foldExpr stop state{ exprStack = EAdd e2 e1 : es, opStack = ops }
 OMinus -> foldExpr stop state{ exprStack = ESub e2 e1 : es, opStack = ops }
 OStar -> foldExpr stop state{ exprStack = EMul e2 e1 : es, opStack = ops }
 OSlash -> foldExpr stop state{ exprStack = EDiv e2 e1 : es, opStack = ops }
 _ -> error "invalid op in stack"
foldExpr _ _ = error "invalid state in fold"
exprS :: ParserState -> Result ParserState
exprS = expr >=> foldExpr ONop
expr :: ParserState -> Result ParserState
expr s = expr' s <|> (termNeg >=> expr') s
 where 
 termNeg = termS '-' >=> (\state -> State state{opStack = ONeg : opStack state})
 expr' st = (termValue >=> exprRest) st <|> 
 (termS '(' >=> exprS >=> termS ')' >=> exprRest) st
exprRest :: ParserState -> Result ParserState
exprRest s = 
 (termOp '+' >=> expr) s <|> 
 (termOp '-' >=> expr) s <|> 
 (termOp '*' >=> expr) s <|> 
 (termOp '/' >=> expr) s <|> 
 epsilon s
 where
 termOp c = termS c >=> foldExpr (op c) >=> (\state -> State state{opStack = op c : opStack state}) 
 op '+' = OPlus
 op '-' = OMinus
 op '*' = OStar
 op '/' = OSlash
 op _ = error "bad op"
retexprs :: ParserState -> Result ParserState
retexprs = exprS >=> exprList
 where 
 exprList s = (termS ',' >=> retexprs >=> action) s <|> epsilon s
 action state@ParserState {exprStack = (e1:e2:es)} = State state{exprStack = EList e2 e1 : es}
 action _ = error "2"
retStmt :: ParserState -> Result ParserState
retStmt = term Return >=> retexprs >=> termS ';' >=> action
 where
 action state@ParserState {exprStack = (e:es)} = State state{prog = SReturn (reverse $ unroll e []) : prog state, exprStack = es}
 action _ = error "3"
 unroll :: Expr -> [Expr] -> [Expr]
 unroll (EList e1 e2) es = unroll e2 (e1:es)
 unroll e es = e:es
nameBinding :: ParserState -> Result ParserState
nameBinding = termIdent >=> termS '=' >=> exprS >=> action
 where 
 action state@ParserState {stmtIdent = ident, exprStack = (e:[])} 
 = State state{prog = SBinding ident e : prog state, exprStack = []}
 action _ = error "invalid binding"
paramDecl :: ParserState -> Result ParserState
paramDecl = termNamedParam >=> termIdent >=> action
 where
 action state@ParserState{stmtParam = param, stmtIdent = ident} 
 = State state{prog = SDecl param ident: prog state}
 termNamedParam state@ParserState {rest = (NamedParam x:xs)} = State state{stmtParam = x, rest = xs} 
 termNamedParam ParserState {rest = (x:_)} = Error x
 termNamedParam ParserState {rest = []} = Error EOF
statements :: ParserState -> Result ParserState
statements s = (statement >=> termS ';' >=> statements) s <|> epsilon s
 where statement st = paramDecl st <|> nameBinding st
parseProgram :: ParserState -> Result ParserState
parseProgram s = (statements >=> retStmt) s <|> retStmt s
parseGrammar :: [Token] -> Result ParserState
parseGrammar st = parseProgram (ParserState [] [] [] "" "" st) 

I'm looking for code improvements. For example, repeated this lines looks ugly:

NNN ParserState {rest = (x:_)} = Error x
NNN ParserState {rest = []} = Error EOF

Also, I would like to separate parser states for parsing statements and expressions. Writing two separate types will require doubling number of epsilon/term/termIdent functions, which I hope to avoid.

asked Jun 14, 2012 at 7:55
\$\endgroup\$
2
  • \$\begingroup\$ Are your operators all supposed to have equal precedence? I think your code could be made to look much better if a little more order could be placed on foldExpr, exprS, expr and exprRest \$\endgroup\$ Commented Jun 16, 2012 at 3:20
  • \$\begingroup\$ No, there is normal arithmetic precedence. \$\endgroup\$ Commented Jun 16, 2012 at 22:09

1 Answer 1

3
\$\begingroup\$

A suggested change,

data MyType = TValue | TIdent | TNamedParam
termX :: MyType -> ParserState -> Result ParserState
termX t state = case (t, rest state) of
 (TIdent, Identifier x:xs) -> vstate' x xs
 (TNamedParam, NamedParam x:xs) -> vstate' x xs
 (TValue, HexConst x:xs) -> vstate xs (v' x)
 (TValue, DecimalConst x:xs) -> vstate xs (v' x)
 (TValue, NamedParam x:xs) -> vstate xs (ENamedParam x)
 (TValue, Identifier x:xs) -> vstate xs (EVariable x)
 (_ , (x:_)) -> Error x
 (_ , []) -> Error EOF
 where v' = EIntConst . read
 vstate xs v = State state{exprStack = v: exprStack state, rest = xs}
 vstate' x xs = State state{stmtParam = x, rest = xs}
termValue = termX TValue
termIdent = termX TIdent
termNamedParam = termX TNamedParam

I think this is better than repeating the state@ again and again.

I think the below might also be nice since it is avoiding the repetition of foldexpression

foldExpr _ state@ParserState{opStack = []} = State state 
foldExpr stop state@ParserState{exprStack = ex, opStack = (x:ops)} = case process ex x of 
 Just ev -> foldExpr stop state{ exprStack = ev, opStack = ops } 
 Nothing -> State state
 where process (e:es) ONeg = Just $ ENeg e : es
 process (e1:e2:es) OPlus = Just $ EAdd e2 e1 : es
 process (e1:e2:es) OMinus = Just $ ESub e2 e1 : es
 process (e1:e2:es) OStar = Just $ EMul e2 e1 : es
 process (e1:e2:es) OSlash = Just $ EDiv e2 e1 : es
 process e y | y <= stop = Nothing

A slightly terse version of exprRest. I think the do notation is clearer here.

exprRest :: ParserState -> Result ParserState
exprRest s = foldr (\op acc ->
 (termOp op >=> expr) s <|> acc) (epsilon s) ['+', '-', '*', '/']
 where
 termOp c = termS c >=> foldExpr (op c) >=> \state ->
 State state{opStack = op c : opStack state}
 op '+' = OPlus
 op '-' = OMinus
 op '*' = OStar
 op '/' = OSlash
 op _ = error "bad op"

Here is the only change I made, Look at this pattern,

((termOp '+' >=> expr) s) <|>
 ((termOp '-' >=> expr) s) <|>
 ((termOp '*' >=> expr) s) <|>
 ((termOp '/' >=> expr) s) <|>
epsilon s

Which is same as

let myfn op = (termOp op >=> expr) s in
((myfn '+') <|> (myfn '-') <|> (myfn '*') <|> (myfn '/') <|>
epsilon s

This can be replaced with a fold

fold (\opfn acc -> opfn <|> acc) (epsilon s) [myfn +, myfn -, myfn *, myfn /]

equivalently, ...


So I used your question as an excuse to understand lenses. Here is the resulting code. See if you like it. (I don't claim it is good as I am still learning lenses)

{-# LANGUAGE TemplateHaskell, FlexibleContexts #-}
module LangParser(parseGrammar, Token(..), Result(..)) where
import Control.Applicative
import Control.Monad
import Data.Lens.Template (makeLenses)
import Data.Lens.Lazy
data Token =
 Return |
 Simple Char |
 DecimalConst String |
 HexConst String |
 NamedParam String |
 Identifier String |
 LexError String |
 EOF
 deriving (Eq, Show)
data Op = ONop | OPlus | OMinus | OStar | OSlash | ONeg deriving (Show, Eq)
instance Ord Op where
 compare ONeg _ = GT
 compare _ ONeg = LT
 compare OStar _ = GT
 compare OSlash _ = GT
 compare OPlus b
 | b == OStar = LT
 | b == OSlash = LT
 | otherwise = GT
 compare OMinus b
 | b == OStar = LT
 | b == OSlash = LT
 | otherwise = GT
 compare _ _ = error "invalid comparison"
data Expr = EList Expr Expr |
 EAdd Expr Expr |
 ESub Expr Expr |
 EMul Expr Expr |
 EDiv Expr Expr |
 ENeg Expr |
 EIntConst Int |
 EVariable String |
 ENamedParam String
 deriving Show
data Stmt = SReturn [Expr] |
 SDecl String String |
 SBinding String Expr
 deriving Show
type Program = [Stmt]
data ParserState = ParserState { 
 _prog :: Program, 
 _exprStack :: [Expr], 
 _opStack :: [Op],
 _stmtIdent :: String,
 _stmtParam :: String, 
 _rest :: [Token] 
 } deriving Show
$( makeLenses [''ParserState])
data Result a = Error Token | State a
 deriving Show
instance Monad Result where
 (Error e) >>= _ = Error e
 (State a) >>= f = f a
 return = State
instance Functor Result where
 fmap _ (Error t) = Error t
 fmap f (State a) = State (f a)
instance Applicative Result where
 pure = return
 (<*>) = ap
instance Alternative Result where
 empty = Error EOF
 Error _ <|> p = p
 State x <|> _ = State x
epsilon :: ParserState -> Result ParserState
epsilon = State
term :: Token -> ParserState -> Result ParserState
term t state = case state ^. rest of
 (x:xs) -> if t == x then State (rest ^= xs $ state) else Error x 
 [] -> Error EOF
termS :: Char -> ParserState -> Result ParserState
termS c = term (Simple c) 
data MyType = TValue | TIdent | TNamedParam
termX :: MyType -> ParserState -> Result ParserState
termX t state = case (t, rest ^$ state) of
 (TIdent, Identifier x:xs) -> vstate' x xs
 (TNamedParam, NamedParam x:xs) -> vstate' x xs
 (TValue, HexConst x:xs) -> vstate xs (v' x)
 (TValue, DecimalConst x:xs) -> vstate xs (v' x)
 (TValue, NamedParam x:xs) -> vstate xs (ENamedParam x)
 (TValue, Identifier x:xs) -> vstate xs (EVariable x)
 (_ , (x:_)) -> Error x
 (_ , []) -> Error EOF
 where v' = EIntConst . read 
 vstate xs v = State ( exprStack ^= v: (exprStack ^$ state) $ rest ^= xs $ state )
 vstate' x xs = State ( stmtParam ^= x $ rest ^= xs $ state)
foldExpr :: Op -> ParserState -> Result ParserState
foldExpr stop state = case opStack ^$ state of
 [] -> State state
 (x:ops) -> case process ex x of 
 Just ev -> foldExpr stop (exprStack ^= ev $ opStack ^= ops $ state)
 Nothing -> State state
 where process (e:es) ONeg = Just $ ENeg e : es
 process (e1:e2:es) OPlus = Just $ EAdd e2 e1 : es
 process (e1:e2:es) OMinus = Just $ ESub e2 e1 : es
 process (e1:e2:es) OStar = Just $ EMul e2 e1 : es
 process (e1:e2:es) OSlash = Just $ EDiv e2 e1 : es
 process e y | y <= stop = Nothing
 ex = exprStack ^$ state
exprS :: ParserState -> Result ParserState
exprS = expr >=> foldExpr ONop
expr :: ParserState -> Result ParserState
expr s = expr' s <|> (termNeg >=> expr') s
 where 
 termNeg s = termS '-' s >>= State . (opStack ^%= (ONeg :))
 expr' st = (termX TValue >=> exprRest) st <|> (termS '(' >=> exprS >=> termS ')' >=> exprRest) st
unroll :: Expr -> [Expr] -> [Expr]
unroll (EList e1 e2) es = unroll e2 (e1:es)
unroll e es = e:es
exprRest :: ParserState -> Result ParserState
exprRest s = 
 (termOp '+' >=> expr) s <|> 
 (termOp '-' >=> expr) s <|> 
 (termOp '*' >=> expr) s <|> 
 (termOp '/' >=> expr) s <|> 
 epsilon s
 where
 termOp c = termS c >=> foldExpr (op c) >=> State . (opStack ^%= (op c :))
 op '+' = OPlus
 op '-' = OMinus
 op '*' = OStar
 op '/' = OSlash
 op _ = error "bad op"
retexprs :: ParserState -> Result ParserState
retexprs s = do a <- exprS s
 b <- termS ',' a
 c <- retexprs b
 action c <|> epsilon s
 where 
 action state = case getES state of
 (e1:e2:es) -> State (myprog state e1 e2 es)
 _ -> error "2"
 myprog state e1 e2 es = setES state (EList e2 e1 : es)
retStmt :: ParserState -> Result ParserState
retStmt s = do a <- term Return s
 b <- retexprs a
 c <- termS ';' b
 action c
 where
 action :: ParserState -> Result ParserState
 action state = case getES state of
 (e:es) -> State (myprog state e es)
 _ -> error "3"
 myprog state e es = prog ^= SReturn (reverse $ unroll e []) : (state ^. prog) $ setES state es
nameBinding :: ParserState -> Result ParserState
nameBinding s = do a <- termX TIdent s
 b <- termS '=' a
 c <- exprS b
 action c
 where 
 action :: ParserState -> Result ParserState
 action state = case getES state of
 (e:[]) -> State (myprog state e)
 _ -> error "invalid binding"
 myprog state e = prog ^= SBinding (state ^. stmtIdent) e : (state ^. prog) $ setES state []
paramDecl :: ParserState -> Result ParserState
paramDecl s = do a <- termX TNamedParam s
 b <- termX TIdent a
 action b
 where
 action :: ParserState -> Result ParserState
 action state = State (prog ^= SDecl (state ^. stmtParam) (state ^. stmtIdent) : (state ^. prog) $ state)
setES state es = (exprStack ^= es) state
getES state = state ^. exprStack 
statements :: ParserState -> Result ParserState
statements s = do b <- statement s
 c <- termS ';' b
 statements c <|> epsilon s
 where statement st = paramDecl st <|> nameBinding st
parseProgram :: ParserState -> Result ParserState
parseProgram s = do b <- statements s
 retStmt b <|> retStmt s
parseGrammar :: [Token] -> Result ParserState
parseGrammar st = parseProgram (ParserState [] [] [] "" "" st) 
answered Jun 15, 2012 at 9:03
\$\endgroup\$
7
  • \$\begingroup\$ It's exactly how it supposed to be. In comparison left operand is always on operations stack and right operand is just taken from token stream. Left-associative operations with equal precedence should compare as GT, right-associative as LT. This is required for arithmetically correct :) parsing. \$\endgroup\$ Commented Jun 15, 2012 at 12:48
  • \$\begingroup\$ And I can't derive from Ord because OPlus and OMinus have equal precedence, but are different. \$\endgroup\$ Commented Jun 15, 2012 at 12:49
  • \$\begingroup\$ So is it correct for Op to have an Ord instance? Isn't one of the requirements for an Ord class, a total ordering? I suppose, it may not lead to problems here, but I was taught to be very careful to only define instances of Typeclasses if you can provide the same guarantees of their interfaces. \$\endgroup\$ Commented Jun 15, 2012 at 15:45
  • \$\begingroup\$ I see your point. Will remove Ord and change to plain comparison function. \$\endgroup\$ Commented Jun 16, 2012 at 22:08
  • \$\begingroup\$ Could you explain change in exprRest? I don't get how it works. \$\endgroup\$ Commented Jun 16, 2012 at 22:16

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.