Since implementing my Interactive Brainfuck interpreter in Haskell I figured that it would be a good idea to build upon an Abstract Syntax Tree. I have also decided to drop replacing the IO
monad by some other structure for the time being as what I have now works fine, and fast.
The code consists of the following:
- The main application
- The tape
- The application
Main.hs
module Main where
import Control.Monad
import qualified Data.Map as Map
import System.Environment
import Interpreter
data SimpleOption = ShowProgram | ShowMemory deriving (Enum, Eq, Ord, Show)
data AdvancedOption = Program | File deriving (Enum, Eq, Ord, Show)
data Options = Options [SimpleOption] (Map.Map AdvancedOption String) deriving (Show)
addSimpleOption :: SimpleOption -> Options -> Options
addSimpleOption opt (Options sOpts aOpts) = Options (opt:sOpts) aOpts
addAdvancedOption :: AdvancedOption -> String -> Options -> Options
addAdvancedOption opt val (Options sOpts aOpts) = Options sOpts (Map.insert opt val aOpts)
isOptionsEmpty :: Options -> Bool
isOptionsEmpty (Options sOpts aOpts) = null sOpts && Map.null aOpts
main :: IO ()
main = do
args <- getArgs
case parseArgs args (Options [] Map.empty) >>= validateOptions of
Left err -> putStrLn err
Right opts -> execute opts
execute :: Options -> IO ()
execute opts@(Options sOpts _) = do
program <- getProgram opts
(program', memory) <- interpret program
when (ShowProgram `elem` sOpts) $ putStrLn ("\n" ++ show program')
when (ShowMemory `elem` sOpts) $ putStrLn ("\n" ++ show memory)
getProgram :: Options -> IO String
getProgram (Options _ aOpts) = case Map.lookup Program aOpts of
Nothing -> case Map.lookup File aOpts of
Just file -> readFile file
Just program -> return program
parseArgs :: [String] -> Options -> Either String Options
parseArgs [] opts = if isOptionsEmpty opts then Left usage else Right opts
parseArgs [program] opts = Right $ addAdvancedOption Program program opts
parseArgs ("-sp":args) opts = parseArgs args (addSimpleOption ShowProgram opts)
parseArgs ("-sm":args) opts = parseArgs args (addSimpleOption ShowMemory opts)
parseArgs ("-p":program:args) opts = parseArgs args (addAdvancedOption Program program opts)
parseArgs ("-f":file:args) opts = parseArgs args (addAdvancedOption File file opts)
parseArgs _ _ = Left usage
usage :: String
usage = "Usage: bf-interpreter-ast-exe [-sp] [-sm] [-f file] [-p program | program]"
validateOptions :: Options -> Either String Options
validateOptions opts@(Options _ aOpts)
| Program `Map.member` aOpts && File `Map.member` aOpts = Left "Error: Only one of the options File and Program can be present"
| Program `Map.member` aOpts || File `Map.member` aOpts = Right opts
| otherwise = Left "Error: One of the options File and Program must be present"
Tape.hs
module Tape
( Tape(..)
, makeTape
, forwardTape
, reverseTape
, tapeValue
, onTapeValue
) where
data Tape a = Tape [a] !a [a] deriving (Eq)
instance Show a => Show (Tape a) where
show (Tape ls v rs) = show (reverse ls) ++ " " ++ show v ++ " " ++ show rs
makeTape :: a -> Tape a
makeTape def = Tape [] def []
forwardTape :: a -> Tape a -> Tape a
forwardTape def (Tape ls v []) = Tape (v:ls) def []
forwardTape _ (Tape ls v rs) = Tape (v:ls) (head rs) (tail rs)
reverseTape :: a -> Tape a -> Tape a
reverseTape def (Tape [] v rs) = Tape [] def (v:rs)
reverseTape _ (Tape ls v rs) = Tape (tail ls) (head ls) (v:rs)
tapeValue :: Tape a -> a
tapeValue (Tape _ v _) = v
onTapeValue :: (a -> a) -> Tape a -> Tape a
onTapeValue func (Tape ls v rs) = Tape ls (func v) rs
Interpreter.hs
module Interpreter
( interpret
) where
import Data.Word
import System.IO
import Tape
data BFInstruction = MemoryRight | MemoryLeft | Increment | Decrement | Output | Input | Loop [BFInstruction] deriving (Eq, Show)
type BFProgram = [BFInstruction]
newtype BFMemoryCell = BFMemoryCell Word8 deriving (Eq, Show)
type BFMemory = Tape BFMemoryCell
cellValue :: BFMemoryCell -> Word8
cellValue (BFMemoryCell val) = val
onCellValue :: (Word8 -> Word8) -> BFMemoryCell -> BFMemoryCell
onCellValue func (BFMemoryCell val) = BFMemoryCell $ func val
makeProgram :: String -> BFProgram
makeProgram = makeProgram'
makeProgram' :: String -> BFProgram
makeProgram' "" = []
makeProgram' (x:xs) = case x of
'>' -> continue MemoryRight
'<' -> continue MemoryLeft
'+' -> continue Increment
'-' -> continue Decrement
'.' -> continue Output
',' -> continue Input
'[' -> do
let (loop, rest) = splitOnLoopEnd xs
Loop (makeProgram loop):makeProgram' rest
']' -> []
_ -> makeProgram' xs
where
continue instr = instr:makeProgram' xs
splitOnLoopEnd :: String -> (String, String)
splitOnLoopEnd = splitOnLoopEnd' 0
splitOnLoopEnd' :: Int -> String -> (String, String)
splitOnLoopEnd' _ "" = error "No matching ] found"
splitOnLoopEnd' 0 (']':xs') = ([], xs')
splitOnLoopEnd' nesting (x:xs') = case x of
']' -> (x:ys, zs) where (ys, zs) = next (subtract 1)
'[' -> (x:ys, zs) where (ys, zs) = next (+1)
_ -> (x:ys, zs) where (ys, zs) = next id
where
next func = splitOnLoopEnd' (func nesting) xs'
interpret :: String -> IO (BFProgram, BFMemory)
interpret input = do
let program = makeProgram input
let memory = makeTape (BFMemoryCell 0)
memory' <- execute program memory
return (program, memory')
execute :: BFProgram -> BFMemory -> IO BFMemory
execute [] memory = return memory
execute xs@(x:xs') memory = case x of
MemoryRight -> continue $ forwardTape (BFMemoryCell 0) memory
MemoryLeft -> continue $ reverseTape (BFMemoryCell 0) memory
Increment -> continue $ onTapeValue (onCellValue (+1)) memory
Decrement -> continue $ onTapeValue (onCellValue (subtract 1)) memory
Output -> do
putChar $ toEnum . fromEnum . cellValue . tapeValue $ memory
hFlush stdout
continue memory
Input -> do
ch <- getChar
continue $ onTapeValue (\_ -> BFMemoryCell . toEnum . fromEnum $ ch) memory
Loop program' -> if cellValue (tapeValue memory) == 0
then continue memory
else do
memory' <- execute program' memory
execute xs memory'
where
continue = execute xs'
1 Answer 1
Prefer pattern matching over head
/tail
Your forwardTape
and reverseTape
both use head
and tail
. This can lead to empty list errors if you accidentally swap the lines:
forwardTape _ (Tape ls v rs) = Tape (v:ls) (head rs) (tail rs) -- woops
forwardTape def (Tape ls v []) = Tape (v:ls) def []
Your completely prevent this error if you use pattern matching instead:
forwardTape _ (Tape ls v (r:rs)) = Tape (v:ls) r rs
forwardTape def (Tape ls v []) = Tape (v:ls) def []
If you don't like to be explicit in forwardTape
and reverseTape
use a helper:
safeUncons :: a -> [a] -> (a, [a])
safeUncons _ (x:xs) = (x, xs)
safeUncons d [] = (d, [])
forwardTape, reverseTape :: a -> Tape a -> Tape a
forwardTape d (Tape ls v rs) = let (x, xs) = safeUncons d rs in Tape (v:ls) x xs
reverseTape d (Tape ls v rs) = let (x, xs) = safeUncons d ls in Tape xs x (v:rs)
Use do
notation for monads and monads only*
do
notation is syntactical sugar for >>
and >>=
. If you have
do a
b
x <- c
let y = 3
d x y
it gets desugared into
a >> b >> c >>= (\x -> let y = 3 in d x y)
So when you use do
, other Haskellers will try to figure out what monad is currently getting used. However, you don't use a monad at all:
'[' -> do
let (loop, rest) = splitOnLoopEnd xs
Loop (makeProgram loop):makeProgram' rest
That's the same as
'[' ->
let (loop, rest) = splitOnLoopEnd xs
in Loop (makeProgram loop):makeProgram' rest
If possible, prefer the latter style. do
expressions are meant as "conventional syntax for monadic programming", after all.
Make it hard to use functions wrong or in the wrong context
splitOnLoopEnd'
should never get used outside of splitOnLoopEnd
. Unless you want to explicitly test splitOnLoopEnd'
I don't recommend to use a top-level binding:
splitOnLoopEnd :: String -> (String, String)
splitOnLoopEnd = go 0
where
go _ "" = error "No matching ] found"
go 0 (']':xs') = ([], xs')
go nesting (x:xs') = case x of
']' -> (x:ys, zs) where (ys, zs) = next (nesting - 1)
'[' -> (x:ys, zs) where (ys, zs) = next (nesting + 1)
_ -> (x:ys, zs) where (ys, zs) = next nesting
where
next n = go n xs'
Also note that I changed next
argument. It's a lot harder to use a Int
wrong compared to a Int -> Int
. While we're at it, let's reorder some parts and get ri of the '
after xs
:
splitOnLoopEnd :: String -> (String, String)
splitOnLoopEnd = go 0
where
go _ "" = error "No matching ] found"
go 0 (']':xs) = ([], xs)
go n (x:xs) = let (ys, zs) = go (n + l) xs in (x:ys, zs)
where
l = case x of
']' -> (-1)
'[' -> 1
_ -> 0
We can use case ... of
just for the nesting difference, which removes the repetition of next
.
By the way, makeProgram
and makeProgram'
do the same. I suggest to rename the latter to the former. Probably a remnant from a previous version.
Provide more power to the user
At the moment, your user cannot work with the AST since it doesn't get exported. They can't even use the type BFProgram
. They can use the values, though. So let us have a look at the power a user should have.
Export types (but not necessarily their constructors)
If I have a malfunctioning BFProgram
, I would like to dump it at some point. I would write
dumpProgram :: BFProgram -> IO ()
or similar. However, that's not possible at the moment, since BFProgram
never gets exported. I have to write
dumpProgram :: Show a => a -> IO ()
which might be to general for my preference. It's fine to provide the user the types, though:
module Interpreter
( interpret
, parse
, execute
, BFProgram
, BFMemory
, BFMemoryCell
, BFInstruction
) where
This will only export the type, not the constructors, though, so I'm not able to generate a new BFInstruction
. I can only reuse already existing ones.
Use canonical names
Your makeProgram
is a parser, therefore I suggest you to call it parse
instead:
parse :: String -> BFProgram
While you're at it, use Either e BFProgram
instead of error
. That way you can recover from parsing errors.
Provide modular functionality.
Your interpret
variant forces the user to keep the original source. But that's not necessary if the user wants to run the program several times. So instead, I suggest you to provide both parse
and execute
. You have to change execute
's type slightly:
execute :: BFProgram -> IO BFMemory
execute = executeWith (makeTape (BFMemoryCell 0))
executeWith :: BFMemory -> BFProgram -> IO BFMemory
executeWith = -- your old execute implementation
That way one can easily run the same program multiple times:
helloWorldProgram <- parse <$> readFile "hello.bf"
replicateM_ 10 $ execute helloWorldProgram
You can keep the interpret
functionality as a "one does all", but for testing and maintenance it's easier to provide a larger interface.
Do not reinvent the wheel (unless you want to)
There are several libraries that provide argument parsers, for example optparse-applicative
. Those libraries make it easy to handle command line arguments without a hassle.
Also, a Map
for two possible option variants is slightly an overkill:
data Source = Program String | File FilePath | StdIn
data Options = Options {
showProgram :: Bool,
showMemory :: Bool,
sourceCode :: Source
} deriving (Show, Eq)
That's all you need for your current options. Your program needs all of them set, especially the sourceCode
. An optparse-applicative
parser could look like this:
source :: Parser Source
source = program <|> file <|> pure stdin
file :: Parser Source
file = File <$> strOption
( long "file"
<> short 'f'
<> metavar "FILENAME"
<> help "Brainfuck file" )
program :: Parser Source
program = Program <$> strOption
( long "program"
<> short 'p'
<> metavar "PROGRAM"
<> help "Brainfuck source code" )
stdin :: Parser Source
stdin = flag' StdIn
( long "stdin"
<> help "Read from stdin" )
options :: Parser Options
options = Options
<$> switch
( long "show-program"
<> short 'p'
<> help "Show the parsed program before execution" )
<*> switch
( long "show-memory"
<> short 'm'
<> help "Show the memory tape after execution" )
<*> source
Note that this also generate a help message.
Other remarks
Apart from the remarks mentioned above, well done. The tape movements are now \$\mathcal O(1)\,ドル and the same holds for jumping to the start or end of a loop. Your AST cannot represent invalid programs, which is a big plus compared to your previous approach.
There are some points where I would use another style, but that's personal preference, e.g.
execute program' memory >>= execute xs
I usually keep the number of prime functions or variables down, too, and use them only if they were derived from the original one, e.g.
execute p@(x:xs) -- vs -- execute xs@(x:xs')
But again, that's personal preference.
If you want to upload your package at some point you should add some documentation and use some other module names, but I don't think you're going to publish it on Hackage.
The only other improvement I can think of is optimization, which needs a modified AST, and testing, which needs a non-IO
variant.
* Technically also for Applicative, but that needs an extension, so don't.
-
\$\begingroup\$ Is there anyway to not have to traverse the code multiple times when parsing loop constructs? A loop section is 1 + nesting level times if I understand correctly. \$\endgroup\$Philippe– Philippe2019年12月22日 19:38:53 +00:00Commented Dec 22, 2019 at 19:38
Explore related questions
See similar questions with these tags.