I've been working on my own dynamically-typed, dynamically-scoped, imperative programming language called Honey Badger and feel that it's reached a stable enough state that I want someone else's eyes on my implementation of the interpreter.
For context, I'm going to walk you through a sample program in the language that does a 2-player game of tic-tac-toe.
{ print_board = fun b : { print(b[0] + "|" + b[1] + "|" + b[2]); print("-----"); print(b[3] + "|" + b[4] + "|" + b[5]); print("-----"); print(b[6] + "|" + b[7] + "|" + b[8]) }; This defines a function that takes an argument b (representing a board) and prints it out prettily and assigns it to a
variable print_board.
//Check if the board is completely full. isfull = fun b : { ind = 0; full = true; while (ind < len(b)) { if b[ind] == " " then full = false; ind = ind + 1 }; full }; This defines another function that takes a board (representing as an array of size 9) walks through it and returns a
variable full saying whether or not. Function bodies consist of a single expression (here it's a sequence of exprs chained together and wrapped in {}s) and "return" whatever that evaluates to.
//This is lazy. haswon = fun b, t : { b[0] == b[1] & b[1] == b[2] & b[2] == t | b[3] == b[4] & b[4] == b[5] & b[5] == t | b[6] == b[7] & b[7] == b[8] & b[8] == t | b[0] == b[3] & b[3] == b[6] & b[6] == t | b[1] == b[4] & b[4] == b[7] & b[7] == t | b[2] == b[5] & b[5] == b[8] & b[8] == t | b[2] == b[4] & b[4] == b[6] & b[6] == t | b[0] == b[4] & b[4] == b[8] & b[8] == t }; gameover = fun b : { isfull(b) | haswon(b, "X") | haswon(b, "O")}; These are two more functions used below, that check if the game is over yet. board = ["0", "1", "2", "3", "4", "5", "6", "7", "8"]; print("These are the indices for the positions"); print_board(board); xTurn = true; board = [" ", " ", " ", " ", " ", " ", " ", " ", " "]; while !gameover(board) { print("Where should " + (if xTurn then "X" else "O") + " go?"); print_board(board); spot = Int(readline()); if spot >= 0 & spot < 9 then { if board[spot] == " " then { board[spot] = if xTurn then "X" else "O"; xTurn = !xTurn } else print("That spot is already taken") } else print("That's not a valid spot") }; Here we loop until the game is over, asking the user where to move print_board(board); if haswon(board, "X") then print("X has won.") else if haswon(board, "O") then print("O has won.") else print("It's a draw.") }
Here at the end, we just print out a result.
Now here's the actual interpreter, with interspersed comments. When compiled it is run as "HB /path/to/honey/badger/file". The actual parsing and lexing is done by other files called parser.mly and lexer.mll in the linked GitHub, with the actual evaluation done by interpreter.ml.
(** Reference implementation for the Honey Badger
programming language. *)
open Core.Std
open Defs
open Printf
let rec string_of_kind arg = match arg with
TInt -> "Int"
|TReal -> "Real"
|TBool -> "Bool"
|TStr -> "String"
|TFunc -> "Func"
|TArr -> "Arr "
|TRecord a -> "Record"
|TUnit -> "()"
|TTop -> "T"
|TBottom -> "Bottom"
string_of_expr is almost entirely used for debugging.
(** Return the abstract syntax tree rooted at arg represented
as a string. *)
let rec string_of_expr arg = match arg with
N a -> "N " ^ string_of_int a
|F f -> "F " ^ Float.to_string f
|B b -> "B " ^ string_of_bool b
|Str s -> "String " ^ s
|Readline -> "readline()"
|Len e -> "len(" ^ string_of_expr e ^ ")"
|Print e -> "print(" ^ string_of_expr e ^ ")"
|Add (a, b) -> "Add(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Mul (a, b) -> "Mul(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Div (a, b) -> "Div(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Sub (a, b) -> "Sub(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Less (a, b) -> "Less(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|And (a, b) -> "And(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Or (a, b) -> "Or(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Not a -> "Not(" ^ string_of_expr a ^ ")"
|If (a, b, c)-> "(If " ^ string_of_expr a ^ " then " ^ string_of_expr b ^
" else " ^ string_of_expr c ^ ")"
|Equal (a, b) -> "Equal(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Lam (b, c) -> "Lam(" ^ String.concat ~sep:", " b ^ ", " ^
string_of_expr c ^ ")"
|App (a, b) -> "App(" ^ string_of_expr a ^ ", " ^
String.concat ~sep:", " (List.map b string_of_expr) ^ ")"
|Arr a -> "List[" ^ String.concat ~sep:", " (List.map (Array.to_list a) string_of_expr )
^ "]"
|Unit -> "()"
|Top -> "T"
|Bottom -> "Bottom"
|Get (a, b) -> "Get(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|GetRec (a, b) -> "GetRec(" ^ a ^ "," ^ string_of_expr b ^ ")"
|SetRec (a, b, c) -> a ^ "[" ^ b ^ "] <- " ^ string_of_expr c
|SetInd (a, b, c) -> a ^ "[" ^ string_of_expr b ^ "] <- " ^ string_of_expr c
|Cast (a, b) -> "Cast(" ^ string_of_expr a ^ ", kind)"
|Lookup a -> "Lookup " ^ a
|While (a, b) -> "While(" ^ string_of_expr a ^ ", " ^ string_of_expr b ^ ")"
|Record fields -> "Record[" ^ String.concat ~sep:", "
(List.map fields (fun field -> fst field ^ " = " ^
string_of_expr (snd field))) ^ "]"
|Seq a -> "Sequence[" ^ String.concat ~sep:"; " (List.map a (string_of_expr))
^ "]"
|Set (s, x) -> "Set (" ^ s ^ ", " ^ string_of_expr x ^ ")"
string_of_val however is used whenever we want to cast something to a string for the user.
(**
Represent a value as a human-readable string.
*)
and string_of_val arg = match arg with
VN a -> string_of_int a
|VF f -> Float.to_string f
|VB b -> string_of_bool b
|VStr s -> s
|VLam (b, c) -> "VLam(" ^ String.concat ~sep:", " b ^ ", " ^
string_of_expr c ^ ")"
|VArr a -> "[" ^ String.concat ~sep:", " (List.map (Array.to_list a) string_of_val )
^ "]"
|VUnit -> "()"
|VTop -> "T"
|VBottom -> "VBottom"
|VRecord fields -> "{" ^ String.concat ~sep:", "
(List.map !fields (fun field -> fst field ^ " = " ^
string_of_val (snd field))) ^ "}"
Here's a couple functions that do math, which for add is also used for concatenating strings and arrays.
(**
Return a * b.
Throws an exception in either a or b is a non-number.
*)
let mul a b = match (a, b) with
(VN x, VN y) -> VN(x * y)
|(VN x, VF y) -> VF (Float.of_int x *. y)
|(VF x, VN y) -> VF(x *. Float.of_int y)
|(VF x, VF y) -> VF(x *. y)
|_ -> invalid_arg "Invalid args for multiplication."
(**
Return a / b.
Throws an exception if either a or b is a non-number.
*)
let div a b = match (a, b) with
(VN x, VN y) -> VF(Float.of_int x /. Float.of_int y)
|(VN x, VF y) -> VF (Float.of_int x /. y)
|(VF x, VN y) -> VF(x /. Float.of_int y)
|(VF x, VF y) -> VF(x /. y)
|_ -> invalid_arg "Invalid args for multiplication."
(**
Return a + b.
If a and b are numbers, performs addition.
If a and b are strings, concatenates them.
If a and b are lists, concatenates them.
Throws an exception otherwise.
*)
let add a b = match (a, b) with
(VN x, VN y) -> VN(x + y)
|(VN x, VF y) -> VF (Float.of_int x +. y)
|(VF x, VN y) -> VF(x +. Float.of_int y)
|(VF x, VF y) -> VF(x +. y)
|(VArr f, VArr s) -> VArr (Array.append f s)
|(VUnit, VArr s) -> VArr s
|(VArr f, VUnit) -> VArr f
|(VStr f, VStr s) -> VStr (f ^ s)
|_ -> invalid_arg "Invalid args for addition."
(**
Return a - b.
Throws an exception if either a or b is a non-number.
*)
let sub a b = match (a, b) with
(VN x, VN y) -> VN(x - y)
|(VN x, VF y) -> VF (Float.of_int x -. y)
|(VF x, VN y) -> VF(x -. Float.of_int y)
|(VF x, VF y) -> VF(x -. y)
|_ -> invalid_arg "Invalid args for subtraction."
(**
Return a < b.
Throws an exception if either a or b is a non-number.
*)
let less a b = match (a, b) with
(VN x, VN y) -> VB(x < y)
|(VN x, VF y) -> VB (Float.of_int x < y)
|(VF x, VN y) -> VB(x < Float.of_int y)
|(VF x, VF y) -> VB(x < y)
|_ -> invalid_arg "Invalid args for comparison."
These are a bunch of functions for casting between types.
(**
casts v to an int.
For ints, this returns v.
For floats, this returns v rounded towards zero.
For bools, true is 1 and false is 0.
For strings, this tries to parse v as an int.
Throws exceptions for other inputs or if v is a string that
doesn't represent an int.
*)
let cast_int v = match v with
VN num -> VN num
|VF num -> VN (Float.to_int num)
|VB b -> VN (if b then 1 else 0)
|VStr s -> VN (Int.of_string s)
|_ -> invalid_arg ("Can't cast " ^ string_of_val v ^ " to int.")
(**
casts v to a float.
For ints, this returns v.
For floats, this returns v.
For bools, true is 1.0 and false is 0.0.
For strings, this tries to parse v as a float.
Throws exceptions for other inputs or if v is a string that
doesn't represent a float.
*)
let cast_real v = match v with
VN num -> VF (Float.of_int num)
|VF num -> VF num
|VB b -> VF (if b then 1.0 else 0.0)
|VStr s -> VF (Float.of_string s)
|_ -> invalid_arg ("Can't cast " ^ string_of_val v ^ " to real.")
(**
casts v to a bool.
For numbers, 0 is false and all others are true.
For strings, "true" is true and "false" is false.
For arrays and maps, empty is false, otherwise true.
Throws exceptions for other inputs or if v is a string that
is not "true" or "false".
*)
let cast_bool v = match v with
VB b -> VB b
|VN num -> VB (num <> 0)
|VF num -> VB (num <> 0.0)
|VStr s -> VB (Bool.of_string s)
|VArr a -> VB (Array.length a > 0)
|VRecord r -> VB (List.length !r > 0)
|_ -> invalid_arg ("Can't cast " ^ string_of_val v ^ " to bool.")
(**
casts v to type t.
For casting to int, see cast_int.
For casting to float, see cast_float
For casting to string, see string_of_val.
Throws an exception for all others.
*)
let cast v t = match (t, v) with
(TInt, _) -> cast_int v
|(TReal, _) -> cast_real v
|(TBool, _) -> cast_bool v
|(TStr, _) -> VStr (string_of_val v)
|(TFunc, VLam _) -> v
|(TRecord _, VRecord _) -> v
|(TUnit, VUnit) -> v
|(TArr, VArr _) -> v
|(TTop, _) -> v
|(TBottom, _) -> v
|_ -> invalid_arg ("Can't cast to " ^ string_of_kind t)
And then here we start eval, which is the main workhorse of the program.
(**
Evaluates expr with the given state and returns
a value.
*)
let rec eval expr state = match expr with
N a -> VN a
|F a -> VF a
|B b -> VB b
|Str s -> VStr s
|Lam a -> VLam a
|Arr a -> VArr (Array.map a ~f:(fun e -> eval e state))
|Unit -> VUnit
|Equal (a, b) -> VB(eval a state = eval b state)
|Record fields -> VRecord (ref (List.Assoc.map fields
(fun a -> eval a state)))
(* Numerical Functions *)
|Mul (a, b) -> mul (eval a state) (eval b state)
|Div (a, b) -> div (eval a state) (eval b state)
|Add (a, b) -> add (eval a state) (eval b state)
|Sub (a, b) -> sub (eval a state) (eval b state)
|Less (a, b) -> less (eval a state) (eval b state)
This is a function call. We check that we have the right number of args, evaluate them, put them into the new scope, and evaluate the function.
|App (lam, vars) -> begin
match eval lam state with
VLam(strs, body) ->
begin
if (List.length strs = List.length vars) then
let args = List.zip_exn strs
(List.map vars (fun arg -> eval arg state)) in
let newscope = Hashtbl.copy state in
List.iter args (fun (s,v) -> Hashtbl.replace newscope s v);
eval body newscope
else
invalid_arg ("Function call with wrong number of args.")
end
|_ -> invalid_arg "Can't apply on non-lambda."
end
(* Boolean Functions *)
|If (condition, thenCase, elseCase) -> begin
match eval condition state with
VB true -> eval thenCase state
|VB false -> eval elseCase state
|_ -> invalid_arg "Invalid condition for if."
end
|And (a, b) -> begin
match (eval a state, eval b state) with
(VB x, VB y) -> VB(x && y)
|(a, b) -> invalid_arg ("Invalid args for and " ^ string_of_val a ^ " "
^ string_of_val b ^ ".")
end
|Or(a, b) -> begin
match (eval a state, eval b state) with
(VB x, VB y) -> VB(x || y)
|_ -> invalid_arg "Invalid args for or."
end
|Not a ->begin
match eval a state with
VB x -> VB(not x)
|_ -> invalid_arg "Invalid arg for not."
end
Get is for indexing into an array. We automatically cast floats into ints when doing this.
(* Array functions. *)
|Get (index, arr) -> begin
let zero_index = 0 in
match eval index state with (* Get the indexth member of arr. *)
(VN num) -> if num < zero_index
then invalid_arg "Negative index."
else
begin
match eval arr state with
VArr ls -> if num < (Array.length ls)
then ls.(num)
else invalid_arg "Index out of bounds."
|_ -> invalid_arg "Attempt to index into non-array"
end
|(VF num) -> eval (Get (N (Float.to_int num), arr)) state
|_ -> invalid_arg "Not a number index"
end
GetRec is for looking up fields in dictionaries.
|GetRec (str, a) ->
begin
let VRecord fields = eval a state in
match List.Assoc.find !fields str with
Some x -> x
|None -> invalid_arg("Non-existent field " ^ str)
end
SetRec and SetInd are dictionary and array assignment, respectively.
|SetRec (var, field, expr) ->
begin
match eval (Lookup var) state with
VRecord fields ->
fields := List.Assoc.add !fields field (eval expr state);
VRecord fields
|v -> invalid_arg ("Can't set field in non-dict " ^ string_of_val v)
end
|SetInd (var, ind, expr) ->
begin
match (eval (Lookup var) state, eval ind state) with
(VArr ls, VN a) -> Array.set ls a (eval expr state); VArr ls
|(VArr ls, VF a) -> Array.set ls (Float.to_int a) (eval expr state); VArr ls
|(VArr ls, k) -> invalid_arg ("Invalid array index " ^ string_of_val k)
|(k, v) -> invalid_arg ("Index assignment to non array " ^ string_of_val k)
end
|Cast (expr, t) -> cast (eval expr state) t
|Seq a -> List.fold ~init:(VB true) ~f:(fun _ b -> eval b state) a
|Set (name, a) -> let v = eval a state in
Hashtbl.replace state name v; v
|Lookup name ->
begin
match Hashtbl.find state name with
Some x -> x
|None -> invalid_arg ("Undefined var " ^ name)
end
|While (guard, body) ->
let rec eval_loop () =
match eval guard state with
VB true -> eval body state;
eval_loop ()
|VB false -> VUnit
|_-> invalid_arg "Loop guard non-bool."
in
eval_loop ()
|Top -> VTop
Bottom is language-theory jargon for "throw an exception"
|Bottom -> invalid_arg "Attempt to eval Bottom"
|Print e -> print_endline (string_of_val (eval e state)); VUnit
|Readline -> VStr (input_line stdin)
|Len e -> begin
match eval e state with
VArr l -> VN (Array.length l)
|VStr s -> VN (String.length s)
|a -> invalid_arg (string_of_val a ^ " doesn't have a length.")
end
(**
Convenience function to wrap eval.
*)
let exec a =
eval a (Hashtbl.create ~hashable:String.hashable ())
(**
Read source file from src, parse it as an expr,
and print what it evaluates to.
*)
let main src =
let inpt = open_in src in
let linebuf = Lexing.from_channel inpt in
try
let ast = (Parser.main Lexer.token linebuf) in
if false then
printf "%s\n" (string_of_expr ast);
printf "%s\n" (string_of_val (exec ast));
In_channel.close inpt;
with
| Lexer.Error msg ->
fprintf stderr "%s%!" msg
| Parser.Error -> let pos = Lexing.lexeme_start_p linebuf in
fprintf stderr "Syntax error line %d column %d.\n%!"
pos.pos_lnum pos.pos_bol;;
main Sys.argv.(1)
Obviously, if you have any questions just ask. This is my first "big" project in OCaml, but I'd rather you didn't sugarcoat criticism.
1 Answer 1
Obviously, if you have any questions just ask. This is my first "big" project in OCaml, but I'd rather you didn't sugarcoat criticism.
It looks like a very fun project, congratulations! :)
I have a few comments, which all concerns the style of writing. This is an important topic, because a good style eases maintainability.
The name of constructors: all constructors but the expression constructor have a one letter prefix. Having a prefix is good, because it prevents name clashes and eases automatic edition of code. You should therefore consider using a prefix for expressions too. Also, I would avise not to be lazy and to use a longer prefix – use regexp editing to convert your code in a few seconds.
The string of expression would be much more readable if you use the printf function instead of concatenating strings with the
^
operator. You need for this the%a
conversion which allows to use custom printers (and consume two arguments, the custom printer and the value to print). You may even choose to write a pretty printer with the Format module from the standard library. If you do so, you can easily derive theto_string
,print
andoutput
andprint_err
functions from this pretty-printer, either manually or using the Mixture_Format mixin.open Format let rec pp_print_expr fft = function | N a -> fprintf fft "N%d" a (* why a instead of n? *) | F f -> fprintf fft "F%f" f (* why f instead of x? *) | B b -> fprintf fft "B%b" b | Str s -> fprintf fft "String %S" s | Readline -> fprintf fft "readline()" | Len expr -> fprintf fft "len(%a)" pp_print_expr expr | Print expr -> fprintf fft "print(%a)" pp_print_expr expr ...
You probably got the idea with that header. As side notes,
string_of_kind
is not recursive. Also, consider sticking to standard mathy notations for one-letter variables, otherwise you will be the only one able to read your code. Also see how you can replace some uses ofmatch
withfunction
.In the
eval
expression you have a lot of mildly complicated treatments, you could consider delegating them to other functions (which are mutually recursive toeval
). The benefits of doing this, is that you can test these functions individually.You are not consistent in the style of messages in the exception you throw, sometimes you give detailed context, sometimes not. You should be consistent here and deciding who these messages are intended to will help consistency. Note that you can easily prepare detailed messages with
ksprintf failwith
likeksprintf invalid_arg "Invalid args for and %a %a." string_of_val a string_of_val b
You should consider defining a
State
module defining a type which is your hashtable state for now, by doing this you have a natural receptacle to define functions operating on the state (like the pack-return you use to update the state) and it is later easy to experiment with alternative implementations.
On the logic of the types you define, since your language seems to be dynamically typed, I do not see why you have all your immediate values type embedded into expression. Is there a good reason why you do not replace the VN, VF, ...
constructors by an Immediate of value
?
-
3
-
\$\begingroup\$ Thanks, Michael, for doing this. And yes, it was a fun project! \$\endgroup\$Joshua Snider– Joshua Snider2015年09月11日 23:23:35 +00:00Commented Sep 11, 2015 at 23:23
Explore related questions
See similar questions with these tags.