I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library. I want to define a DSL that lets you define your grammar and given a string can parse it into an AST.
I wrote something, but it's not very idiomatic. I'd love some pointers. Also, I wanted to be able to define rules as follows:
rule :test do {
str("a") >> (str("b") >> repeat(2))
}
where `2` is the `min_count`, so that the rule matches 'abbbbb'.
With my current design I had to do:
rule :test do {
str("a") >> repeat(2, str("b"))
}
Here is my code and the tests:
defmodule Parslet do
  @moduledoc """
  A small parser-combinator DSL inspired by Ruby's Parslet.

  `use Parslet` in a module, declare named rules with `rule/2`, choose the
  entry rule with `root/1`, and a `parse/1` function is generated for that
  module.

  A *parser* is a one-argument function that receives the remaining input and
  returns `{:ok, matched, rest}` on success or `{:error, reason}` on failure.
  The two-/three-argument forms of `str`, `match` and `repeat` take a previous
  parser as their first argument, so parsers can be sequenced with `|>`.
  """

  @typedoc "Result of running a parser on some input."
  @type result :: {:ok, String.t(), String.t()} | {:error, String.t()}

  @typedoc "A parser: takes the remaining input, returns a `t:result/0`."
  @type parser :: (String.t() -> result)

  # Callback invoked by `use Parslet`.
  #
  # Imports the DSL into the calling module, initialises the bookkeeping
  # attributes and registers the before-compile hook that injects `parse/1`.
  @doc false
  defmacro __using__(_opts) do
    quote do
      import Parslet

      # Names of the rules defined so far, most recent first.
      @rules []
      # Name of the entry rule; stays :undefined until `root/1` is used.
      @root :undefined

      # Invoke Parslet.__before_compile__/1 before the module is compiled.
      @before_compile Parslet
    end
  end

  @doc """
  Defines a named grammar rule.

  The rule becomes a zero-arity function called `description` that returns the
  parser built by `block`, so rules can refer to each other by calling them.

  ## Examples

      rule :test_string do
        str("test")
      end
  """
  defmacro rule(description, do: block) do
    quote do
      # Record the rule name, then expose the rule as a zero-arity function.
      @rules [unquote(description) | @rules]
      def unquote(description)(), do: unquote(block)
    end
  end

  @doc """
  Selects the rule that the generated `parse/1` starts from.
  """
  defmacro root(rule_name) do
    quote do
      @root unquote(rule_name)
    end
  end

  # Invoked right before the target module is compiled, once every rule and
  # the root are known; injects the `parse/1` function.
  @doc false
  defmacro __before_compile__(_env) do
    quote do
      @doc """
      Parses `document` with the root rule.

      Returns `{:ok, matched}` when the whole document is consumed, and
      `{:error, reason}` when the root rule fails or input is left over.
      """
      def parse(document) do
        case apply(__MODULE__, @root, []).(document) do
          {:ok, any, ""} -> {:ok, any}
          {:ok, any, rest} -> {:error, "Consumed #{inspect(any)}, but had the following remaining '#{rest}'"}
          error -> error
        end
      end
    end
  end

  @doc """
  Sequences the parser `fun` with a continuation.

  Returns a parser that runs `fun`; on success it calls `aux.(rest, match)`
  with the remaining input and the text matched so far, otherwise it returns
  the failure of `fun` unchanged.
  """
  @spec call_aux(parser, (String.t(), String.t() -> result)) :: parser
  def call_aux(fun, aux) do
    fn doc ->
      case fun.(doc) do
        {:ok, match, rest} -> aux.(rest, match)
        other -> other
      end
    end
  end

  @doc """
  Parser that matches the literal `text` at the start of the input.
  """
  @spec str(String.t()) :: parser
  def str(text), do: str(&Parslet.identity/1, text)

  @doc """
  Runs the parser `fun`, then matches the literal `text`.
  """
  @spec str(parser, String.t()) :: parser
  def str(fun, text) do
    call_aux(fun, fn doc, matched -> str_aux(text, doc, matched) end)
  end

  @doc """
  Parser that matches the regular expression source `regex_s` at the start of
  the input.
  """
  @spec match(String.t()) :: parser
  def match(regex_s), do: match(&Parslet.identity/1, regex_s)

  @doc """
  Runs the parser `fun`, then matches the regular expression source `regex_s`.
  """
  @spec match(parser, String.t()) :: parser
  def match(fun, regex_s) do
    # Compile once per parser instead of once per invocation. The pattern is
    # wrapped in a non-capturing group so the anchor applies to every
    # alternative: "^a|b" would otherwise match a "b" anywhere in the input.
    regex = Regex.compile!("^(?:#{regex_s})")
    call_aux(fun, fn doc, matched -> match_aux(regex, regex_s, doc, matched) end)
  end

  @doc """
  Parser that applies `fun` at least `min_count` times and then as many more
  times as it keeps succeeding.
  """
  @spec repeat(parser, non_neg_integer) :: parser
  def repeat(fun, min_count), do: repeat(&Parslet.identity/1, fun, min_count)

  @doc """
  Runs the parser `prev`, then repeats `fun` as described in `repeat/2`.
  """
  @spec repeat(parser, parser, non_neg_integer) :: parser
  def repeat(prev, fun, min_count) do
    call_aux(prev, fn doc, matched -> repeat_aux(fun, min_count, doc, matched) end)
  end

  @doc """
  Parser that always succeeds without consuming any input.

  Used as the implicit "previous parser" of the one-argument combinators.
  """
  @spec identity(String.t()) :: result
  def identity(doc) do
    {:ok, "", doc}
  end

  # Matches `text` as a prefix of `doc`. The remainder is cut by byte size so
  # it is exactly what follows the prefix; counting graphemes here could
  # swallow a combining mark that directly follows `text`.
  defp str_aux(text, doc, matched) do
    if String.starts_with?(doc, text) do
      consumed = byte_size(text)
      {:ok, matched <> text, binary_part(doc, consumed, byte_size(doc) - consumed)}
    else
      {:error, "'#{doc}' does not match string '#{text}'"}
    end
  end

  # Runs the anchored `regex` against `doc`. `regex_s` is only used for the
  # error message. The match is a prefix of `doc`, so the remainder is cut by
  # byte size.
  defp match_aux(regex, regex_s, doc, matched) do
    case Regex.run(regex, doc) do
      nil ->
        {:error, "'#{doc}' does not match regex '#{regex_s}'"}

      [match | _] ->
        consumed = byte_size(match)
        {:ok, matched <> match, binary_part(doc, consumed, byte_size(doc) - consumed)}
    end
  end

  # Minimum already satisfied: keep going while `fun` succeeds, and stop
  # (successfully) at the first failure.
  defp repeat_aux(fun, 0, doc, matched) do
    case fun.(doc) do
      # `fun` succeeded without consuming anything; repeating it again would
      # never make progress, so stop here instead of looping forever.
      {:ok, _match, ^doc} -> {:ok, matched, doc}
      {:ok, match, rest} -> repeat_aux(fun, 0, rest, matched <> match)
      _ -> {:ok, matched, doc}
    end
  end

  # Still below the minimum: a failure of `fun` is a failure of the repeat.
  defp repeat_aux(fun, count, doc, matched) do
    case fun.(doc) do
      {:ok, match, rest} -> repeat_aux(fun, count - 1, rest, matched <> match)
      other -> other
    end
  end
end
Tests
defmodule ParsletTest do
  # The example grammars below are plain modules with no shared state, so the
  # tests can run concurrently with other test modules.
  use ExUnit.Case, async: true
  doctest Parslet

  # Grammar with a single literal rule.
  defmodule ParsletExample do
    use Parslet

    rule :test_string do
      str("test")
    end

    root :test_string
  end

  test "str matches whole string" do
    assert ParsletExample.parse("test") == {:ok, "test"}
  end

  test "str does not match different strings" do
    assert ParsletExample.parse("tost") == {:error, "'tost' does not match string 'test'"}
  end

  test "parse reports error if not all the input document is consumed" do
    assert ParsletExample.parse("test_the_best") ==
             {:error, "Consumed \"test\", but had the following remaining '_the_best'"}
  end

  # Grammar whose root delegates to another rule.
  defmodule ParsletExample2 do
    use Parslet

    rule :test_regex do
      match("123")
    end

    # Calling another rule should just work. :)
    rule :document do
      test_regex()
    end

    root :document
  end

  test "match anchors the regex 123 at the start of the input" do
    assert ParsletExample2.parse("123") == {:ok, "123"}
    assert ParsletExample2.parse("w123") == {:error, "'w123' does not match regex '123'"}
    assert ParsletExample2.parse("234") == {:error, "'234' does not match regex '123'"}

    assert ParsletExample2.parse("123the_rest") ==
             {:error, "Consumed \"123\", but had the following remaining 'the_rest'"}
  end

  # a+
  defmodule ParsletExample3 do
    use Parslet

    rule :a do
      repeat(str("a"), 1)
    end

    root :a
  end

  test "a+" do
    assert ParsletExample3.parse("a") == {:ok, "a"}
    assert ParsletExample3.parse("aaaaaa") == {:ok, "aaaaaa"}
  end

  test "a+ fails when the minimum count is not reached" do
    assert ParsletExample3.parse("") == {:error, "'' does not match string 'a'"}
  end

  # ab
  defmodule ParsletExample4 do
    use Parslet

    rule :a do
      str("a") |> str("b")
    end

    root :a
  end

  test "a > b = ab" do
    assert ParsletExample4.parse("ab") == {:ok, "ab"}
  end

  # (ab)+
  defmodule ParsletExample5 do
    use Parslet

    rule :a do
      repeat(str("a") |> str("b"), 1)
    end

    root :a
  end

  test "(a > b)+" do
    assert ParsletExample5.parse("ababab") == {:ok, "ababab"}
  end

  # ab+
  defmodule ParsletExample6 do
    use Parslet

    rule :a do
      str("a") |> repeat(str("b"), 1)
    end

    root :a
  end

  test "a > b+" do
    assert ParsletExample6.parse("abbbbb") == {:ok, "abbbbb"}
  end

  test "a > b+ fails when no b follows" do
    assert ParsletExample6.parse("a") == {:error, "'' does not match string 'b'"}
  end
end
-
A more complete version is available here: github.com/NigelThorne/ElixirParslet — Nigel Thorne, commented Apr 28, 2018 at 14:20
1 Answer 1
The latest version is at https://github.com/NigelThorne/ElixirParslet/blob/master/test/json_parser_test.exs
I changed the DSL so that it builds a data structure, which is then interpreted when the parser runs. This let me decouple the DSL from the parsing behaviour in a way that passing functions around didn't.
Here is a JSON Parser written in my language.
defmodule JSONParser do
  @moduledoc """
  JSON grammar written with the Parslet DSL.

  Each `rule` defines one grammar production; `as/2` tags the part of the
  input matched by its argument so that a transformer can later turn the
  tagged tree into Elixir values. The entry rule is `:document`.
  """
  use Parslet

  # Any JSON value.
  rule :value do
    one_of([
      string(),
      number(),
      object(),
      array(),
      boolean(),
      null()
    ])
  end

  rule :null do
    as(:null, str("null"))
  end

  rule :boolean do
    as(:boolean, one_of([str("true"), str("false")]))
  end

  # Optional insignificant whitespace. JSON allows space, tab, line feed and
  # carriage return between tokens, so tab is part of the class.
  rule :sp_ do
    repeat(match("[ \t\r\n]"), 0)
  end

  # A double-quoted string: any number of plain characters or escape sequences.
  rule :string do
    str("\"")
    |> as(
      :string,
      repeat(
        as(
          :char,
          one_of([
            # A plain character: anything except a quote or a backslash.
            absent?(str("\"")) |> absent?(str("\\")) |> match("."),
            # An escape sequence: a backslash followed by a single escape
            # character or by `u` and four hex digits.
            str("\\")
            |> as(
              :escaped,
              one_of([
                # The regex source is ["\\/bfnrt]: the doubled backslash is
                # needed so that the class contains a backslash itself
                # (a lone `\/` would just be an escaped slash).
                match("[\"\\\\/bfnrt]"),
                str("u")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
              ])
            )
          ])
        ),
        0
      )
    )
    |> str("\"")
  end

  rule :digit, do: match("[0-9]")

  # integer part, optional fraction, optional exponent
  rule :number do
    as(
      :number,
      as(
        :integer,
        maybe(str("-"))
        |> one_of([
          str("0"),
          match("[1-9]") |> repeat(digit(), 0)
        ])
      )
      |> as(:decimal, maybe(str(".") |> repeat(digit(), 1)))
      |> as(
        :exponent,
        maybe(
          one_of([str("e"), str("E")])
          |> maybe(one_of([str("+"), str("-")]))
          |> repeat(digit(), 1)
        )
      )
    )
  end

  rule :key_value_pair do
    as(:pair, as(:key, string()) |> sp_() |> str(":") |> sp_() |> as(:value, value()))
  end

  rule :object do
    as(
      :object,
      str("{")
      |> sp_()
      |> maybe(key_value_pair() |> repeat(sp_() |> str(",") |> sp_() |> key_value_pair(), 0))
      |> sp_()
      |> str("}")
    )
  end

  rule :array do
    as(
      :array,
      str("[")
      |> sp_()
      |> maybe(value() |> repeat(sp_() |> str(",") |> sp_() |> value(), 0))
      |> sp_()
      |> str("]")
    )
  end

  # A document is a single value surrounded by optional whitespace.
  # `value()` needs its parentheses: piping into a bare `value` is rejected by
  # current Elixir versions because it is read as a variable.
  rule :document do
    sp_() |> value() |> sp_()
  end

  root :document
end
defmodule JSONTransformer do
  @moduledoc """
  Turns the tagged nodes produced by the JSON grammar into Elixir values.

  `transform/1` handles one node at a time; any term it does not recognise is
  returned untouched.
  """

  @doc """
  Transforms a single tagged node.

    * `%{escaped: val}` - decodes the escape sequence named by `val` (the text
      after the backslash); raises `ArgumentError` for anything that is not a
      JSON escape
    * `%{string: val}` - joins a list of parts into one string, or returns
      `val` as is
    * `%{char: val}` and `%{array: val}` - return `val`
    * `%{null: "null"}` - returns `:null`
    * `%{boolean: val}` - returns whether `val` is `"true"`
    * `%{number: ...}` - returns an integer when both the decimal and the
      exponent part are `""`, otherwise a float
    * `%{object: ...}` - builds a map from a list of pairs or from one pair
    * anything else is returned unchanged
  """
  # NOTE(review): the escape text used to be decoded with `Code.eval_string/1`.
  # Evaluating parsed input as Elixir source is unsafe for anything the
  # grammar does not constrain, so it is decoded explicitly instead.
  # `to_string/1` mirrors the string interpolation the previous code applied.
  def transform(%{escaped: val}), do: unescape(to_string(val))

  def transform(%{string: val}) when is_list(val) do
    List.to_string(val)
  end

  def transform(%{string: val}), do: val
  def transform(%{char: val}), do: val
  def transform(%{array: val}), do: val
  # Replace null with :null.
  def transform(%{null: "null"}), do: :null
  def transform(%{boolean: val}), do: val == "true"

  # No fraction and no exponent: the number is an integer.
  def transform(%{number: %{integer: val, decimal: "", exponent: ""}}) do
    {int_val, ""} = Integer.parse("#{val}")
    int_val
  end

  # Otherwise re-assemble the literal and parse it as a float.
  def transform(%{number: %{integer: val, decimal: dec, exponent: ex}}) do
    {float_val, ""} = Float.parse("#{val}#{dec}#{ex}")
    float_val
  end

  def transform(%{object: pairs}) when is_list(pairs) do
    for %{pair: %{key: k, value: v}} <- pairs, into: %{}, do: {k, v}
  end

  def transform(%{object: %{pair: %{key: k, value: v}}}) do
    %{k => v}
  end

  # Default to leaving it untouched.
  def transform(any), do: any

  # Decodes the text that follows the backslash of a JSON escape sequence.
  defp unescape("\""), do: "\""
  defp unescape("\\"), do: "\\"
  defp unescape("/"), do: "/"
  defp unescape("b"), do: "\b"
  defp unescape("f"), do: "\f"
  defp unescape("n"), do: "\n"
  defp unescape("r"), do: "\r"
  defp unescape("t"), do: "\t"

  # "uXXXX": four hex digits naming a code point. A lone surrogate is not a
  # valid code point and makes the UTF-8 encoding below raise.
  defp unescape(<<"u", hex::binary-size(4)>>) do
    <<String.to_integer(hex, 16)::utf8>>
  end

  defp unescape(other) do
    raise(ArgumentError, "unsupported JSON escape: #{inspect(other)}")
  end
end
# Parses `document` with JSONParser and converts the resulting tree with
# JSONTransformer. The assertive match raises if parsing does not succeed.
def parseJSON(document) do
  {:ok, tree} = JSONParser.parse(document)
  node_transform = &JSONTransformer.transform/1
  Transformer.transform_with(node_transform, tree)
end
So the following comparison evaluates to true:
parseJSON(~S({"bob":{"jane":234},"fre\r\n\t\u26C4ddy":"a"})) ==
%{"bob" => %{"jane" => 234},"fre\r\n\t⛄ddy" => "a"}