5
\$\begingroup\$

I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library. I want to define a DSL that lets you define your grammar and given a string can parse it into an AST.

I wrote something, but it's not very idiomatic. I'd love some pointers. Also, I wanted to be able to define rules as follows:

rule :test do {
 str("a") >> (str("b") >> repeat(2))
}

where the '2' is 'min_count' and have it match 'abbbbb'.

With my current design I had to do:

rule :test do {
 str("a") >> repeat(2, str("b"))
}

Here is my code and the tests:

defmodule Parslet do
  @moduledoc """
  A small parser-combinator DSL, loosely modelled on Ruby's Parslet library.

  A *parser* is a one-argument function that receives the remaining input and
  returns `{:ok, matched, rest}` on success or `{:error, message}` on failure.
  `str/1`, `match/1` and `repeat/2` build such parsers. Their higher-arity
  twins (`str/2`, `match/2`, `repeat/3`) take a preceding parser as the first
  argument, so parsers can be sequenced with `|>`.

  `use Parslet` imports the builders plus the `rule/2` and `root/1` macros and
  injects a `parse/1` function into the calling module.

  ## Example

      defmodule AB do
        use Parslet

        rule :ab do
          str("a") |> repeat(str("b"), 1)
        end

        root :ab
      end

      AB.parse("abbb")
      #=> {:ok, "abbb"}
  """

  @typedoc "Outcome of running a parser against some input."
  @type result :: {:ok, String.t(), String.t()} | {:error, String.t()}

  @typedoc "A parser: takes the remaining input and returns a `t:result/0`."
  @type parser :: (String.t() -> result)

  # Callback invoked by `use Parslet`: imports the DSL and prepares the module
  # attributes that `rule/2`, `root/1` and `__before_compile__/1` rely on.
  @doc false
  defmacro __using__(_opts) do
    quote do
      import Parslet

      # Names of the rules defined so far, most recent first.
      @rules []
      # Name of the entry-point rule; stays :undefined until root/1 is used.
      @root :undefined
      # Let Parslet.__before_compile__/1 inject parse/1 once all rules are known.
      @before_compile Parslet
    end
  end

  @doc """
  Defines a grammar rule.

  The rule becomes a zero-arity function named `description` in the calling
  module whose body is `block`; the block is expected to evaluate to a parser.
  Rules can refer to each other by calling that function.

  ## Examples

      rule :test_string do
        str("test")
      end
  """
  defmacro rule(description, do: block) do
    quote do
      # Record the rule name, then define the function that builds its parser.
      @rules [unquote(description) | @rules]
      def unquote(description)(), do: unquote(block)
    end
  end

  @doc """
  Declares which rule `parse/1` starts from.

  ## Examples

      root :document
  """
  defmacro root(rule_name) do
    quote do
      @root unquote(rule_name)
    end
  end

  # Invoked right before the using module is compiled; injects `parse/1`.
  # `parse/1` builds the root rule's parser, runs it on the document and
  # requires that the whole input was consumed.
  @doc false
  defmacro __before_compile__(_env) do
    quote do
      def parse(document) do
        case apply(__MODULE__, @root, []).(document) do
          {:ok, any, ""} ->
            {:ok, any}

          {:ok, any, rest} ->
            {:error,
             "Consumed #{inspect(any)}, but had the following remaining '#{rest}'"}

          error ->
            error
        end
      end
    end
  end

  @doc """
  Sequences the parser `fun` with the continuation `aux`.

  Returns a new parser that runs `fun`; on success it calls
  `aux.(rest, matched)` with the unconsumed input and the text matched so far,
  otherwise it returns the result of `fun` unchanged.
  """
  @spec call_aux(parser, (String.t(), String.t() -> result)) :: parser
  def call_aux(fun, aux) do
    fn doc ->
      case fun.(doc) do
        {:ok, match, rest} -> aux.(rest, match)
        other -> other
      end
    end
  end

  @doc """
  Returns a parser that matches the literal `text` at the start of the input.

  ## Examples

      iex> Parslet.str("a").("ab")
      {:ok, "a", "b"}

      iex> Parslet.str("a").("xb")
      {:error, "'xb' does not match string 'a'"}
  """
  @spec str(String.t()) :: parser
  def str(text), do: str(&Parslet.identity/1, text)

  @doc """
  Returns a parser that runs `fun` and then matches the literal `text`.

  ## Examples

      iex> parser = Parslet.str("a") |> Parslet.str("b")
      iex> parser.("abc")
      {:ok, "ab", "c"}
  """
  @spec str(parser, String.t()) :: parser
  def str(fun, text) do
    call_aux(fun, fn doc, matched -> str_aux(text, doc, matched) end)
  end

  @doc """
  Returns a parser that matches the regular expression source `regex_s` at the
  start of the input.

  The pattern is anchored as a whole, so every alternative of `a|b` has to
  match at the start of the input.

  ## Examples

      iex> Parslet.match("[0-9]+").("42abc")
      {:ok, "42", "abc"}

      iex> Parslet.match("a|b").("xb")
      {:error, "'xb' does not match regex 'a|b'"}
  """
  @spec match(String.t()) :: parser
  def match(regex_s), do: match(&Parslet.identity/1, regex_s)

  @doc """
  Returns a parser that runs `fun` and then matches `regex_s` at the start of
  what `fun` left unconsumed.
  """
  @spec match(parser, String.t()) :: parser
  def match(fun, regex_s) do
    # Compile once when the parser is built rather than on every parse. The
    # non-capturing group makes the anchor apply to the whole pattern instead
    # of only to its first alternative.
    regex = Regex.compile!("^(?:#{regex_s})")
    call_aux(fun, fn doc, matched -> match_aux(regex, regex_s, doc, matched) end)
  end

  @doc """
  Returns a parser that applies `fun` at least `min_count` times and then as
  many more times as it keeps succeeding.

  A `min_count` of zero or less means "zero or more". Repetition stops as soon
  as `fun` succeeds without consuming any input.

  ## Examples

      iex> Parslet.repeat(Parslet.str("a"), 1).("aab")
      {:ok, "aa", "b"}

      iex> Parslet.repeat(Parslet.str("a"), 1).("b")
      {:error, "'b' does not match string 'a'"}
  """
  @spec repeat(parser, integer) :: parser
  def repeat(fun, min_count), do: repeat(&Parslet.identity/1, fun, min_count)

  @doc """
  Returns a parser that runs `prev` and then repeats `fun` as described in
  `repeat/2`.
  """
  @spec repeat(parser, parser, integer) :: parser
  def repeat(prev, fun, min_count) do
    call_aux(prev, fn doc, matched -> repeat_aux(fun, min_count, doc, matched) end)
  end

  @doc """
  The parser that always succeeds without consuming anything.

  Used as the implicit predecessor of the first parser in a chain.

  ## Examples

      iex> Parslet.identity("abc")
      {:ok, "", "abc"}
  """
  @spec identity(String.t()) :: result
  def identity(doc) do
    {:ok, "", doc}
  end

  # Matches the literal `text` at the head of `doc`.
  defp str_aux(text, doc, matched) do
    if String.starts_with?(doc, text) do
      {:ok, matched <> text, drop_prefix(doc, byte_size(text))}
    else
      {:error, "'#{doc}' does not match string '#{text}'"}
    end
  end

  # Matches the pre-compiled, start-anchored `regex` at the head of `doc`.
  # `regex_s` is only used for the error message.
  defp match_aux(regex, regex_s, doc, matched) do
    case Regex.run(regex, doc) do
      nil -> {:error, "'#{doc}' does not match regex '#{regex_s}'"}
      [match | _] -> {:ok, matched <> match, drop_prefix(doc, byte_size(match))}
    end
  end

  # Optional phase: the minimum has been met, so a failure of `fun` simply
  # ends the repetition.
  defp repeat_aux(fun, count, doc, matched) when count <= 0 do
    case fun.(doc) do
      # `fun` succeeded without consuming input; looping again would never
      # terminate, so stop here.
      {:ok, _match, ^doc} -> {:ok, matched, doc}
      {:ok, match, rest} -> repeat_aux(fun, 0, rest, matched <> match)
      _ -> {:ok, matched, doc}
    end
  end

  # Mandatory phase: `fun` still has to succeed `count` more times, so its
  # failure is the failure of the whole repetition.
  defp repeat_aux(fun, count, doc, matched) do
    case fun.(doc) do
      {:ok, match, rest} -> repeat_aux(fun, count - 1, rest, matched <> match)
      other -> other
    end
  end

  # Drops the first `size` bytes of `doc`. Both callers have just verified a
  # byte-wise prefix, so the remainder is cut by bytes as well; cutting by
  # graphemes could swallow a combining mark that follows the prefix.
  defp drop_prefix(doc, size), do: binary_part(doc, size, byte_size(doc) - size)
end

Tests

defmodule ParsletTest do
  use ExUnit.Case
  doctest Parslet

  # ---------------------------------------------------------------------------
  # Grammars under test, one per scenario.
  # ---------------------------------------------------------------------------

  # A single literal string.
  defmodule LiteralGrammar do
    use Parslet

    rule(:test_string, do: str("test"))

    root(:test_string)
  end

  # A regex rule reached through another rule.
  defmodule RegexGrammar do
    use Parslet

    rule(:test_regex, do: match("123"))

    # Calling another rule is just calling the function that rule defined.
    rule(:document, do: test_regex())

    root(:document)
  end

  # One or more "a".
  defmodule OneOrMoreGrammar do
    use Parslet

    rule(:a, do: repeat(str("a"), 1))

    root(:a)
  end

  # "a" followed by "b".
  defmodule SequenceGrammar do
    use Parslet

    rule(:a, do: str("a") |> str("b"))

    root(:a)
  end

  # One or more repetitions of the sequence "ab".
  defmodule RepeatedSequenceGrammar do
    use Parslet

    rule(:a, do: repeat(str("a") |> str("b"), 1))

    root(:a)
  end

  # "a" followed by one or more "b".
  defmodule SequenceThenRepeatGrammar do
    use Parslet

    rule(:a, do: str("a") |> repeat(str("b"), 1))

    root(:a)
  end

  # ---------------------------------------------------------------------------
  # Tests
  # ---------------------------------------------------------------------------

  test "str matches whole string" do
    outcome = LiteralGrammar.parse("test")
    assert outcome == {:ok, "test"}
  end

  test "str doesnt match different strings" do
    outcome = LiteralGrammar.parse("tost")
    assert outcome == {:error, "'tost' does not match string 'test'"}
  end

  test "parse reports error if not all the input document is consumed" do
    outcome = LiteralGrammar.parse("test_the_best")

    assert outcome ==
             {:error, "Consumed \"test\", but had the following remaining '_the_best'"}
  end

  test "[123]" do
    expectations = [
      {"123", {:ok, "123"}},
      {"w123", {:error, "'w123' does not match regex '123'"}},
      {"234", {:error, "'234' does not match regex '123'"}},
      {"123the_rest",
       {:error, "Consumed \"123\", but had the following remaining 'the_rest'"}}
    ]

    for {input, expected} <- expectations do
      assert RegexGrammar.parse(input) == expected
    end
  end

  test "a+" do
    for input <- ["a", "aaaaaa"] do
      assert OneOrMoreGrammar.parse(input) == {:ok, input}
    end
  end

  test "a > b = ab" do
    outcome = SequenceGrammar.parse("ab")
    assert outcome == {:ok, "ab"}
  end

  test "(a > b)+" do
    outcome = RepeatedSequenceGrammar.parse("ababab")
    assert outcome == {:ok, "ababab"}
  end

  test "a > b+" do
    outcome = SequenceThenRepeatGrammar.parse("abbbbb")
    assert outcome == {:ok, "abbbbb"}
  end
end
asked Mar 18, 2018 at 7:17
\$\endgroup\$
1

1 Answer 1

1
\$\begingroup\$

The latest version is at https://github.com/NigelThorne/ElixirParslet/blob/master/test/json_parser_test.exs

I changed the DSL so that it builds a data structure, which is then interpreted when the parser runs. This let me decouple the DSL from the parsing behaviour in a way that passing functions around didn't.

Here is a JSON Parser written in my language.

defmodule JSONParser do
  # JSON grammar written with the Parslet DSL. Each `rule` names one JSON
  # production; `as/2` labels the part of the match it wraps (:string,
  # :number, :object, ...) and `root` selects the rule that `parse/1`
  # starts from.
  use Parslet

  # Any JSON value.
  rule :value do
    one_of([
      string(),
      number(),
      object(),
      array(),
      boolean(),
      null()
    ])
  end

  rule :null do
    as(:null, str("null"))
  end

  rule :boolean do
    as(
      :boolean,
      one_of([
        str("true"),
        str("false")
      ])
    )
  end

  # Optional insignificant whitespace. JSON allows space, tab, line feed and
  # carriage return. The characters are spelled out because Elixir's "\s"
  # string escape is a single space, not the regex whitespace class, which
  # left tab out of the original character class.
  rule :sp_ do
    repeat(match("[ \t\r\n]"), 0)
  end

  # A double-quoted string: a run of plain characters and backslash escapes.
  rule :string do
    str("\"")
    |> as(
      :string,
      repeat(
        as(
          :char,
          one_of([
            # Any character other than a quote or a backslash.
            absent?(str("\"")) |> absent?(str("\\")) |> match("."),
            # A backslash followed by a single-character escape or uXXXX.
            str("\\")
            |> as(
              :escaped,
              one_of([
                # The four backslashes produce the regex `\\`, i.e. a literal
                # backslash, so that the escape for a backslash is accepted.
                match("[\"\\\\/bfnrt]"),
                str("u")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
                |> match("[a-fA-F0-9]")
              ])
            )
          ])
        ),
        0
      )
    )
    |> str("\"")
  end

  rule :digit, do: match("[0-9]")

  # Integer part, optional fraction, optional exponent.
  rule :number do
    as(
      :number,
      as(
        :integer,
        maybe(str("-"))
        |> one_of([
          str("0"),
          match("[1-9]") |> repeat(digit(), 0)
        ])
      )
      |> as(:decimal, maybe(str(".") |> repeat(digit(), 1)))
      |> as(
        :exponent,
        maybe(
          one_of([str("e"), str("E")])
          |> maybe(one_of([str("+"), str("-")]))
          |> repeat(digit(), 1)
        )
      )
    )
  end

  rule :key_value_pair do
    as(:pair, as(:key, string()) |> sp_() |> str(":") |> sp_() |> as(:value, value()))
  end

  # Zero or more comma-separated key/value pairs between braces.
  rule :object do
    as(
      :object,
      str("{")
      |> sp_()
      |> maybe(
        key_value_pair()
        |> repeat(sp_() |> str(",") |> sp_() |> key_value_pair(), 0)
      )
      |> sp_()
      |> str("}")
    )
  end

  # Zero or more comma-separated values between brackets.
  rule :array do
    as(
      :array,
      str("[")
      |> sp_()
      |> maybe(value() |> repeat(sp_() |> str(",") |> sp_() |> value(), 0))
      |> sp_()
      |> str("]")
    )
  end

  # A whole document: one value with optional surrounding whitespace.
  rule :document do
    sp_() |> value() |> sp_()
  end

  root :document
end
defmodule JSONTransformer do
  @moduledoc """
  Turns the labelled nodes produced by `JSONParser` into plain Elixir terms.

  `transform/1` handles a single node; anything it does not recognise is
  returned untouched.
  """

  @doc """
  Converts one labelled parse node into the Elixir value it stands for.

    * `%{escaped: seq}` - the decoded character for an escape sequence, where
      `seq` is the text after the backslash (`"n"`, `"u26C4"`, ...)
    * `%{string: parts}` - the string, joining `parts` when it is a list
    * `%{char: c}` and `%{array: a}` - the wrapped value
    * `%{null: "null"}` - `:null`
    * `%{boolean: text}` - `true` when `text` is `"true"`, otherwise `false`
    * `%{number: ...}` - an integer when the decimal and exponent parts are
      both `""`, otherwise a float
    * `%{object: pairs}` - a map built from the key/value pairs

  Raises `ArgumentError` for an escape sequence that JSON does not define.
  """
  def transform(%{escaped: val}) do
    # Decoded with an explicit table. The previous implementation passed the
    # parsed text to Code.eval_string/1, which executes whatever it is given.
    val |> to_string() |> unescape()
  end

  def transform(%{string: val}) when is_list(val) do
    List.to_string(val)
  end

  def transform(%{string: val}), do: val
  def transform(%{char: val}), do: val
  def transform(%{array: val}), do: val
  # JSON null becomes the atom :null.
  def transform(%{null: "null"}), do: :null
  def transform(%{boolean: val}), do: val == "true"

  # No fraction and no exponent: the number is an integer.
  def transform(%{number: %{integer: val, decimal: "", exponent: ""}}) do
    {int_val, ""} = Integer.parse("#{val}")
    int_val
  end

  # Anything else is parsed as a float from the concatenated parts.
  def transform(%{number: %{integer: val, decimal: dec, exponent: ex}}) do
    {float_val, ""} = Float.parse("#{val}#{dec}#{ex}")
    float_val
  end

  def transform(%{object: pairs}) when is_list(pairs) do
    for %{pair: %{key: k, value: v}} <- pairs, into: %{}, do: {k, v}
  end

  # An object holding exactly one pair.
  def transform(%{object: %{pair: %{key: k, value: v}}}) do
    %{k => v}
  end

  # Default: leave the node untouched.
  def transform(any), do: any

  # Decodes the text that follows the backslash of a JSON escape sequence.
  defp unescape(<<"u", hex::binary-size(4)>>), do: <<String.to_integer(hex, 16)::utf8>>
  defp unescape("\""), do: "\""
  defp unescape("\\"), do: "\\"
  defp unescape("/"), do: "/"
  defp unescape("b"), do: "\b"
  defp unescape("f"), do: "\f"
  defp unescape("n"), do: "\n"
  defp unescape("r"), do: "\r"
  defp unescape("t"), do: "\t"

  defp unescape(other) do
    raise ArgumentError, "unsupported JSON escape sequence: " <> inspect(other)
  end
end
# Parses a JSON document and converts the result into plain Elixir terms.
#
# The parse result is matched against {:ok, _}, so a document that
# JSONParser.parse/1 does not accept raises a MatchError here. The parsed
# value is then handed to Transformer.transform_with/2 together with
# JSONTransformer.transform/1.
def parseJSON(document) do
  {:ok, syntax_tree} = JSONParser.parse(document)
  node_transformer = &JSONTransformer.transform/1
  Transformer.transform_with(node_transformer, syntax_tree)
end

With this in place, the following comparison evaluates to `true`:

parseJSON(~S({"bob":{"jane":234},"fre\r\n\t\u26C4ddy":"a"})) ==
 %{"bob" => %{"jane" => 234},"fre\r\n\t⛄ddy" => "a"}
answered May 3, 2018 at 13:04
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.