%%% -*- erlang-indent-level:4; indent-tabs-mode: nil -*- %%%------------------------------------------------------------------- %%% @copyright (C) 2018, Aeternity Anstalt %%% @doc Parser combinators for the Sophia parser. Based on %%% Koen Claessen. 2004. Parallel Parsing Processes. J. Functional %%% Programming 14, 6 (November 2004) %%% @end %%%------------------------------------------------------------------- -module(aeso_parse_lib). -vsn("7.1.1"). -export([parse/2, return/1, fail/0, fail/1, fail/2, map/2, bind/2, lazy/1, choice/1, choice/2, tok/1, layout/0, left/2, right/2, between/3, optional/1, many/1, many1/1, sep/2, sep1/2, infixl/2, infixr/2]). -export([current_file/0, set_current_file/1, current_include_type/0, set_current_include_type/1]). %% -- Types ------------------------------------------------------------------ -export_type([parser/1, parser_expr/1, pos/0, token/0, tokens/0]). -type pos() :: {string() | no_file, integer(), integer()} | {integer(), integer()}. -type token() :: {atom(), pos(), term()} | {atom(), pos()}. -type tokens() :: [token()]. -type error() :: {pos(), string() | no_error}. -define(lazy(F), {aeso_parse_lazy, F}). -define(fail(Err), {aeso_parse_fail, Err}). -define(choice(Ps), {aeso_parse_choice, Ps}). -define(bind(P, F), {aeso_parse_bind, P, F}). -define(right(P, Q), {aeso_parse_right, P, Q}). -define(left(P, Q), {aeso_parse_left, P, Q}). -define(map(F, P), {aeso_parse_map, F, P}). -define(layout, aeso_parse_layout). -define(tok(Atom), {aeso_parse_tok, Atom}). -define(return(X), {aeso_parse_return, X}). %% Type synonyms since you can't have function types as macro arguments for some reason. -type delayed(A) :: fun(() -> A). -type continuation(A, B) :: fun((A) -> parser(B)). -type function(A, B) :: fun((A) -> B). %% The representation of parsers that the user writes. These get compiled down to a lower-level %% representation before parsing (parser1/1). -opaque parser_expr(A) :: ?lazy(delayed(parser(A))) | ?fail(term()) | ?choice([parser(A)]) | ?bind(parser(B), continuation(B, A)) | ?map(function(B, A), parser(B)) | ?left(parser(A), parser(A)) | ?right(parser(A), parser(A)). %% Lists, tuples and maps of parsers are valid parsers. These are applied in left-to-right order and %% a list/tuple/map is built out of the results. For maps only the values (and not the keys) can be %% parsers. -type parser(A) :: parser_expr(A) | maybe_improper_list(parser(_), parser(_)) | tuple() %% A = tuple() | term(). %% Interpreted as a parser that returns the term without consuming input %% The low level parser representation. This is what's used when doing the %% actual parsing (see parse1/2). -type parser1(A) :: {tok_bind, #{atom() => fun((token()) -> parser1(A))}} %% ^ Consume a token and dispatch on its tag. | {fail, term()} %% ^ Fail with the given error | {return_plus, A, parser1(A)} %% ^ Choice between returning a value and continue parsing | {layout, fun((integer()) -> parser1(A)), parser1(A)}. %% ^ Parse a layout block. If a layout block can be started, it commits to the %% first argument. I.e. no backtracking to the second argument if the first %% fails. trampoline({bounce, Cont}) when is_function(Cont, 0) -> trampoline(Cont()); trampoline(Res) -> Res. -define(BOUNCE(X), {bounce, fun() -> X end}). %% Apply a parser to its continuation. This compiles a parser to its low-level representation. -spec apply_p(parser(A), fun((A) -> parser1(B))) -> parser1(B). apply_p(?lazy(F), K) -> apply_p(F(), K); apply_p(?fail(Err), _) -> {fail, Err}; apply_p(?choice([P | Ps]), K) -> lists:foldl(fun(Q, R) -> choice1(trampoline(apply_p(Q, K)), R) end, trampoline(apply_p(P, K)), Ps); apply_p(?bind(P, F), K) -> apply_p(P, fun(X) -> apply_p(F(X), K) end); apply_p(?right(P, Q), K) -> apply_p(P, fun(_) -> apply_p(Q, K) end); apply_p(?left(P, Q), K) -> apply_p(P, fun(X) -> apply_p(Q, fun(_) -> K(X) end) end); apply_p(?map(F, P), K) -> apply_p(P, fun(X) -> K(F(X)) end); apply_p(?layout, K) -> {layout, K, {fail, {expected, layout_block}}}; apply_p(?tok(Atom), K) -> {tok_bind, #{Atom => K}}; apply_p(?return(X), K) -> ?BOUNCE(K(X)); apply_p([P | Q], K) -> apply_p(P, fun(H) -> apply_p(Q, fun(T) -> K([H | T]) end) end); apply_p(T, K) when is_tuple(T) -> apply_p(tuple_to_list(T), fun(Xs) -> K(list_to_tuple(Xs)) end); apply_p(M, K) when is_map(M) -> {Keys, Ps} = lists:unzip(maps:to_list(M)), apply_p(Ps, fun(Vals) -> K(maps:from_list(lists:zip(Keys, Vals))) end); apply_p(X, K) -> ?BOUNCE(K(X)). %% -- Primitive combinators -------------------------------------------------- %% @doc Create a delayed parser. Required when building recursive parsers to avoid looping. -spec lazy(fun(() -> parser(A))) -> parser(A). lazy(Delayed) -> ?lazy(Delayed). %% @doc A parser that always fails at a known location. -spec fail(pos(), term()) -> parser(none()). fail(Pos, Err) -> ?fail({Pos, Err}). %% @doc A parser that always fails. -spec fail(term()) -> parser(none()). fail(Err) -> ?fail(Err). %% @doc Fail with no error message. -spec fail() -> parser(none()). fail() -> fail(no_error). %% @doc A choice between two parsers. Succeeds if either parser succeeds. -spec choice(parser(A), parser(A)) -> parser(A). choice(?choice(Ps), ?choice(Qs)) -> ?choice(Ps ++ Qs); choice(?choice(Ps), Q) -> ?choice([Q | Ps]); choice(P, ?choice(Qs)) -> ?choice([P | Qs]); choice(P, Q) -> ?choice([P, Q]). %% @doc A choice between a list of parsers. Applies 'choice/2' repeatedly. -spec choice([parser(A)]) -> parser(A). choice([]) -> fail(empty_choice); choice([P]) -> P; choice([P | Ps]) -> choice(P, choice(Ps)). %% @doc Parse a single token with the given tag. -spec tok(atom()) -> parser(token()). tok(Atom) -> ?tok(Atom). %% @doc Apply two parsers in sequence and return the result from the first one. -spec left(parser(A), parser(_)) -> parser(A). left(P, Q) -> ?left(P, Q). %% @doc Apply two parsers in sequence and return the result from the second one. -spec right(parser(_), parser(A)) -> parser(A). right(P, Q) -> ?right(P, Q). %% @doc A parser that always succeeds with the given value. -spec return(A) -> parser(A). return(X) -> ?return(X). %% @doc Monadic bind. Lets you inspect the result of the first parser before deciding on what to %% parse next. -spec bind(parser(A), fun((A) -> parser(B))) -> parser(B). bind(?return(X), F) -> F(X); bind(P, F) -> ?bind(P, F). %% @doc Apply a function to the result of a parser. -spec map(fun((A) -> B), parser(A)) -> parser(B). map(Fun, P) -> ?map(Fun, P). %% @doc Parse the start of a layout block. A layout block can start if the next token is not on the %% same line as the previous token and it is indented further than the current layout block (if %% any). The result is the column of the new layout block (i.e. the column of the next token). -spec layout() -> parser(integer()). layout() -> ?layout. %% @doc Parse a sequence of tokens using a parser. Fails if the parse is ambiguous. -spec parse(parser(A), tokens()) -> {ok, A} | {error, term()}. parse(P, S) -> case parse1(trampoline(apply_p(P, fun(X) -> {return_plus, X, {fail, no_error}} end)), S) of {[], {Pos, Err}} -> {error, {add_current_file(Pos), parse_error, flatten_error(Err)}}; {[A], _} -> {ok, A}; {As, _} -> {error, {{1, 1}, ambiguous_parse, As}} end. -spec flatten_error(iolist() | no_error) -> string(). flatten_error(no_error) -> "Unspecified error"; flatten_error(Err) -> lists:flatten(Err). %% -- Derived combinators ---------------------------------------------------- %% @doc Parse zero or more A's. -spec many(parser(A)) -> parser([A]). many(P) -> choice([], many1(P)). -dialyzer({nowarn_function, many1/1}). %% Silence improper_list warning. %% @doc Parse one or more A's. -spec many1(parser(A)) -> parser([A]). many1(P) -> [P | lazy(fun() -> many(P) end)]. %% @doc Parse zero or more A's, separated by Sep. -spec sep(parser(A), parser(_)) -> parser([A]). sep(P, Sep) -> choice([], sep1(P, Sep)). -dialyzer({nowarn_function, sep1/2}). %% Silence improper_list warning. %% @doc Parse one or more A's, separated by Sep. -spec sep1(parser(A), parser(_)) -> parser([A]). sep1(P, Sep) -> [P | many(right(Sep, P))]. %% @doc Parse a left-associative operator.

%% infixl(Elem, Op) ::= Elem | infixl(Elem, Op) Op Elem %%

-spec infixl(parser(A), parser(fun((A, A) -> A))) -> parser(A). infixl(Elem, Op) -> bind(Elem, fun(A) -> bind(many({Op, Elem}), fun(Ops) -> return(build_infixl(A, Ops)) end) end). %% @doc Parse a right-associative operator.

%% infixr(Elem, Op) ::= Elem | Elem Op infixl(Elem, Op) %%

-spec infixr(parser(A), parser(fun((A, A) -> A))) -> parser(A). infixr(Elem, Op) -> bind(Elem, fun(A) -> bind(many({Op, Elem}), fun(Ops) -> return(build_infixr(A, Ops)) end) end). build_infixl(A, []) -> A; build_infixl(A, [{Op, B} | Ops]) -> build_infixl(Op(A, B), Ops). build_infixr(A, []) -> A; build_infixr(A, [{Op, B} | Ops]) -> Op(A, build_infixr(B, Ops)). %% @doc Parse an A between two other things (typically brackets of some kind). -spec between(parser(_), parser(A), parser(_)) -> parser(A). between(L, P, R) -> right(L, left(P, R)). -spec optional(parser(A)) -> parser(none | {ok, A}). optional(P) -> choice(none, {ok, P}). %% -- Internal functions ----------------------------------------------------- -spec tag(token()) -> atom(). tag(T) when is_tuple(T) -> element(1, T). -spec pos(token()) -> pos(). pos(T) when is_tuple(T) -> element(2, T). -spec line(token()) -> integer(). line(T) when is_tuple(T) -> element(1, pos(T)). -spec col(token()) -> integer(). col(T) when is_tuple(T) -> element(2, pos(T)). %% Choice on low-level parsers. -spec choice1(parser1(A), parser1(A)) -> parser1(A). %% If both parsers want the next token we grab it and merge the continuations. choice1({tok_bind, Map1}, {tok_bind, Map2}) -> {tok_bind, merge_with(fun(F, G) -> fun(T) -> choice1(trampoline(F(T)), trampoline(G(T))) end end, Map1, Map2)}; %% If both parsers fail we combine the error messages. If only one fails we discard it. choice1({fail, E1}, {fail, E2}) -> {fail, add_error(E1, E2)}; choice1({fail, _}, Q) -> Q; choice1(P, {fail, _}) -> P; %% If either side can deliver a value, then so can the choice. choice1({return_plus, X, P}, Q) -> {return_plus, X, choice1(P, Q)}; choice1(P, {return_plus, X, Q}) -> {return_plus, X, choice1(P, Q)}; %% If both sides want a layout block we combine them. If only one side wants a layout block we %% will commit to a layout block is there is one. choice1({layout, F, P}, {layout, G, Q}) -> {layout, fun(N) -> choice1(trampoline(F(N)), trampoline(G(N))) end, choice1(P, Q)}; choice1({layout, F, P}, Q) -> {layout, F, choice1(P, Q)}; choice1(P, {layout, G, Q}) -> {layout, G, choice1(P, Q)}. %% Token stream representation. This is the state of the parse function. -record(ts, {layout :: [integer()], %% Column numbers of the current layout blocks. last :: token(), %% The previously consumed token. inserted :: tokens(), %% Inserted layout tokens, consumed before 'tokens'. tokens :: tokens()}). %% The remaining tokens to be parsed. %% The initial token stream. ts(S) -> #ts{ layout = [], last = {bof, {0, 0}}, inserted = [], tokens = S }. %% The parse function. Parses a token stream returning a list of results and an error message in %% case of failure. -spec parse1(parser1(A), tokens()) -> {[A], term()}. parse1(P, S) -> parse1(P, ts(S), [], no_error). %% The main work horse. Returns a list of possible parses and an error message in case parsing %% fails. -spec parse1(parser1(A), #ts{}, [A], term()) -> {[A], error()}. parse1({bounce, F}, Ts, Acc, Err) -> parse1(F(), Ts, Acc, Err); parse1({tok_bind, Map}, Ts, Acc, Err) -> case next_token(Ts) of {T, Ts1} -> case maps:get(tag(T), Map, '$not_found') of '$not_found' -> %% Insert a vclose (if required) on unexpected tokens. This lets you have layout %% blocks inside parens without having to put the closing paren on a separate %% line. Example: %% ((x) => %% let y = x + 1 %% y + y)(4) case maps:get(vclose, Map, '$not_found') of '$not_found' -> {Acc, unexpected_token_error(Ts, maps:keys(Map), T)}; F -> VClose = {vclose, pos(T)}, Ts2 = pop_layout(VClose, Ts#ts{ last = VClose }), parse1(F(VClose), Ts2, Acc, Err) end; F -> parse1(F(T), Ts1, Acc, Err) end; false -> {Acc, mk_error(Ts, io_lib:format("Unexpected end of file. Expected one of ~p.", [maps:keys(Map)]))} end; parse1({layout, F, P}, Ts, Acc, Err) -> case start_layout(Ts) of {Col, Ts1} -> parse1(F(Col), Ts1, Acc, Err); false -> parse1(P, Ts, Acc, mk_error(Ts, "Expected layout block.")) end; parse1({return_plus, X, P}, Ts, Acc, Err) -> case next_token(Ts) of false -> parse1(P, Ts, [X | Acc], Err); {T, _} -> parse1(P, Ts, Acc, unexpected_token_error(Ts, T)) end; parse1({fail, Err}, Ts, Acc, Err1) -> Err2 = case next_token(Ts) of {T, _} -> unexpected_token_error(Ts, T); _ -> no_error end, {Acc, add_error(add_error(mk_error(Ts, Err), Err2), Err1)}. %% Get the current position of the token stream. This is the position of the next token if any, and %% the line after the last token if at the end of the stream. -spec current_pos(#ts{}) -> pos(). current_pos(#ts{ inserted = [T | _] }) -> pos(T); current_pos(#ts{ tokens = [T | _] }) -> pos(T); current_pos(#ts{ last = T }) -> end_pos(pos(T)). -spec mk_error(#ts{}, term()) -> error(). mk_error(_Ts, {Pos, Err}) -> {Pos, Err}; mk_error(Ts, Err) -> {current_pos(Ts), Err}. -spec unexpected_token_error(#ts{}, token()) -> error(). unexpected_token_error(Ts, T) -> unexpected_token_error(Ts, [], T). unexpected_token_error(Ts, Expect, {Tag, _}) when Tag == vclose; Tag == vsemi -> Braces = [')', ']', '}'], Fix = case lists:filter(fun(T) -> lists:member(T, Braces) end, Expect) of [] -> " Probable causes:\n" " - something is missing in the previous statement, or\n" " - this line should be indented more."; [T | _] -> io_lib:format(" Did you forget a ~p?", [T]) end, Msg = io_lib:format("Unexpected indentation.~s", [Fix]), mk_error(Ts, Msg); unexpected_token_error(Ts, Expect, T) -> ExpectCon = lists:member(con, Expect), ExpectId = lists:member(id, Expect), Fix = case T of {id, _, X} when ExpectCon, hd(X) /= $_ -> io_lib:format(" Did you mean ~s?", [mk_upper(X)]); {con, _, X} when ExpectId -> io_lib:format(" Did you mean ~s?", [mk_lower(X)]); {qcon, _, Xs} when ExpectCon -> io_lib:format(" Did you mean ~s?", [lists:last(Xs)]); {qid, _, Xs} when ExpectId -> io_lib:format(" Did you mean ~s?", [lists:last(Xs)]); _ -> "" end, mk_error(Ts, io_lib:format("Unexpected ~s.~s", [describe(T), Fix])). mk_upper([C | Rest]) -> string:to_upper([C]) ++ Rest. mk_lower([C | Rest]) -> string:to_lower([C]) ++ Rest. describe({id, _, X}) -> io_lib:format("identifier ~s", [X]); describe({con, _, X}) -> io_lib:format("identifier ~s", [X]); describe({qid, _, Xs}) -> io_lib:format("qualified identifier ~s", [string:join(Xs, ".")]); describe({qcon, _, Xs}) -> io_lib:format("qualified identifier ~s", [string:join(Xs, ".")]); describe({tvar, _, X}) -> io_lib:format("type variable ~s", [X]); describe({char, _, _}) -> "character literal"; describe({string, _, _}) -> "string literal"; describe({hex, _, _}) -> "integer literal"; describe({int, _, _}) -> "integer literal"; describe({bytes, _, _}) -> "bytes literal"; describe(T) -> io_lib:format("token '~s'", [tag(T)]). %% Get the next token from a token stream. Inserts layout tokens if necessary. -spec next_token(#ts{}) -> false | {token(), #ts{}}. next_token(Ts) -> case insert_layout_tokens(Ts) of Ts1 = #ts{ inserted = [L | Ls] } -> {L, pop_layout(L, Ts1#ts{ last = L, inserted = Ls })}; Ts1 = #ts{ tokens = [T | S] } -> {T, Ts1#ts{ last = T, tokens = S }}; #ts{ inserted = [], tokens = [] } -> false end. %% Pop a layout block on an inserted 'vclose' token. -spec pop_layout(token(), #ts{}) -> #ts{}. pop_layout({vclose, _}, Ts = #ts{ layout = [_ | Layout] }) -> Ts#ts{ layout = Layout }; pop_layout(_, Ts) -> Ts. %% Attempt to start a new layout block. Requires the next token to be on a new line and indented %% more than any existing layout block. Sets the previous token to 'vopen'. -spec start_layout(#ts{}) -> false | {integer(), #ts{}}. start_layout(#ts{ inserted = [_ | _] }) -> false; %% Can't start a layout block before consuming all layout tokens start_layout(#ts{ tokens = [] }) -> false; %% No more tokens start_layout(Ts = #ts{ layout = Layout, last = Last, tokens = [T | _] }) -> Col = col(T), Valid = case Layout of [] -> line(Last) < line(T); [C1 | _] -> line(Last) < line(T) andalso C1 < Col end, Valid andalso {Col, Ts#ts{ layout = [Col | Layout], last = {vopen, pos(T)} }}. %% Insert layout tokens. If the next token is on the same line as the current layout block we insert %% a 'vsemi' token. If the next token is indented less, we insert a 'vclose' token. -spec insert_layout_tokens(#ts{}) -> #ts{}. insert_layout_tokens(Ts = #ts{ inserted = [_ | _] }) -> Ts; %% already inserted layout tokens insert_layout_tokens(Ts = #ts{ layout = Layout, last = Last, tokens = S }) -> ToInsert = insert_layout_tokens(Layout, Last, S, []), Ts#ts{ inserted = ToInsert }. %% Compute the layout tokens to be inserted. -spec insert_layout_tokens([integer()], token(), tokens(), tokens()) -> tokens(). insert_layout_tokens([_ | Layout], Last, [], Acc) -> %% End of the file. Insert vclose tokens for all layout blocks. Vclose = {vclose, end_pos(pos(Last))}, insert_layout_tokens(Layout, Last, [], [Vclose | Acc]); insert_layout_tokens([N | Layout1], Last, S = [T | _], Acc) -> Col = col(T), %% Don't insert a vsemi if the previous token was a vopen or a vsemi. The former to avoid a %% vsemi for the first token of the block and the latter to avoid inserting infinite vsemis. AlreadySemi = lists:member(tag(Last), [vsemi, vopen]) andalso col(Last) == N, if Col == N, not AlreadySemi -> lists:reverse([{vsemi, pos(T)} | Acc]); Col < N -> Vclose = {vclose, pos(T)}, insert_layout_tokens(Layout1, Vclose, S, [Vclose | Acc]); true -> lists:reverse(Acc) end; insert_layout_tokens([], _Last, _S, Acc) -> lists:reverse(Acc). %% The end-of-file position. Beginning of the line after the last token. end_pos({L, _}) -> {L + 1, 1}. %% Combine two error messages. Discard no_error's otherwise pick the first error. add_error(no_error, Err) -> Err; add_error({_, no_error}, Err) -> Err; add_error(Err, no_error) -> Err; add_error(Err, {_, no_error}) -> Err; add_error(Err, _Err1) -> Err. %% For some unfathomable reason the maps module does not have a merge_with function. -spec merge_with(fun((term(), term()) -> term()), map(), map()) -> map(). merge_with(Fun, Map1, Map2) -> case maps:size(Map1) > maps:size(Map2) of true -> lists:foldl(fun({K, R}, M) -> maps:update_with(K, fun(L) -> Fun(L, R) end, R, M) end, Map1, maps:to_list(Map2)); false -> lists:foldl(fun({K, L}, M) -> maps:update_with(K, fun(R) -> Fun(L, R) end, L, M) end, Map2, maps:to_list(Map1)) end. %% Current include type current_include_type() -> get('$current_include_type'). set_current_include_type(IncludeType) -> put('$current_include_type', IncludeType). %% Current source file current_file() -> get('$current_file'). set_current_file(File) -> put('$current_file', File). add_current_file({L, C}) -> {current_file(), L, C}; add_current_file(Pos) -> Pos.