Compare commits
3 Commits
56e63051bc
...
4f2a3c6c6f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4f2a3c6c6f | ||
|
|
7df04a81be | ||
|
|
6f02d4c4e6 |
@ -100,8 +100,12 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
||||
end;
|
||||
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
|
||||
parse_list(Type, Tk, String, Row, Start);
|
||||
parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
|
||||
parse_tuple(Type, Tk, String, Row, Start);
|
||||
parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) ->
|
||||
parse_record_or_map(Type, Tk, String, Row, Start);
|
||||
parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) ->
|
||||
parse_variant(Type, Tk, String, Ident, Row, Start, End);
|
||||
parse_expression2(_, _, _, {_, S, Row, Start, End}) ->
|
||||
{error, {unexpected_token, S, Row, Start, End}}.
|
||||
|
||||
@ -121,45 +125,166 @@ expect_tokens([Str | Rest], Tk, String) ->
|
||||
%%% List Parsing
|
||||
|
||||
% Parse a list literal. parse_expression2 calls this after matching the "["
% token, passing that token's Row and Start. The first argument is the
% {O, N, Type} triple for the expected type: list types and unknown_type are
% accepted, anything else is a wrong_type error located at the bracket.
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
    parse_list_loop(Inner, Tk, String, "]", Row, Start, []);
parse_list({_, _, unknown_type}, Tk, String, Row, Start) ->
    % With no type information, the elements are parsed untyped as well.
    parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []);
parse_list({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, list, Row, Start, Start}}.
|
||||
|
||||
% Read the next token of a bracketed sequence: either the closing character,
% which ends the sequence, or the first token of the next element.
%
% CloseChar is the closing delimiter as a string ("]" for lists, ")" when the
% loop is reused for untyped tuples). Acc holds the values parsed so far,
% newest first. Returns {ok, {Values, Tk, String}} or {error, Reason}.
parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
    case next_token(Tk, String) of
        % CloseChar is already bound, so this only matches the expected closer.
        {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} ->
            {ok, {lists:reverse(Acc), NewTk, NewString}};
        {ok, {Token, NewTk, NewString}} ->
            parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token);
        {error, Reason} ->
            % Pass tokenizer errors through rather than crashing with
            % case_clause.
            {error, Reason}
    end.
|
||||
|
||||
% Parse one element of a bracketed sequence, starting from the already-read
% Token, and continue with the separator/closer check on success.
%
% On failure the element's error is wrapped with its zero-based position
% (the number of elements parsed before it), tagged list_element or
% tuple_element depending on CloseChar.
parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) ->
    case parse_expression2(Inner, Tk, String, Token) of
        {ok, {Value, NewTk, NewString}} ->
            parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end.
|
||||
|
||||
% After an element has been parsed, expect either the closing character (end
% of the sequence) or a "," followed by more elements.
%
% Any other token yields {error, {unexpected_token, Text, Row, Start, End}}
% using that token's own position; tokenizer errors are passed through.
parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} ->
            {ok, {lists:reverse(Acc), NewTk, NewString}};
        {ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
            parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc);
        {ok, {{_, S, TokRow, TokStart, TokEnd}, _, _}} ->
            % Fresh position variables are needed here: Row and Start are
            % already bound to the position of the opening bracket.
            {error, {unexpected_token, S, TokRow, TokStart, TokEnd}};
        {error, Reason} ->
            {error, Reason}
    end.
|
||||
|
||||
% Map the closing delimiter of a sequence to the tag used when wrapping an
% element error: "]" belongs to lists, ")" to tuples. Any other argument
% fails with function_clause.
choose_list_error_wrapper([$]]) ->
    list_element;
choose_list_error_wrapper([$)]) ->
    tuple_element.
|
||||
|
||||
%%% Tuple Parsing
|
||||
|
||||
% Parse a tuple literal. parse_expression2 calls this after matching the "("
% token, passing that token's Row and Start. The result value is
% {tuple, ErlangTuple}. Types other than a tuple type or unknown_type give a
% wrong_type error located at the opening parenthesis.
parse_tuple({_, _, {tuple, ElemTypes}}, Tk, String, Row, Start) ->
    % Typed case: each element is parsed against its own declared type.
    case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
        {ok, {Terms, RestTk, RestString}} ->
            {ok, {{tuple, list_to_tuple(Terms)}, RestTk, RestString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) ->
    % Untyped case: the elements are just a sequence of untyped terms, so the
    % list loop is reused with ")" as the closing character instead of "]".
    case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of
        {ok, {Terms, RestTk, RestString}} ->
            {ok, {{tuple, list_to_tuple(Terms)}, RestTk, RestString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, tuple, Row, Start, Start}}.
|
||||
|
||||
% Parse the elements of a parenthesised, per-element-typed sequence (tuple
% or variant arguments). ElemTypes lists the types still expected; Acc holds
% the values parsed so far, newest first.
%
% A ")" ends the sequence, at which point the element count is checked. Any
% other token is handed to parse_multivalue2 as the start of an element.
parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
            % Row2/Start2 locate the ")" so that a too-short sequence is
            % reported where it ends.
            check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
        {ok, {Token, NewTk, NewString}} ->
            parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token);
        {error, Reason} ->
            % Pass tokenizer errors through rather than crashing with
            % case_clause.
            {error, Reason}
    end.
|
||||
|
||||
% Parse one element of a typed sequence from the already-read token.
%
% With no expected types left, a ")" token completes the sequence and any
% other token is reported as unexpected. Otherwise the token starts an element
% of the next expected type; a failure inside that element is wrapped with
% its zero-based position.
parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) ->
    {ok, {lists:reverse(Acc), Tk, String}};
parse_multivalue2([], _, _, _, _, _, {_, Text, TokRow, TokStart, TokEnd}) ->
    {error, {unexpected_token, Text, TokRow, TokStart, TokEnd}};
parse_multivalue2([ElemType | Remaining], Tk, String, Row, Start, Acc, Token) ->
    case parse_expression2(ElemType, Tk, String, Token) of
        {error, Reason} ->
            Tag = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            {error, wrap_error(Reason, {Tag, length(Acc)})};
        {ok, {Value, RestTk, RestString}} ->
            parse_multivalue3(Remaining, RestTk, RestString, Row, Start, [Value | Acc])
    end.
|
||||
|
||||
% After an element of a typed sequence, expect ")" (end of the sequence,
% followed by the element-count check) or "," (more elements follow).
%
% Any other token yields {error, {unexpected_token, Text, Row, Start, End}}
% using that token's own position; tokenizer errors are passed through.
parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
            parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc);
        {ok, {{_, S, TokRow, TokStart, TokEnd}, _, _}} ->
            % Fresh position variables are needed here: Row and Start are
            % already bound to the position of the opening parenthesis.
            {error, {unexpected_token, S, TokRow, TokStart, TokEnd}};
        {error, Reason} ->
            {error, Reason}
    end.
|
||||
|
||||
% Called when the closing ")" of a typed sequence has been read. If no
% element types remain, the sequence is complete and the accumulated values
% are returned in source order. Otherwise report how many elements were
% expected in total and how many were actually supplied, at Row/Col.
check_multivalue_long_enough(Remaining, Tk, String, Row, Col, Got) ->
    case Remaining of
        [] ->
            {ok, {lists:reverse(Got), Tk, String}};
        _ ->
            Supplied = length(Got),
            Expected = length(Remaining) + Supplied,
            {error, {not_enough_elements, Expected, Supplied, Row, Col}}
    end.
|
||||
|
||||
%%% Variant Parsing
|
||||
|
||||
% Parse a variant constructor. parse_expression2 calls this after matching
% an alphanum token, passing its text (Ident) and position (Row/Start/End).
%
% Without type information the constructor cannot be resolved, so the
% untyped case is an unresolved_variant error.
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) ->
    parse_variant2(Variants, Tk, String, Ident, Row, Start, End);
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In ordinary code an identifier can mean many things, and so can fail in
    % many ways. In this Sophia 'object notation' an identifier can only be a
    % variant constructor (roughly the Sophia counterpart of an atom), so
    % seeing one where no variant is expected is immediately a type error.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
|
||||
|
||||
% Resolve the constructor name against the variant definition. Variants is a
% list of {Name, ElemTypes} pairs; the constructor's tag is its zero-based
% position in that list. The list of every constructor's arity is passed on
% because it is part of the resulting variant value.
parse_variant2(Variants, Tk, String, Ident, Row, Start, End) ->
    case lookup_variant(Ident, Variants, 0) of
        error ->
            {error, {invalid_constructor, Ident, Row, Start, End}};
        {ok, {Tag, ElemTypes}} ->
            ArityOf = fun({_, ConstructorTypes}) -> length(ConstructorTypes) end,
            parse_variant3(lists:map(ArityOf, Variants), Tag, ElemTypes, Tk, String)
    end.
|
||||
|
||||
% Build the variant value once the constructor has been resolved.
%
% A constructor with no element types takes no argument list, so nothing
% more is consumed. Otherwise the next token must be "(", after which the
% arguments are parsed. Any other token yields an unexpected_token error
% at that token's position, and tokenizer errors are passed through.
parse_variant3(Arities, Tag, [], Tk, String) ->
    % Parsing of 0-arity variants is different.
    Result = {variant, Arities, Tag, {}},
    {ok, {Result, Tk, String}};
parse_variant3(Arities, Tag, ElemTypes, Tk, String) ->
    case next_token(Tk, String) of
        {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start);
        % next_token's success value is a 3-tuple {Token, Tk, String}; the
        % tokenizer state and remaining input are not needed on this path.
        {ok, {{_, Actual, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Actual, Row, Start, End}};
        {error, Reason} ->
            {error, Reason}
    end.
|
||||
|
||||
% Parse the parenthesised arguments of a variant constructor and assemble
% the final {variant, Arities, Tag, ArgsTuple} value. Row/Start are the
% position of the opening "(".
parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) ->
    case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
        {error, Reason} ->
            {error, Reason};
        {ok, {Args, RestTk, RestString}} ->
            {ok, {{variant, Arities, Tag, list_to_tuple(Args)}, RestTk, RestString}}
    end.
|
||||
|
||||
% Find the constructor named Name in a list of {Name, ElemTypes} pairs.
% Index is the position of the head of the list, so the returned tag is the
% constructor's position counted from the initial Index. Returns
% {ok, {Tag, ElemTypes}}, or the atom error if the name is not present.
lookup_variant(Name, [{Name, ElemTypes} | _], Index) ->
    {ok, {Index, ElemTypes}};
lookup_variant(Name, [_ | Others], Index) ->
    lookup_variant(Name, Others, Index + 1);
lookup_variant(_, [], _) ->
    error.
|
||||
|
||||
%%% Record Parsing
|
||||
|
||||
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) ->
|
||||
parse_map(KeyType, ValueType, Tk, String, #{});
|
||||
parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) ->
|
||||
parse_record(Fields, Tk, String);
|
||||
parse_record(Fields, Tk, String, #{});
|
||||
parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
|
||||
case next_token(Tk, String) of
|
||||
{ok, {{character, "}", _, _, _}, NewTk, NewString}} ->
|
||||
@ -174,8 +299,83 @@ parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
|
||||
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
|
||||
{error, {wrong_type, O, N, map, Row, Start, Start}}.
|
||||
|
||||
% Placeholder for record parsing without an accumulator: always reports that
% the feature is not implemented. The arguments are ignored, so they are
% underscore-prefixed to avoid unused-variable warnings.
parse_record(_Fields, _Tk, _String) ->
    {error, not_yet_implemented}.
|
||||
% Read the start of the next record field, or the end of the record.
%
% Fields is the list of {Name, Type} pairs from the record definition and Acc
% is a map from field name to the value parsed so far. A "}" finishes the
% record, an identifier starts a field assignment, and anything else is an
% unexpected token.
parse_record(Fields, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{character, "}", Row, Start, End}, RestTk, RestString}} ->
            parse_record_end(Fields, RestTk, RestString, Acc, Row, Start, End);
        {ok, {{alphanum, FieldName, Row, Start, End}, RestTk, RestString}} ->
            parse_record2(Fields, RestTk, RestString, Acc, FieldName, Row, Start, End);
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end.
|
||||
|
||||
% Check that the field name exists in the record definition and pick up its
% declared type. Unknown names are reported as invalid_field at the position
% of the identifier.
parse_record2(Fields, Tk, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, FieldType} ->
            parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, FieldType)
    end.
|
||||
|
||||
% Reject a field that has already been assigned earlier in the same record
% literal; otherwise carry on to parse the "= value" part.
parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        true ->
            {error, {field_already_present, Ident, Row, Start, End}};
        false ->
            parse_record4(Fields, Tk, String, Acc, Ident, Type)
    end.
|
||||
|
||||
% Consume the "=" that separates a field name from its value, then parse the
% value. A failure from expect_tokens is returned unchanged.
parse_record4(Fields, Tk, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {RestTk, RestString}} ->
            parse_record5(Fields, RestTk, RestString, Acc, Ident, Type)
    end.
|
||||
|
||||
% Parse the value of a record field against the field's declared type and
% store it in the accumulator map under the field name.
%
% If the value fails to parse, the reason is wrapped with
% {record_field, Ident} and returned as {error, Wrapped}, the same shape the
% list and tuple element parsers use for their wrapped errors.
parse_record5(Fields, Tk, String, Acc, Ident, Type) ->
    case parse_expression(Type, Tk, String) of
        {ok, {Result, NewTk, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewTk, NewString, NewAcc);
        {error, Reason} ->
            % wrap_error only builds the wrapped reason; the error tag has to
            % be added here so callers matching on {error, _} see the failure.
            Wrapped = wrap_error(Reason, {record_field, Ident}),
            {error, Wrapped}
    end.
|
||||
|
||||
% After a field value, expect "," (another field follows) or "}" (the record
% is finished). Any other token is an unexpected_token error at that token's
% position; tokenizer errors are passed through.
parse_record6(Fields, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{character, "}", Row, Start, End}, RestTk, RestString}} ->
            parse_record_end(Fields, RestTk, RestString, Acc, Row, Start, End);
        {ok, {{character, ",", _, _, _}, RestTk, RestString}} ->
            parse_record(Fields, RestTk, RestString, Acc);
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end.
|
||||
|
||||
% Finish a record once its closing "}" has been read: arrange the collected
% field values in definition order and return them with the remaining input.
% A missing field is reported at the position of the closing brace
% (Row/Start/End).
parse_record_end(Fields, Tk, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {error, {missing_field, FieldName}} ->
            {error, {missing_field, FieldName, Row, Start, End}};
        {ok, Record} ->
            {ok, {Record, Tk, String}}
    end.
|
||||
|
||||
% Walk the record definition in order, looking each field name up in the map
% of parsed values. The result is {ok, {tuple, Values}} with the values in
% definition order, or {error, {missing_field, Name}} for the first field
% that has no value. The third argument accumulates values newest first.
parse_record_final_loop([], _, ValuesReversed) ->
    {ok, {tuple, list_to_tuple(lists:reverse(ValuesReversed))}};
parse_record_final_loop([{Name, _} | Remaining], FieldValues, ValuesReversed) ->
    case maps:find(Name, FieldValues) of
        error ->
            {error, {missing_field, Name}};
        {ok, Value} ->
            parse_record_final_loop(Remaining, FieldValues, [Value | ValuesReversed])
    end.
|
||||
|
||||
|
||||
%%% Map Parsing
|
||||
|
||||
@ -238,20 +438,33 @@ check_sophia_to_fate(Type, Sophia, Fate) ->
|
||||
erlang:error({to_fate_failed, Fate, FateActual})
|
||||
end.
|
||||
|
||||
% Test helper: check that the Sophia literal parses to the expected FATE term
% both with the given type and with no type information at all, then pass
% the expected term to gmb_fate_encoding:serialize/1. Returns ok.
check_parser(Type, Sophia, Fate) ->
    % Typed parse first, then the untyped parse of the same text.
    check_sophia_to_fate(Type, Sophia, Fate),
    Untyped = unknown_type(),
    check_sophia_to_fate(Untyped, Sophia, Fate),

    % Last of all, make sure gmb accepts the expected FATE term.
    gmb_fate_encoding:serialize(Fate),

    ok.
|
||||
|
||||
% Test helper: derive the type of a Sophia literal by compiling it as the
% body of an entrypoint, then run the typed, untyped and serialization checks
% through check_parser/3.
check_parser(Sophia, Fate) ->
    % Compile the literal using the compiler, to check that it is valid Sophia
    % syntax, and to get an AACI object to pass to the parser.
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {ok, AACI} = hz_aaci:aaci_from_string(Source),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
    % check_parser/3 performs the typed parse, the untyped parse and the gmb
    % serialization check, so none of them are repeated here.
    check_parser(Type, Sophia, Fate).
|
||||
|
||||
% Test helper for literals that need a record or datatype definition: the
% definition is compiled into the contract next to the entrypoint that
% returns the literal. Only the typed parse is checked.
check_parser_with_typedef(Typedef, Sophia, Fate) ->
    % Build the contract source with the type definition ahead of the
    % entrypoint.
    Source = lists:append(["contract C =\n ", Typedef, "\n entrypoint f() = ", Sophia]),
    {ok, AACI} = hz_aaci:aaci_from_string(Source),
    {ok, {_, ReturnType}} = hz_aaci:get_function_signature(AACI, "f"),

    % The expected FATE term is passed through gmb as in the other helpers.
    gmb_fate_encoding:serialize(Fate),

    % Typed parse only: the AACI probably carries record/variant definitions,
    % so an untyped parse of the same text probably doesn't work.
    check_sophia_to_fate(ReturnType, Sophia, Fate).
|
||||
|
||||
% An integer literal parses to the same integer.
int_test() ->
    Sophia = "123",
    check_parser(Sophia, 123).
|
||||
@ -262,6 +475,32 @@ list_test() ->
|
||||
% Nested lists, including an empty inner list, parse to nested Erlang lists.
list_of_lists_test() ->
    Sophia = "[[], [1], [2, 3]]",
    Expected = [[], [1], [2, 3]],
    check_parser(Sophia, Expected).
|
||||
|
||||
% Tuples parse to {tuple, ErlangTuple}, including a tuple nested in a tuple
% next to a list.
tuple_test() ->
    Sophia = "(1, [2, 3], (4, 5))",
    Expected = {tuple, {1, [2, 3], {tuple, {4, 5}}}},
    check_parser(Sophia, Expected).
|
||||
|
||||
% A Sophia map literal parses to an Erlang map with the same keys and values.
maps_test() ->
    Sophia = "{[1] = 2, [3] = 4}",
    Expected = #{1 => 2, 3 => 4},
    check_parser(Sophia, Expected).
|
||||
|
||||
% A record literal parses to a tuple of its field values when the record
% type is known, and fails with unresolved_record when it is not.
records_test() ->
    Definition = "record pair = {x: int, y: int}",
    Literal = "{x = 1, y = 2}",
    Expected = {tuple, {1, 2}},
    check_parser_with_typedef(Definition, Literal, Expected),
    % check_parser_with_typedef skips the untyped parse, so run it here by
    % hand; it is expected to fail.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Literal).
|
||||
|
||||
% Each constructor of a three-constructor datatype parses to
% {variant, Arities, Tag, Args} when typed, and every one of them fails with
% unresolved_variant when parsed without type information.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Cases = [
        {"Zero", {variant, [0, 1, 2], 0, {}}},
        {"One(0)", {variant, [0, 1, 2], 1, {0}}},
        {"Two(0, 1)", {variant, [0, 1, 2], 2, {0, 1}}},
        {"Two([], [1, 2, 3])", {variant, [0, 1, 2], 2, {[], [1, 2, 3]}}}
    ],
    Check = fun({Sophia, Fate}) ->
        check_parser_with_typedef(TypeDef, Sophia, Fate),
        {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), Sophia)
    end,
    % The cases run in the order listed above.
    lists:foreach(Check, Cases),
    ok.
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user