Compare commits

..

No commits in common. "4f2a3c6c6f85422e03c3c0737b2d0a46afe55945" and "56e63051bc8cf5c90f101d53241da6b5a6df97b6" have entirely different histories.

View File

@ -100,12 +100,8 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
end;
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
parse_list(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) ->
parse_variant(Type, Tk, String, Ident, Row, Start, End);
parse_expression2(_, _, _, {_, S, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}.
@ -125,166 +121,45 @@ expect_tokens([Str | Rest], Tk, String) ->
%%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
parse_list_loop(Inner, Tk, String, "]", Row, Start, []);
parse_list_loop(Inner, Tk, String, Row, Start, []);
parse_list({_, _, unknown_type}, Tk, String, Row, Start) ->
parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []);
parse_list_loop(unknown_type(), Tk, String, Row, Start, []);
parse_list({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, list, Row, Start, Start}}.
parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
parse_list_loop(Inner, Tk, String, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} ->
{ok, {{character, "]", _, _, _}, NewTk, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}};
{ok, {Token, NewTk, NewString}} ->
parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token)
parse_list_loop2(Inner, NewTk, NewString, Row, Start, Acc, Token)
end.
parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) ->
parse_list_loop2(Inner, Tk, String, Row, Start, Acc, Token) ->
case parse_expression2(Inner, Tk, String, Token) of
{ok, {Value, NewTk, NewString}} ->
parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]);
parse_list_loop3(Inner, NewTk, NewString, Row, Start, [Value | Acc]);
{error, Reason} ->
Wrapper = choose_list_error_wrapper(CloseChar),
% TODO: Are tuple indices off by one from list indices?
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
Wrapped = wrap_error(Reason, {list_element, length(Acc)}),
{error, Wrapped}
end.
parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
parse_list_loop3(Inner, Tk, String, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} ->
{ok, {{character, "]", _, _, _}, NewTk, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}};
{ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc);
parse_list_loop(Inner, NewTk, NewString, Row, Start, Acc);
{error, Reason} ->
{error, Reason}
end.
% Select the tag used when wrapping an element-level parse error, keyed on
% the closing delimiter of the sequence being parsed: "]" gives list_element
% and ")" gives tuple_element. Any other argument is a function_clause error.
choose_list_error_wrapper([$]]) ->
    list_element;
choose_list_error_wrapper([$)]) ->
    tuple_element.
%%% Tuple Parsing
% Parse the body of a tuple literal. parse_expression2 dispatches here after
% reading a "(" token, so parsing continues from just past that token.
%
% Returns {ok, {{tuple, Tuple}, Tk, String}} on success, or {error, Reason}.
% A type that is neither a tuple type nor unknown_type yields a wrong_type
% error positioned at Start.
parse_tuple({_, _, {tuple, ElemTypes}}, Tk, String, Row, Start) ->
    case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
        {error, _} = Failure ->
            Failure;
        {ok, {Elems, RestTk, RestString}} ->
            {ok, {{tuple, list_to_tuple(Elems)}, RestTk, RestString}}
    end;
parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) ->
    % Without type information a tuple is just a sequence of untyped terms.
    % The list loop already handles exactly that; it only needs to stop at
    % ")" rather than "]".
    case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of
        {error, _} = Failure ->
            Failure;
        {ok, {Elems, RestTk, RestString}} ->
            {ok, {{tuple, list_to_tuple(Elems)}, RestTk, RestString}}
    end;
parse_tuple({O, N, _}, _Tk, _String, Row, Start) ->
    {error, {wrong_type, O, N, tuple, Row, Start, Start}}.
% Parse the next element of a parenthesised, comma-separated sequence whose
% remaining element types are listed in ElemTypes. Used for typed tuples and
% for variant constructor arguments.
%
% Acc holds the values parsed so far, most recent first. Row and Start are
% threaded through to the helper functions unchanged.
%
% Returns {ok, {Values, Tk, String}} once ")" is reached and every expected
% element has been supplied, otherwise {error, Reason}.
parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
            % End of the sequence: make sure no element types are left over.
            check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
        {ok, {Token, NewTk, NewString}} ->
            parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token);
        {error, Reason} ->
            % Propagate tokenizer failures instead of crashing with a
            % case_clause, matching how parse_multivalue3 treats them.
            {error, Reason}
    end.
% Parse one element of a parenthesised sequence, given the token that starts
% it. The first argument lists the element types still expected.
%
% With no types left, only a ")" token is accepted; any other token is
% reported as unexpected_token. Otherwise the element is parsed against the
% next expected type, and a failure is wrapped with the element's position
% in the sequence.
parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) ->
    {ok, {lists:reverse(Acc), Tk, String}};
parse_multivalue2([], _, _, _, _, _, {_, Text, TokRow, TokStart, TokEnd}) ->
    {error, {unexpected_token, Text, TokRow, TokStart, TokEnd}};
parse_multivalue2([ElemType | Remaining], Tk, String, Row, Start, Acc, Token) ->
    case parse_expression2(ElemType, Tk, String, Token) of
        {error, Reason} ->
            Tag = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            {error, wrap_error(Reason, {Tag, length(Acc)})};
        {ok, {Value, NewTk, NewString}} ->
            parse_multivalue3(Remaining, NewTk, NewString, Row, Start, [Value | Acc])
    end.
% After an element of a parenthesised sequence has been parsed, expect
% either ")" to end the sequence or "," to continue with the next element.
%
% ElemTypes lists the element types still expected; Acc holds the values
% parsed so far, most recent first.
%
% Returns {ok, {Values, Tk, String}} or {error, Reason}. Any other token is
% reported as {unexpected_token, Text, Row, Start, End}.
parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) ->
    case next_token(Tk, String) of
        {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _}, NewTk, NewString}} ->
            parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc);
        {ok, {{_, Text, TokRow, TokStart, TokEnd}, _, _}} ->
            % Anything else between elements is a syntax error. Report it
            % the same way the record parser does rather than crashing with
            % a case_clause.
            {error, {unexpected_token, Text, TokRow, TokStart, TokEnd}};
        {error, Reason} ->
            {error, Reason}
    end.
% Called when the closing ")" of a sequence has been read. If no element
% types remain, the accumulated values are returned in source order. If
% some remain, the sequence was too short: report how many elements were
% expected in total and how many were supplied, at the given Row and Col.
check_multivalue_long_enough([], Tk, String, _Row, _Col, Acc) ->
    {ok, {lists:reverse(Acc), Tk, String}};
check_multivalue_long_enough(Missing, _Tk, _String, Row, Col, Parsed) ->
    Have = length(Parsed),
    Want = length(Missing) + Have,
    {error, {not_enough_elements, Want, Have, Row, Col}}.
%%% Variant parsing
% Parse a variant value whose constructor identifier (Ident) has already
% been read; Row, Start and End are taken from that identifier token.
%
% - With a variant type, the constructor is resolved against its variants.
% - With unknown_type, the constructor cannot be resolved, so the result is
%   an unresolved_variant error.
% - With any other type, the result is a wrong_type error. In normal code an
%   identifier can mean many things, but in this Sophia 'object notation'
%   it can only be a variant constructor (somewhat like a Sophia version of
%   atoms), so seeing one where no variant is expected is a type error.
parse_variant({_, _, {variant, Constructors}}, Tk, String, Ident, Row, Start, End) ->
    parse_variant2(Constructors, Tk, String, Ident, Row, Start, End);
parse_variant({_, _, unknown_type}, _Tk, _String, _Ident, Row, Start, End) ->
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _Tk, _String, _Ident, Row, Start, End) ->
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
% Resolve the constructor Ident against the list of {Name, ElemTypes}
% variants. If it is found, continue parsing its arguments, passing along
% the constructor's tag and the arity of every variant. If it is not found,
% return an invalid_constructor error at the identifier's position.
parse_variant2(Variants, Tk, String, Ident, Row, Start, End) ->
    case lookup_variant(Ident, Variants, 0) of
        error ->
            {error, {invalid_constructor, Ident, Row, Start, End}};
        {ok, {Tag, ElemTypes}} ->
            % One arity per variant, in declaration order.
            Arities = lists:map(fun({_, ArgTypes}) -> length(ArgTypes) end, Variants),
            parse_variant3(Arities, Tag, ElemTypes, Tk, String)
    end.
% Parse the argument list of a variant constructor that has already been
% resolved to Tag, with argument types ElemTypes.
%
% A constructor with no arguments is complete as soon as its name has been
% read, so no further tokens are consumed. Otherwise the next token must be
% "(" and the arguments are parsed as a typed sequence.
%
% Returns {ok, {{variant, Arities, Tag, ArgsTuple}, Tk, String}} or
% {error, Reason}.
parse_variant3(Arities, Tag, [], Tk, String) ->
    % Parsing of 0-arity variants is different.
    Result = {variant, Arities, Tag, {}},
    {ok, {Result, Tk, String}};
parse_variant3(Arities, Tag, ElemTypes, Tk, String) ->
    case next_token(Tk, String) of
        {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start);
        {ok, {{_, Actual, Row, Start, End}, _, _}} ->
            % next_token yields {Token, Tk, String}; the tokenizer state and
            % the remaining string must be matched too, or this clause can
            % never fire.
            {error, {unexpected_token, Actual, Row, Start, End}};
        {error, Reason} ->
            {error, Reason}
    end.
% Parse the arguments of a variant constructor after its "(" has been read,
% then package them as {variant, Arities, Tag, ArgsTuple}. Errors from the
% argument parser are returned unchanged.
parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) ->
    case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
        {error, _} = Failure ->
            Failure;
        {ok, {Args, RestTk, RestString}} ->
            {ok, {{variant, Arities, Tag, list_to_tuple(Args)}, RestTk, RestString}}
    end.
% Find the first {Name, ElemTypes} entry whose Name equals Ident.
%
% Tag is the index assigned to the head of the list and is incremented for
% every entry skipped. Returns {ok, {Tag, ElemTypes}} for the matching
% entry, or the atom error if the list is exhausted.
lookup_variant(Ident, [Candidate | Rest], Tag) ->
    case Candidate of
        {Ident, ElemTypes} ->
            {ok, {Tag, ElemTypes}};
        _ ->
            lookup_variant(Ident, Rest, Tag + 1)
    end;
lookup_variant(_, [], _) ->
    error.
%%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) ->
parse_map(KeyType, ValueType, Tk, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) ->
parse_record(Fields, Tk, String, #{});
parse_record(Fields, Tk, String);
parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
case next_token(Tk, String) of
{ok, {{character, "}", _, _, _}, NewTk, NewString}} ->
@ -299,83 +174,8 @@ parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, map, Row, Start, Start}}.
% Parse the next entry of a record literal, or its closing "}".
%
% Fields is the list of {Name, Type} pairs declared for the record. Acc is
% a map from field name to the value parsed for it so far.
%
% An identifier starts a new field assignment; "}" finishes the record; any
% other token is reported as unexpected_token. Tokenizer errors are passed
% through unchanged.
parse_record(Fields, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{alphanum, FieldName, Row, Start, End}, NextTk, NextString}} ->
            parse_record2(Fields, NextTk, NextString, Acc, FieldName, Row, Start, End);
        {ok, {{character, "}", Row, Start, End}, NextTk, NextString}} ->
            parse_record_end(Fields, NextTk, NextString, Acc, Row, Start, End);
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end.
% Look up the declared type of field Ident. An identifier that is not among
% the record's fields produces an invalid_field error at the identifier's
% position; otherwise parsing continues with the field's type.
parse_record2(Fields, Tk, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, FieldType} ->
            parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, FieldType)
    end.
% Reject a field that has already been assigned in this record literal.
% A repeated field yields field_already_present at the identifier's
% position; a new field continues on to parsing its "= value" part.
parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        true ->
            {error, {field_already_present, Ident, Row, Start, End}};
        false ->
            parse_record4(Fields, Tk, String, Acc, Ident, Type)
    end.
% Require the "=" that separates a field name from its value, then go on to
% parse the value. A failure from expect_tokens is returned unchanged.
parse_record4(Fields, Tk, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Tk, String) of
        {error, _} = Failure ->
            Failure;
        {ok, {AfterEqTk, AfterEqString}} ->
            parse_record5(Fields, AfterEqTk, AfterEqString, Acc, Ident, Type)
    end.
% Parse the value of field Ident against its declared Type and store it in
% Acc, then continue with whatever follows the value.
%
% Returns the result of parsing the rest of the record, or {error, Reason}
% where Reason is the value's parse error wrapped with
% {record_field, Ident}, so the caller can tell which field failed.
parse_record5(Fields, Tk, String, Acc, Ident, Type) ->
    case parse_expression(Type, Tk, String) of
        {ok, {Result, NewTk, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewTk, NewString, NewAcc);
        {error, Reason} ->
            % wrap_error/2 returns only the wrapped reason, as at the other
            % call sites, so it has to be tagged as an error here.
            Wrapped = wrap_error(Reason, {record_field, Ident}),
            {error, Wrapped}
    end.
% After a field value has been parsed, expect "," to continue with another
% field or "}" to finish the record. Any other token is reported as
% unexpected_token; tokenizer errors are passed through unchanged.
parse_record6(Fields, Tk, String, Acc) ->
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{character, ",", _, _, _}, NextTk, NextString}} ->
            parse_record(Fields, NextTk, NextString, Acc);
        {ok, {{character, "}", Row, Start, End}, NextTk, NextString}} ->
            parse_record_end(Fields, NextTk, NextString, Acc, Row, Start, End);
        {ok, {{_, Text, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}}
    end.
% Finish a record once its closing "}" has been read. The parsed field
% values are arranged in declaration order; if a declared field was never
% assigned, the missing_field error is given the position of the closing
% brace (Row, Start, End).
parse_record_end(Fields, Tk, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}};
        {ok, Record} ->
            {ok, {Record, Tk, String}}
    end.
% Walk the declared {Name, Type} fields in order, collecting each field's
% value from the FieldValues map.
%
% Returns {ok, {tuple, Values}} with the values in declaration order, or
% {error, {missing_field, Name}} for the first declared field that has no
% entry in FieldValues.
parse_record_final_loop([], _, Collected) ->
    InOrder = lists:reverse(Collected),
    {ok, {tuple, list_to_tuple(InOrder)}};
parse_record_final_loop([{Name, _} | Rest], FieldValues, Collected) ->
    case maps:find(Name, FieldValues) of
        error ->
            {error, {missing_field, Name}};
        {ok, Value} ->
            parse_record_final_loop(Rest, FieldValues, [Value | Collected])
    end.
% Placeholder for record parsing: always returns
% {error, not_yet_implemented}. The arguments are ignored, so they are
% underscore-prefixed to avoid unused-variable compiler warnings.
parse_record(_Fields, _Tk, _String) ->
    {error, not_yet_implemented}.
%%% Map Parsing
@ -438,33 +238,20 @@ check_sophia_to_fate(Type, Sophia, Fate) ->
erlang:error({to_fate_failed, Fate, FateActual})
end.
% Test helper: check the Sophia literal against the expected FATE term
% twice, once with the given Type and once with unknown_type(), then run
% the FATE term through gmb_fate_encoding:serialize/1 to confirm that gmb
% understands it. Returns ok.
check_parser(Type, Sophia, Fate) ->
    check_sophia_to_fate(Type, Sophia, Fate),
    Untyped = unknown_type(),
    check_sophia_to_fate(Untyped, Sophia, Fate),
    _Serialized = gmb_fate_encoding:serialize(Fate),
    ok.
% Test helper for literals that need no type definitions.
%
% The literal is wrapped in a minimal contract and compiled, which both
% checks that it is valid Sophia syntax and gives an AACI from which the
% type of f is read. The expected FATE term is run through gmb's
% serializer, and the literal is then checked with and without type
% information.
check_parser(Sophia, Fate) ->
    ContractSource = "contract C = entrypoint f() = " ++ Sophia,
    {ok, AACI} = hz_aaci:aaci_from_string(ContractSource),
    {ok, {_, ReturnType}} = hz_aaci:get_function_signature(AACI, "f"),
    % Make sure the expected FATE term is itself valid.
    gmb_fate_encoding:serialize(Fate),
    % Typed parse first, then the same literal with no type information.
    check_sophia_to_fate(ReturnType, Sophia, Fate),
    check_sophia_to_fate(unknown_type(), Sophia, Fate).
% Test helper for literals that depend on a type definition.
%
% Typedef is compiled into the contract alongside the literal, and the type
% of f is read from the resulting AACI. Only a typed parse is checked:
% the AACI probably contains record/variant definitions, so an untyped
% parse of the same literal probably would not work.
check_parser_with_typedef(Typedef, Sophia, Fate) ->
    Header = "contract C =\n " ++ Typedef,
    Entrypoint = "\n entrypoint f() = " ++ Sophia,
    ContractSource = Header ++ Entrypoint,
    {ok, AACI} = hz_aaci:aaci_from_string(ContractSource),
    {ok, {_, ReturnType}} = hz_aaci:get_function_signature(AACI, "f"),
    % Make sure the expected FATE term is itself valid.
    gmb_fate_encoding:serialize(Fate),
    check_sophia_to_fate(ReturnType, Sophia, Fate).
check_parser(Type, Sophia, Fate).
% An integer literal parses to the corresponding integer.
int_test() ->
    Sophia = "123",
    Fate = 123,
    check_parser(Sophia, Fate).
@ -475,32 +262,6 @@ list_test() ->
% Nested lists, including an empty inner list, parse to nested Erlang lists.
list_of_lists_test() ->
    Sophia = "[[], [1], [2, 3]]",
    Fate = [[], [1], [2, 3]],
    check_parser(Sophia, Fate).
% A tuple containing an integer, a list and a nested tuple parses to nested
% {tuple, ...} terms.
tuple_test() ->
    Sophia = "(1, [2, 3], (4, 5))",
    Inner = {tuple, {4, 5}},
    Fate = {tuple, {1, [2, 3], Inner}},
    check_parser(Sophia, Fate).
% A map literal with two integer entries parses to the equivalent Erlang map.
maps_test() ->
    Sophia = "{[1] = 2, [3] = 4}",
    Fate = #{1 => 2, 3 => 4},
    check_parser(Sophia, Fate).
% A record literal parses to a tuple of its field values when the record
% type is known, and fails with unresolved_record when it is not.
records_test() ->
    RecordDef = "record pair = {x: int, y: int}",
    Literal = "{x = 1, y = 2}",
    Expected = {tuple, {1, 2}},
    check_parser_with_typedef(RecordDef, Literal, Expected),
    % check_parser_with_typedef skips the untyped parse, so run it here and
    % confirm that it is rejected.
    Untyped = unknown_type(),
    {error, {unresolved_record, _, _, _}} = parse_literal(Untyped, Literal).
% Each constructor of a three-variant datatype parses to a
% {variant, Arities, Tag, Args} term when the type is known, and fails with
% unresolved_variant when parsed without type information.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Cases = [
        {"Zero", {variant, [0, 1, 2], 0, {}}},
        {"One(0)", {variant, [0, 1, 2], 1, {0}}},
        {"Two(0, 1)", {variant, [0, 1, 2], 2, {0, 1}}},
        {"Two([], [1, 2, 3])", {variant, [0, 1, 2], 2, {[], [1, 2, 3]}}}
    ],
    CheckCase = fun({Sophia, Fate}) ->
        check_parser_with_typedef(TypeDef, Sophia, Fate),
        {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), Sophia)
    end,
    lists:foreach(CheckCase, Cases),
    ok.