Merge branch 'parser' of ssh://git.qpq.swiss:21203/QPQ-AG/hakuzaru into parser

This commit is contained in:
Craig Everett 2026-02-13 13:50:57 +09:00
commit a1fc5f19fa

View File

@ -28,7 +28,7 @@ parse_literal(Type, String) ->
parse_literal2(Result, Pos, String) -> parse_literal2(Result, Pos, String) ->
% We have parsed a valid expression. Now check that the string ends. % We have parsed a valid expression. Now check that the string ends.
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{eof, _, _, _, _}, _, _}} -> {ok, {{eof, _, _, _, _, _}, _, _}} ->
{ok, Result}; {ok, Result};
{ok, {Token, _, _}} -> {ok, {Token, _, _}} ->
unexpected_token(Token); unexpected_token(Token);
@ -46,7 +46,7 @@ parse_literal2(Result, Pos, String) ->
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
next_token({Row, Col}, []) -> next_token({Row, Col}, []) ->
{ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}}; {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Rest) -> next_token({Row, Col}, " " ++ Rest) ->
next_token({Row, Col + 1}, Rest); next_token({Row, Col + 1}, Rest);
next_token({Row, Col}, "\t" ++ Rest) -> next_token({Row, Col}, "\t" ++ Rest) ->
@ -236,8 +236,8 @@ parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
end; end;
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
parse_list(Type, Pos, String, Row, Start); parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
parse_tuple(Type, Pos, String, Row, Start); parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Pos, String, Row, Start); parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) -> parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
@ -339,27 +339,35 @@ typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
%%% List Parsing %%% List Parsing
parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) -> parse_list({_, _, {list, [Inner]}}, Pos, String, _, _) ->
parse_list_loop(Inner, Pos, String, "]", Row, Start, []); parse_list2(Inner, Pos, String);
parse_list({_, _, unknown_type}, Pos, String, Row, Start) -> parse_list({_, _, unknown_type}, Pos, String, _, _) ->
parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []); parse_list2(unknown_type(), Pos, String);
parse_list({O, N, _}, _, _, Row, Start) -> parse_list({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, list, Row, Start, Start}}. {error, {wrong_type, O, N, list, Row, Start, Start}}.
parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> parse_list2(Inner, Pos, String) ->
case next_token(Pos, String) of case parse_list_loop(Inner, Pos, String, "]", []) of
{ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> {ok, {Result, _, _, NewPos, NewString}} ->
{ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {Result, NewPos, NewString}};
{ok, {Token, NewPos, NewString}} ->
parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> parse_list_loop(Inner, Pos, String, CloseChar, Acc) ->
case next_token(Pos, String) of
{ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
{ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}};
{ok, {Token, NewPos, NewString}} ->
parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token);
{error, Reason} ->
{error, Reason}
end.
parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) ->
case parse_expression2(Inner, Pos, String, Token) of case parse_expression2(Inner, Pos, String, Token) of
{ok, {Value, NewPos, NewString}} -> {ok, {Value, NewPos, NewString}} ->
parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]); parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]);
{error, Reason} -> {error, Reason} ->
Wrapper = choose_list_error_wrapper(CloseChar), Wrapper = choose_list_error_wrapper(CloseChar),
% TODO: Are tuple indices off by one from list indices? % TODO: Are tuple indices off by one from list indices?
@ -367,12 +375,12 @@ parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) ->
{error, Wrapped} {error, Wrapped}
end. end.
parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> parse_list_loop3(Inner, Pos, String, CloseChar, Acc) ->
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
{ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}};
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc);
{ok, {Token, _, _}} -> {ok, {Token, _, _}} ->
unexpected_token(Token, CloseChar); unexpected_token(Token, CloseChar);
{error, Reason} -> {error, Reason} ->
@ -384,22 +392,22 @@ choose_list_error_wrapper(")") -> tuple_element.
%%% Ambiguous Parenthesis Parsing %%% Ambiguous Parenthesis Parsing
parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) -> parse_tuple({_, _, unknown_type}, Pos, String) ->
% An untyped tuple is a list of untyped terms, and weirdly our list parser % An untyped tuple is a list of untyped terms, and weirdly our list parser
% works perfectly for that, as long as we change the closing character to % works perfectly for that, as long as we change the closing character to
% be ")" instead of "]". % be ")" instead of "]".
case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of case parse_list_loop(unknown_type(), Pos, String, ")", []) of
{ok, {[Inner], NewPos, NewString}} -> {ok, {[Inner], false, _, NewPos, NewString}} ->
% In Sophia, singleton tuples are unwrapped, and given the inner % In Sophia, trailing commas are invalid, and so all singleton
% type. % tuples are unwrapped, and translated into the inner type.
{ok, {Inner, NewPos, NewString}}; {ok, {Inner, NewPos, NewString}};
{ok, {TermList, NewPos, NewString}} -> {ok, {TermList, _, _, NewPos, NewString}} ->
Result = {tuple, list_to_tuple(TermList)}, Result = {tuple, list_to_tuple(TermList)},
{ok, {Result, NewPos, NewString}}; {ok, {Result, NewPos, NewString}};
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end; end;
parse_tuple({O, N, T}, Pos, String, _, _) -> parse_tuple(Type, Pos, String) ->
% Typed tuple parsing is quite complex, because we also want to support % Typed tuple parsing is quite complex, because we also want to support
% normal parentheses for grouping. It's not strictly necessary for % normal parentheses for grouping. It's not strictly necessary for
% inputting data, since we don't have any infix operators in simple % inputting data, since we don't have any infix operators in simple
@ -413,9 +421,9 @@ parse_tuple({O, N, T}, Pos, String, _, _) ->
{ok, {Count, Token, NewPos, NewString}} -> {ok, {Count, Token, NewPos, NewString}} ->
% Compare that to the amount of nesting tuple connectives are in % Compare that to the amount of nesting tuple connectives are in
% the type we are expected to produce. % the type we are expected to produce.
{ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []),
% Now work out what to do with all this information. % Now work out what to do with all this information.
parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token); parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
@ -437,23 +445,23 @@ extract_tuple_type_info(ParenCount, HeadType, Tails) ->
% No parens, or no more (non-empty) tuples. Stop! % No parens, or no more (non-empty) tuples. Stop!
{ParenCount, HeadType, Tails}. {ParenCount, HeadType, Tails}.
parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
{error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col); parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) -> parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) ->
% Finished with parentheses for now, try and parse an expression out, to % Finished with parentheses for now, try and parse an expression out, to
% get our head term. % get our head term.
case parse_expression2(HeadType, Pos, String, Token) of case parse_expression2(HeadType, Pos, String, Token) of
{ok, {Result, NewPos, NewString}} -> {ok, {Result, NewPos, NewString}} ->
% Got a head term. Now try to build all the other tuple layers. % Got a head term. Now try to build all the other tuple layers.
parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString); parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
% TODO: Wrap errors here too. % TODO: Wrap errors here too.
{error, Reason} {error, Reason}
end. end.
parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
% There are zero excess parens, meaning all our parens are tuples. Get the % There are zero excess parens, meaning all our parens are tuples. Get the
% top one. % top one.
[Tail | _] = Tails, [Tail | _] = Tails,
@ -461,44 +469,32 @@ parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) ->
% got zero. % got zero.
ExpectCount = 1 + length(Tail), ExpectCount = 1 + length(Tail),
{error, {not_enough_elements, ExpectCount, 0, Row, Col}}; {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
% If we have some ambiguous parentheses left, we now know one of them is % If we have some ambiguous parentheses left, we now know one of them is
% this empty tuple. % this empty tuple.
HeadTerm = {tuple, {}}, HeadTerm = {tuple, {}},
NewExcessCount = ExcessCount - 1, NewExcessCount = ExcessCount - 1,
% Now continue the loop as if it were an integer or something, in the head % Now continue the loop as if it were an integer or something, in the head
% position. % position.
parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String); parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
% We were expecting a head term of a different type! % We were expecting a head term of a different type!
{error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) -> parse_tuple_tails(0, HeadTerm, [], Pos, String) ->
% Tuples left to build, but no extra open parens to deal with, so we can
% just parse multivalues naively, starting from the "we have a term,
% waiting for a comma" stage of the loop.
case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of
{ok, {Terms, NewPos, NewString}} ->
NewHead = {tuple, list_to_tuple(Terms)},
parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString);
{error, Reason} ->
% TODO: More error wrapping?
{error, Reason}
end;
parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) ->
% No open parens left, no tuples left to build, we are done! % No open parens left, no tuples left to build, we are done!
{ok, {HeadTerm, Pos, String}}; {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) ->
% The ambiguous case, where we have a mix of tuple parens, and grouping % The ambiguous case, where we have a mix of tuple parens, and grouping
% parens. We want to peek at the next token, to see if it closes a grouping % parens. We want to peek at the next token, to see if it closes a grouping
% paren. % paren.
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{character, ")", _, _, _, _}, NewPos, NewString}} -> {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} ->
% It is grouping! Close one excess paren, and continue. % It is grouping! Try closing a grouping paren.
parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString); parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} ->
% It is a real tuple! Try the normal logic, then. % It is a real tuple! Try parsing a tuple.
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
{ok, {Token, _, _}} -> {ok, {Token, _, _}} ->
% Anything else is just a boring parse error we can complain about. % Anything else is just a boring parse error we can complain about.
unexpected_token(Token, ")"); unexpected_token(Token, ")");
@ -506,68 +502,93 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) ->
{error, Reason} {error, Reason}
end. end.
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) -> parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of % A singleton tuple was expected, but a grouping paren was given. In theory
% we could be permissive here, but we were asked to do type checking, and
% this is a type error. The type error itself is a bit hard to reproduce,
% but we do know exactly what the fix is, so let's report that instead.
{error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
% A tuple (of more than one element) was expected, but a grouping paren
% was given. Again, the type error is hard to produce, but the actual
% solution is simple; add more elements.
ExpectCount = length(Tail) + 1,
GotCount = 1,
{error, {not_enough_elements, ExpectCount, GotCount, Row, Col}};
parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
% We were expecting some grouping parens, and now we know that one of them
% was in fact grouping. Good.
parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String).
parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
% No more tuples, so commas are invalid. It's hard to describe the type
% error that a comma would actually produce, so instead let's just give
% the user the actual solution to their problems, which is to remove the
% comma.
{error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
% If there are no tails then we would have exited into the "grouping parens
% only" case, so we know this works:
[TailTypes | ParentTails] = Tails,
% Now we can parse this tuple as a tuple.
case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of
{ok, {Terms, NewPos, NewString}} -> {ok, {Terms, NewPos, NewString}} ->
NewHead = {tuple, list_to_tuple(Terms)}, NewHead = {tuple, list_to_tuple(Terms)},
parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString); % Then continue the loop, with whatever parent tuple types this
% tuple is meant to be a part of.
parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
% TODO: wrap errors? % TODO: wrap errors?
{error, Reason} {error, Reason}
end; end.
parse_tuple_tails2(O, N, _, _, [], _, _) ->
% This case is created when, for example, we want int * int, but instead we
% get a term like ((1, 2), 3), of type (int * int) * int. The trouble is,
% ((1, 2)) would have been valid, so it's actually the second comma that
% tips us off to the error, not the first one.
%
% For simpler cases, like (1, 2) when int was expected, this error message
% is fine:
Err = {error, {wrong_type, O, N, tuple, -1, -1, -1}},
% TODO: Row/col
% TODO: Generate better error messages in the cases where N *is* a tuple,
% but the first thing inside that tuple is the problem.
Err.
%%% Unambiguous Tuple/Variant Parsing %%% Unambiguous Tuple/Variant Parsing
parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> parse_multivalue(ElemTypes, Pos, String, Acc) ->
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
{ok, {Token, NewPos, NewString}} -> {ok, {Token, NewPos, NewString}} ->
parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token); parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> parse_multivalue2([Next | Rest], Pos, String, Acc, Token) ->
case parse_expression2(Next, Pos, String, Token) of case parse_expression2(Next, Pos, String, Token) of
{ok, {Value, NewPos, NewString}} -> {ok, {Value, NewPos, NewString}} ->
parse_multivalue3(Rest, NewPos, NewString, Row, Start, [Value | Acc]); parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]);
{error, Reason} -> {error, Reason} ->
Wrapper = choose_list_error_wrapper(")"), Wrapper = choose_list_error_wrapper(")"),
% TODO: Are tuple indices off by one from list indices? % TODO: Are tuple indices off by one from list indices?
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
{error, Wrapped} {error, Wrapped}
end; end;
parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> parse_multivalue2([], Pos, String, Acc, Token) ->
{ok, {lists:reverse(Acc), Pos, String}}; count_multivalue_excess(Pos, String, Acc, Token).
parse_multivalue2([], _, _, _, _, _, Token) ->
unexpected_token(Token, ")").
parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> parse_multivalue3(ElemTypes, Pos, String, Acc) ->
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); parse_multivalue(ElemTypes, NewPos, NewString, Acc);
{ok, {Token, _, _}} -> {ok, {Token, _, _}} ->
unexpected_token(Token, ")"); unexpected_token(Token, ")");
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
% Reports a tuple/variant literal that supplies more elements than its type
% allows.
%
% TypedAcc holds the values already parsed against the expected element
% types, so its length is the number of elements the type permits. Token is
% the first token of the surplus element. The surplus elements are parsed as
% untyped terms, purely so that they can be counted, and the position of the
% closing ")" is reported in the error.
%
% Never returns {ok, _}: a successful parse of the surplus elements becomes a
% too_many_elements error, and any parse error is passed through unchanged.
count_multivalue_excess(Pos, String, TypedAcc, Token) ->
    Expected = length(TypedAcc),
    Parsed = parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token),
    case Parsed of
        {error, _} = Error ->
            Error;
        {ok, {Terms, _, {Row, Col}, _, _}} ->
            {error, {too_many_elements, Expected, length(Terms), Row, Col}}
    end.
check_multivalue_long_enough([], Pos, String, _, _, Acc) -> check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
{ok, {lists:reverse(Acc), Pos, String}}; {ok, {lists:reverse(Acc), Pos, String}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
@ -621,16 +642,16 @@ parse_variant3(Arities, Tag, [], Pos, String) ->
{ok, {Result, Pos, String}}; {ok, {Result, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
case next_token(Pos, String) of case next_token(Pos, String) of
{ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString);
{ok, {Token, _, _}} -> {ok, {Token, _, _}} ->
unexpected_token(Token, "("); unexpected_token(Token, "(");
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of case parse_multivalue(ElemTypes, Pos, String, []) of
{ok, {Terms, NewPos, NewString}} -> {ok, {Terms, NewPos, NewString}} ->
Result = {variant, Arities, Tag, list_to_tuple(Terms)}, Result = {variant, Arities, Tag, list_to_tuple(Terms)},
{ok, {Result, NewPos, NewString}}; {ok, {Result, NewPos, NewString}};
@ -907,6 +928,15 @@ variant_test() ->
ok. ok.
% Checks variant literals whose constructor names (C and D) can also be
% written with a "C." qualifier, in both bare and qualified form.
% NOTE(review): presumably check_parser_with_typedef/2 places the typedef in
% a contract named C, which is what makes "C.C" ambiguous - confirm against
% the helper.
ambiguous_variant_test() ->
    TypeDef = "datatype mytype = C | D",
    Literals = ["C", "D", "C.C", "C.D"],
    lists:foreach(fun(Literal) ->
                      check_parser_with_typedef(TypeDef, Literal)
                  end,
                  Literals),
    ok.
namespace_variant_test() -> namespace_variant_test() ->
Term = "[N.A, N.B]", Term = "[N.A, N.B]",
Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
@ -997,3 +1027,49 @@ lexer_offset_test() ->
ok. ok.
% Checks the element counts and the row/column reported by tuple arity
% errors, using the type of an entrypoint that returns ((1, 2), (3, 4)).
parser_offset_test() ->
    Source = "contract C = entrypoint f() = ((1, 2), (3, 4))",
    {_, Type} = compile_entrypoint_value_and_type(Source, "f"),
    % Each case pairs the expected error reason with the literal that must
    % produce it. The last two fields of each reason are the row and column.
    Cases = [{{not_enough_elements, 2, 1, 1, 8}, "((1, 2))"},
             {{not_enough_elements, 2, 1, 1, 10}, "(((1, 2)))"},
             {{too_many_elements, 2, 3, 1, 24}, "((1, 2), (3, 4), (5, 6))"},
             {{too_many_elements, 2, 3, 1, 10}, "((1, 2, 3), (4, 5))"}],
    lists:foreach(fun({Reason, Literal}) ->
                      {error, Reason} = parse_literal(Type, Literal)
                  end,
                  Cases),
    ok.
% Exercises parsing of one-element tuples, in typed and untyped contexts.
%
% A singleton tuple must be written with a trailing comma, as in "(1,)".
% Plain parentheses around a single term are grouping only.
singleton_test() ->
    % The Sophia compiler would never generate this, but it is a valid type
    % within the FATE virtual machine, and it is possible to represent within
    % the ACI itself.
    SingletonACI = #{tuple => [<<"int">>]},
    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>,
                 arguments => [],
                 stateful => false,
                 payable => false,
                 returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function],
                           name => <<"C">>,
                           kind => contract_main,
                           payable => false,
                           typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),
    % Now let's do some testing with this weird type, to see if we handle it
    % correctly.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    % Some ambiguous nesting parens, for fun.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % No trailing comma should give an error, located at the closing paren.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % The singleton forms should behave the same in untyped contexts, while
    % plain grouping parens simply yield the inner term:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),
    % Also if we wanted an integer, the singleton is NOT dropped, so the
    % trailing comma is an error, located at the comma itself.
    IntType = {integer, already_normalized, integer},
    {error, {expected_close_paren, 1, 3}} = parse_literal(IntType, "(1,)"),
    ok.