diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index a2e00ee..13ba1e7 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -28,7 +28,7 @@ parse_literal(Type, String) -> parse_literal2(Result, Pos, String) -> % We have parsed a valid expression. Now check that the string ends. case next_token(Pos, String) of - {ok, {{eof, _, _, _, _}, _, _}} -> + {ok, {{eof, _, _, _, _, _}, _, _}} -> {ok, Result}; {ok, {Token, _, _}} -> unexpected_token(Token); @@ -46,7 +46,7 @@ parse_literal2(Result, Pos, String) -> -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). next_token({Row, Col}, []) -> - {ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}}; + {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}}; next_token({Row, Col}, " " ++ Rest) -> next_token({Row, Col + 1}, Rest); next_token({Row, Col}, "\t" ++ Rest) -> @@ -236,8 +236,8 @@ parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) -> end; parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Pos, String, Row, Start); -parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> - parse_tuple(Type, Pos, String, Row, Start); +parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) -> + parse_tuple(Type, Pos, String); parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) -> @@ -339,27 +339,35 @@ typecheck_signature({O, N, _}, _, _, _, Row, Start, End) -> %%% List Parsing -parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) -> - parse_list_loop(Inner, Pos, String, "]", Row, Start, []); -parse_list({_, _, unknown_type}, Pos, String, Row, Start) -> - parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []); +parse_list({_, _, {list, [Inner]}}, Pos, String, _, _) -> + parse_list2(Inner, Pos, String); +parse_list({_, _, unknown_type}, Pos, String, _, _) -> + parse_list2(unknown_type(), Pos, String); parse_list({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, list, Row, Start, Start}}. -parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> - case next_token(Pos, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> - {ok, {lists:reverse(Acc), NewPos, NewString}}; - {ok, {Token, NewPos, NewString}} -> - parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token); +parse_list2(Inner, Pos, String) -> + case parse_list_loop(Inner, Pos, String, "]", []) of + {ok, {Result, _, _, NewPos, NewString}} -> + {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end. -parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> +parse_list_loop(Inner, Pos, String, CloseChar, Acc) -> + case next_token(Pos, String) of + {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}}; + {ok, {Token, NewPos, NewString}} -> + parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token); + {error, Reason} -> + {error, Reason} + end. + +parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) -> case parse_expression2(Inner, Pos, String, Token) of {ok, {Value, NewPos, NewString}} -> - parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]); + parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(CloseChar), % TODO: Are tuple indices off by one from list indices? @@ -367,12 +375,12 @@ parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> {error, Wrapped} end. -parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> +parse_list_loop3(Inner, Pos, String, CloseChar, Acc) -> case next_token(Pos, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> - {ok, {lists:reverse(Acc), NewPos, NewString}}; + {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}}; {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); + parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc); {ok, {Token, _, _}} -> unexpected_token(Token, CloseChar); {error, Reason} -> @@ -384,22 +392,22 @@ choose_list_error_wrapper(")") -> tuple_element. %%% Ambiguous Parenthesis Parsing -parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) -> +parse_tuple({_, _, unknown_type}, Pos, String) -> % An untyped tuple is a list of untyped terms, and weirdly our list parser % works perfectly for that, as long as we change the closing character to % be ")" instead of "]". - case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of - {ok, {[Inner], NewPos, NewString}} -> - % In Sophia, singleton tuples are unwrapped, and given the inner - % type. + case parse_list_loop(unknown_type(), Pos, String, ")", []) of + {ok, {[Inner], false, _, NewPos, NewString}} -> + % In Sophia, trailing commas are invalid, and so all singleton + % tuples are unwrapped, and translated into the inner type. {ok, {Inner, NewPos, NewString}}; - {ok, {TermList, NewPos, NewString}} -> + {ok, {TermList, _, _, NewPos, NewString}} -> Result = {tuple, list_to_tuple(TermList)}, {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end; -parse_tuple({O, N, T}, Pos, String, _, _) -> +parse_tuple(Type, Pos, String) -> % Typed tuple parsing is quite complex, because we also want to support % normal parentheses for grouping. It's not strictly necessary for % inputting data, since we don't have any infix operators in simple @@ -413,9 +421,9 @@ parse_tuple({O, N, T}, Pos, String, _, _) -> {ok, {Count, Token, NewPos, NewString}} -> % Compare that to the amount of nesting tuple connectives are in % the type we are expected to produce. - {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), + {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []), % Now work out what to do with all this information. - parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token); + parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token); {error, Reason} -> {error, Reason} end. @@ -437,23 +445,23 @@ extract_tuple_type_info(ParenCount, HeadType, Tails) -> % No parens, or no more (non-empty) tuples. Stop! {ParenCount, HeadType, Tails}. -parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> +parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) -> {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> - parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col); -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) -> +parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> + parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col); +parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) -> % Finished with parentheses for now, try and parse an expression out, to % get our head term. case parse_expression2(HeadType, Pos, String, Token) of {ok, {Result, NewPos, NewString}} -> % Got a head term. Now try to build all the other tuple layers. - parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString); + parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString); {error, Reason} -> % TODO: Wrap errors here too. {error, Reason} end. -parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> +parse_empty_tuple(0, _, Tails, _, _, Row, Col) -> % There are zero excess parens, meaning all our parens are tuples. Get the % top one. [Tail | _] = Tails, @@ -461,44 +469,32 @@ parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> % got zero. ExpectCount = 1 + length(Tail), {error, {not_enough_elements, ExpectCount, 0, Row, Col}}; -parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> +parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> % If we have some ambiguous parentheses left, we now know one of them is % this empty tuple. HeadTerm = {tuple, {}}, NewExcessCount = ExcessCount - 1, % Now continue the loop as if it were an integer or something, in the head % position. - parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String); -parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> + parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String); +parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) -> % We were expecting a head term of a different type! {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. -parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) -> - % Tuples left to build, but no extra open parens to deal with, so we can - % just parse multivalues naively, starting from the "we have a term, - % waiting for a comma" stage of the loop. - case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of - {ok, {Terms, NewPos, NewString}} -> - NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString); - {error, Reason} -> - % TODO: More error wrapping? - {error, Reason} - end; -parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) -> +parse_tuple_tails(0, HeadTerm, [], Pos, String) -> % No open parens left, no tuples left to build, we are done! {ok, {HeadTerm, Pos, String}}; -parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> +parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) -> % The ambiguous case, where we have a mix of tuple parens, and grouping % parens. We want to peek at the next token, to see if it closes a grouping % paren. case next_token(Pos, String) of - {ok, {{character, ")", _, _, _, _}, NewPos, NewString}} -> - % It is grouping! Close one excess paren, and continue. - parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString); - {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - % It is a real tuple! Try the normal logic, then. - parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); + {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} -> + % It is grouping! Try closing a grouping paren. + parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col); + {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} -> + % It is a real tuple! Try parsing a tuple. + parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col); {ok, {Token, _, _}} -> % Anything else is just a boring parse error we can complain about. unexpected_token(Token, ")"); @@ -506,68 +502,93 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> {error, Reason} end. -parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) -> - case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of +parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) -> + % A singleton tuple was expected, but a grouping paren was given. In theory + % we could be permissive here, but we were asked to do type checking, and + % this is a type error. The type error itself is a bit hard to reproduce, + % but we do know exactly what the fix is, so let's report that instead. + {error, {expected_trailing_comma, Row, Col}}; +parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) -> + % A tuple (of more than one elements) was expected, but a grouping paren + % was given. Again, the type error is hard to produce, but the actual + % solution is simple; add more elements. + ExpectCount = length(Tail) + 1, + GotCount = 1, + {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}; +parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) -> + % We were expecting some grouping parens, and now we know that one of them + % was in fact grouping. Good. + parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String). + +parse_tuple_tails_comma(_, _, [], _, _, Row, Col) -> + % No more tuples, so commas are invalid. It's hard to describe the type + % error that a comma would actually produce, so instead let's just give + % the user the actual solution to their problems, which is to remove the + % comma. + {error, {expected_close_paren, Row, Col}}; +parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) -> + % If there are no tails then we would have exited into the "grouping parens + % only" case, so we know this works: + [TailTypes | ParentTails] = Tails, + % Now we can parse this tuple as a tuple. + case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of {ok, {Terms, NewPos, NewString}} -> NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString); + % Then continue the loop, with whatever parent tuple types this + % tuple is meant to be a part of. + parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString); {error, Reason} -> % TODO: wrap errors? {error, Reason} - end; -parse_tuple_tails2(O, N, _, _, [], _, _) -> - % This case is created when, for example, we want int * int, but instead we - % get a term like ((1, 2), 3), of type (int * int) * int. The trouble is, - % ((1, 2)) would have been valid, so it's actually the second comma that - % tips us off to the error, not the first one. - % - % For simpler cases, like (1, 2) when int was expected, this error message - % is fine: - Err = {error, {wrong_type, O, N, tuple, -1, -1, -1}}, - % TODO: Row/col - % TODO: Generate better error messages in the cases where N *is* a tuple, - % but the first thing inside that tuple is the problem. - Err. + end. %%% Unambiguous Tuple/Variant Parsing -parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> +parse_multivalue(ElemTypes, Pos, String, Acc) -> case next_token(Pos, String) of {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {Token, NewPos, NewString}} -> - parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token); + parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token); {error, Reason} -> {error, Reason} end. -parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> +parse_multivalue2([Next | Rest], Pos, String, Acc, Token) -> case parse_expression2(Next, Pos, String, Token) of {ok, {Value, NewPos, NewString}} -> - parse_multivalue3(Rest, NewPos, NewString, Row, Start, [Value | Acc]); + parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(")"), % TODO: Are tuple indices off by one from list indices? Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end; -parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> - {ok, {lists:reverse(Acc), Pos, String}}; -parse_multivalue2([], _, _, _, _, _, Token) -> - unexpected_token(Token, ")"). +parse_multivalue2([], Pos, String, Acc, Token) -> + count_multivalue_excess(Pos, String, Acc, Token). -parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> +parse_multivalue3(ElemTypes, Pos, String, Acc) -> case next_token(Pos, String) of {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); + parse_multivalue(ElemTypes, NewPos, NewString, Acc); {ok, {Token, _, _}} -> unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. +count_multivalue_excess(Pos, String, TypedAcc, Token) -> + ExpectedLen = length(TypedAcc), + case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of + {ok, {TermList, _, {Row, Col}, _, _}} -> + ActualLen = length(TermList), + {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}}; + {error, Reason} -> + {error, Reason} + end. + check_multivalue_long_enough([], Pos, String, _, _, Acc) -> {ok, {lists:reverse(Acc), Pos, String}}; check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> @@ -621,16 +642,16 @@ parse_variant3(Arities, Tag, [], Pos, String) -> {ok, {Result, Pos, String}}; parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> case next_token(Pos, String) of - {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> - parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); + {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} -> + parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString); {ok, {Token, _, _}} -> unexpected_token(Token, "("); {error, Reason} -> {error, Reason} end. -parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> - case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of +parse_variant4(Arities, Tag, ElemTypes, Pos, String) -> + case parse_multivalue(ElemTypes, Pos, String, []) of {ok, {Terms, NewPos, NewString}} -> Result = {variant, Arities, Tag, list_to_tuple(Terms)}, {ok, {Result, NewPos, NewString}}; @@ -907,6 +928,15 @@ variant_test() -> ok. +ambiguous_variant_test() -> + TypeDef = "datatype mytype = C | D", + check_parser_with_typedef(TypeDef, "C"), + check_parser_with_typedef(TypeDef, "D"), + check_parser_with_typedef(TypeDef, "C.C"), + check_parser_with_typedef(TypeDef, "C.D"), + + ok. + namespace_variant_test() -> Term = "[N.A, N.B]", Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, @@ -997,3 +1027,49 @@ lexer_offset_test() -> ok. +parser_offset_test() -> + {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"), + + {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"), + {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"), + {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"), + {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"), + + ok. + +singleton_test() -> + % The Sophia compiler would never generate this, but it is a valid type + % within the FATE virtual machine, and it is possible to represent within + % the ACI itself. + SingletonACI = #{tuple => [<<"int">>]}, + + % Build an AACI around this, and run it through the AACI machinery. + Function = #{name => <<"f">>, + arguments => [], + stateful => false, + payable => false, + returns => SingletonACI}, + ACI = [#{contract => #{functions => [Function], + name => <<"C">>, + kind => contract_main, + payable => false, + typedefs => []}}], + {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI), + + % Now let's do some testing with this weird type, to see if we handle it + % correctly. + {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"), + % Some ambiguous nesting parens, for fun. + {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"), + % No trailing comma should give an error. + {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"), + % All of the above should behave the same in untyped contexts: + {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"), + {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"), + {ok, 1} = parse_literal(unknown_type(), "(1)"), + + % Also if we wanted an integer, the singleton is NOT dropped, so is also an + % error. + {error, {expected_close_paren, 1, 3}} = parse_literal({integer, alread_normalized, integer}, "(1,)"), + + ok.