-module(hz_sophia).
-vsn("0.8.2").
-author("Jarvis Carroll ").
-copyright("Jarvis Carroll ").
-license("GPL-3.0-or-later").

-export([parse_literal/1, parse_literal/2, check_parser/1]).

-include_lib("eunit/include/eunit.hrl").

%% Parse a Sophia constant/literal from a string into a FATE term,
%% optionally type-checking it against an expected (normalized) type.
-spec parse_literal(String) -> Result
    when String :: string(),
         Result :: {ok, gmb_fate_data:fate_type()}
                 | {error, Reason :: term()}.

parse_literal(String) ->
    parse_literal(unknown_type(), String).

parse_literal(Type, String) ->
    case parse_expression(Type, {1, 1}, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_literal2(Result, NewPos, NewString);
        {error, Reason} ->
            {error, Reason}
    end.

%% We have parsed a valid expression. Now check that the string ends, i.e.
%% the only remaining token is eof.
parse_literal2(Result, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{eof, _, _, _, _, _}, _, _}} ->
            {ok, Result};
        {ok, {Token, _, _}} ->
            unexpected_token(Token);
        {error, Reason} ->
            {error, Reason}
    end.

%%% Tokenizer

-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
%% next_token(Pos, String) -> {ok, {Token, NewPos, Rest}} | {error, Reason}
%%
%% Tokens are {Kind, SourceText, Value, Row, StartCol, EndCol} tuples; the
%% position is {Row, Col}, both 1-based. Whitespace is skipped, and every
%% unrecognized character becomes its own single-character token.
next_token({Row, Col}, []) ->
    {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Tail) ->
    next_token({Row, Col + 1}, Tail);
next_token({Row, Col}, "\t" ++ Tail) ->
    next_token({Row, Col + 1}, Tail);
next_token({Row, _}, "\r\n" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token({Row, _}, "\r" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token({Row, _}, "\n" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
    alphanum_token(Pos, Pos, String, []);
next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
    num_token(Pos, Pos, String, [], 0);
next_token({Row, Col}, [$#, C | Tail]) when ?IS_HEX(C) ->
    bytes_token({Row, Col}, {Row, Col + 1}, [C | Tail], "#", []);
next_token({Row, Col}, "\"" ++ Tail) ->
    string_token({Row, Col}, {Row, Col + 1}, Tail, "\"", <<>>);
next_token({Row, Col}, "'" ++ Tail) ->
    character_token({Row, Col}, {Row, Col + 1}, Tail, "'");
next_token({Row, Col}, [Char | Tail]) ->
    %% Punctuation and anything else: one token per character.
    Token = {character, [Char], Char, Row, Col, Col},
    {ok, {Token, {Row, Col + 1}, Tail}}.

%% Scan an identifier path made of [A-Za-z0-9_.] characters. The token value
%% is the name split on "." into a path.
alphanum_token(Start, {Row, Col}, [C | Tail], Acc) when ?IS_ALPHANUM(C) ->
    alphanum_token(Start, {Row, Col + 1}, Tail, [C | Acc]);
alphanum_token({_, StartCol}, {Row, EndCol}, String, Acc) ->
    Text = lists:reverse(Acc),
    Path = string:split(Text, ".", all),
    Token = {alphanum, Text, Path, Row, StartCol, EndCol - 1},
    {ok, {Token, {Row, EndCol}, String}}.

%% Scan a decimal integer, allowing single underscores between digits.
num_token(Start, {Row, Col}, [C | Tail], Chars, Value) when ?IS_NUM(C) ->
    num_token(Start, {Row, Col + 1}, Tail, [C | Chars], Value * 10 + (C - $0));
num_token(Start, {Row, Col}, [$_, C | Tail], Chars, Value) when ?IS_NUM(C) ->
    num_token(Start, {Row, Col + 2}, Tail, [C, $_ | Chars], Value * 10 + (C - $0));
num_token({_, StartCol}, {Row, EndCol}, String, Chars, Value) ->
    Text = lists:reverse(Chars),
    Token = {integer, Text, Value, Row, StartCol, EndCol - 1},
    {ok, {Token, {Row, EndCol}, String}}.
%% Scan a #-prefixed hexadecimal bytes literal, allowing single underscores
%% between digits. Chars accumulates the raw source (reversed); Digits
%% accumulates the numeric nibble values (reversed).
bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    % Bug fix: this clause consumes two characters ("_" and the digit), so
    % the column must advance by 2, matching num_token. Previously it only
    % advanced by 1, skewing all later column reports on the line.
    bytes_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
    BytesString = lists:reverse(Chars),
    Value = reverse_combine_nibbles(Digits, <<>>),
    Token = {bytes, BytesString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.

%% Map a hex digit character to its numeric value. Guards tightened to the
%% actual hex range; all call sites already guard with ?IS_HEX.
convert_digit(C) when C >= $0, C =< $9 -> C - $0;
convert_digit(C) when C >= $A, C =< $F -> C - $A + 10;
convert_digit(C) when C >= $a, C =< $f -> C - $a + 10.

%% The digit list arrives least-significant-nibble first (it was built by
%% prepending), so take nibbles pairwise and prepend each resulting byte to
%% the accumulator.
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
    NewAcc = <<D2:4, D1:4, Acc/binary>>,
    reverse_combine_nibbles(Rest, NewAcc);
reverse_combine_nibbles([D1], Acc) ->
    % An odd number of digits: zero-pad the high nibble of the leading byte.
    <<0:4, D1:4, Acc/binary>>;
reverse_combine_nibbles([], Acc) ->
    Acc.

%% Scan a double-quoted string literal. Value accumulates the decoded
%% contents as a UTF-8 binary; SourceChars accumulates the raw source text
%% (reversed) for error reporting. Unescaped newlines terminate the literal
%% with an error.
string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
string_token({_, Start}, {Row, Col}, [], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\r | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\n | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token(Start, Pos, String, SourceChars, Value) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, NewSourceChars, NewPos, NewString}} ->
            % TODO: ERTS probably had to convert this FROM utf8 at some
            % point, so why bother, if we need to convert it back? I guess we
            % could accept iolists if we really wanted to waste time on this
            % point...
            NewValue = <<Value/binary, Char/utf8>>,
            string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
        {error, Reason} ->
            {error, Reason}
    end.

%% Scan a single-quoted character literal: exactly one (possibly escaped)
%% character, then the closing quote.
character_token({_, Start}, {Row, Col}, [], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token({_, Start}, {Row, Col}, [$\r | _], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token({_, Start}, {Row, Col}, [$\n | _], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token(Start, Pos, String, SourceChars) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, NewSourceChars, NewPos, NewString}} ->
            character_token2(Start, NewPos, NewString, NewSourceChars, Char);
        {error, Reason} ->
            {error, Reason}
    end.

character_token2({_, Start}, {Row, Col}, [$' | Rest], SourceChars, Value) ->
    SourceStr = lists:reverse([$' | SourceChars]),
    Token = {char_literal, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
character_token2({_, Start}, {Row, Col}, _, SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}.
%% Consume one (possibly escaped) character from a string or character
%% literal. Returns {ok, {CodePoint, NewSourceChars, NewPos, Rest}}, where
%% NewSourceChars is the raw-source accumulator (reversed) with this
%% character's source text prepended.
parse_char(Start, {Row, Col}, "\\x{" ++ String, SourceChars) ->
    escape_long_hex_code(Start, {Row, Col + 3}, String, "{x\\" ++ SourceChars, 0);
parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars)
  when ?IS_HEX(A), ?IS_HEX(B) ->
    Byte = convert_digit(A) * 16 + convert_digit(B),
    {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}};
parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            % Bug fix: record the backslash ($\\) in the raw-source
            % accumulator. Previously "$\ " (an escaped space) was recorded,
            % so reconstructed token source text showed "C " instead of "\C".
            {ok, {ByteVal, [C, $\\ | SourceChars], {Row, Col + 2}, Rest}};
        error ->
            {error, {invalid_escape_code, [$\\, C], Row, Start, Col + 1}}
    end;
parse_char(_, {Row, Col}, [C | Rest], SourceChars) ->
    {ok, {C, [C | SourceChars], {Row, Col + 1}, Rest}}.

%% Scan the digits of a \x{...} escape, accumulating the code point value.
escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
    {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value)
  when ?IS_HEX(C) ->
    NewSourceChars = [C | SourceChars],
    NewValue = 16 * Value + convert_digit(C),
    escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
    % Just return as if the escape code were closed, and let the string parser
    % produce an unclosed string error instead.
    {ok, {Value, SourceChars, Pos, []}}.

%% Map a backslash-escape character to the character it encodes.
escape_char($b) -> {ok, $\b};
escape_char($e) -> {ok, $\e};
escape_char($f) -> {ok, $\f};
escape_char($n) -> {ok, $\n};
escape_char($r) -> {ok, $\r};
escape_char($t) -> {ok, $\t};
escape_char($v) -> {ok, $\v};
% Technically \" and \' are only valid inside their own quote characters, not
% each other, but whatever, we will just be permissive here.
escape_char($") -> {ok, $\"};
escape_char($') -> {ok, $\'};
escape_char($\\) -> {ok, $\\};
escape_char(_) -> error.

%%% Sophia Literal Parser
%%% This parser is a simple recursive descent parser, written explicitly in
%%% erlang.
%%%
%%% There are no infix operators in the subset we want to parse, so recursive
%%% descent is fine with no special tricks, no shunting yard algorithm, no
%%% parser generators, etc.
%%%
%%% If we were writing this in C then we might want to work iteratively with an
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
%%% using recursion. This is a tried and true method of making fast parsers.
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
%%% rather than writing confusing iterative code in Erlang, to simulate a
%%% pushdown automaton inside another simulated stack machine... we should just
%%% write the recursive code, thus programming the BEAM to implement the
%%% pushdown automaton that we want.

%% Parse one expression of the given (normalized) type, starting at Pos.
parse_expression(Type, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {Token, NewPos, NewString}} ->
            parse_expression2(Type, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.

%% Dispatch on the first token of the expression.
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
    typecheck_integer(Type, Pos, String, Value, Row, Start, End);
parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) ->
    % A unary minus must be followed by an integer literal.
    case next_token(Pos, String) of
        {ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} ->
            typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End);
        {ok, {Token, _, _}} ->
            % Bug fix: previously any non-integer token after "-" crashed
            % with a case_clause instead of producing a parse error.
            unexpected_token(Token);
        {error, Reason} ->
            {error, Reason}
    end;
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [Len]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, bits} ->
            % Reinterpret the literal's bytes as a big-endian unsigned
            % integer bitfield.
            Size = bit_size(Value),
            <<IntValue:Size>> = Value,
            {ok, {{bits, IntValue}, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Result, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, string} -> {ok, {Value, Pos, String}};
        {_, _, unknown_type} -> {ok, {Value, Pos, String}};
        {O, N, _} -> {error, {wrong_type, O, N, string, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, char} -> {ok, {Value, Pos, String}};
        {_, _, unknown_type} -> {ok, {Value, Pos, String}};
        {O, N, _} -> {error, {wrong_type, O, N, char, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
    parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
    parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
    parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
    parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
    {error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
    unexpected_token(Token).

%% The normalized "no expectations" type descriptor.
unknown_type() -> {unknown_type, already_normalized, unknown_type}.

%% Consume the given sequence of single-token source strings, in order.
expect_tokens([], Pos, String) ->
    {ok, {Pos, String}};
expect_tokens([Str | Rest], Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} ->
            expect_tokens(Rest, NewPos, NewString);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, Str);
        {error, Reason} ->
            {error, Reason}
    end.

unexpected_token(Token, _Expected) ->
    % I don't know if this is a good idea, but sometimes there are only one or
    % two tokens that could have worked, which might make for simple
    % non-technical error messages. I don't know how to format that yet,
    % though.
    unexpected_token(Token).

unexpected_token({eof, _, _, _, _, _}) ->
    {error, expression_incomplete};
unexpected_token({_, S, _, Row, Start, End}) ->
    {error, {unexpected_token, S, Row, Start, End}}.
%%% Ambiguous Chain Object vs Identifier Parsing

%% Interpret an alphanum token path as a boolean, a Bits constant, a chain
%% object (ak_/ct_/sg_...), or a variant constructor.
parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, true, Row, Start, End);
parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, false, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End)
  when ?IS_LATIN_LOWER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Constructors start with uppercase characters, so lowercase can only be a
    % chain object.
    try gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
        {account_pubkey, Data} ->
            typecheck_address(Type, Pos, String, Data, Row, Start, End);
        {contract_pubkey, Data} ->
            typecheck_contract(Type, Pos, String, Data, Row, Start, End);
        {signature, Data} ->
            typecheck_signature(Type, Pos, String, Data, Row, Start, End);
        _ ->
            % Only a few chain objects are recognized by Sophia. The rest
            % are interpreted as identifiers, so we might as well give the
            % same sort of error that the compiler would give.
            {error, {unexpected_identifier, S, Row, Start, End}}
    catch
        _:_ ->
            % The decoder throws on anything that isn't a chain object at
            % all; report that as an unexpected identifier too.
            {error, {unexpected_identifier, S, Row, Start, End}}
    end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
    % Inversely, chain object prefixes are always lowercase, so any other path
    % must be a variant constructor, or invalid.
    parse_variant(Type, Pos, String, Path, Row, Start, End).
%% The typecheck_* helpers accept a parsed literal when the expected type
%% matches (or is unknown), convert it to its FATE representation, and
%% otherwise report a wrong_type error at the literal's location.

typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_integer({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, integer, Row, Start, End}}.

typecheck_bool({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, boolean, Row, Start, End}}.

typecheck_bits({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, bits, Row, Start, End}}.

typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{contract, Data}, Pos, String}};
typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, address, Row, Start, End}}.

typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{address, Data}, Pos, String}};
typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, contract, Row, Start, End}}.

typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, signature, Row, Start, End}}.

%%% List Parsing

%% A "[" was consumed; the expected type must be a list (or unknown).
parse_list({_, _, {list, [Inner]}}, Pos, String, _, _) ->
    parse_list2(Inner, Pos, String);
parse_list({_, _, unknown_type}, Pos, String, _, _) ->
    parse_list2(unknown_type(), Pos, String);
parse_list({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, list, Row, Start, Start}}.

parse_list2(Inner, Pos, String) ->
    case parse_list_loop(Inner, Pos, String, "]", []) of
        {ok, {Result, _, _, NewPos, NewString}} ->
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

%% Loop over comma-separated elements until CloseChar. On success, returns
%% the elements, a flag saying whether the closer immediately followed the
%% opener or a comma, the closer's position, and the remaining input.
parse_list_loop(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}};
        {ok, {Token, NewPos, NewString}} ->
            parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.
%% Parse one list/tuple element from an already-fetched token, then continue
%% looping for a separator or the closer.
parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) ->
    case parse_expression2(Inner, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end.

%% After an element, expect either the closer or a "," before more elements.
parse_list_loop3(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}};
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, CloseChar);
        {error, Reason} ->
            {error, Reason}
    end.

%% The element-error context tag depends on which closer we are looking for.
choose_list_error_wrapper("]") -> list_element;
choose_list_error_wrapper(")") -> tuple_element.

%%% Ambiguous Parenthesis Parsing

parse_tuple({_, _, unknown_type}, Pos, String) ->
    % An untyped tuple is a list of untyped terms, and weirdly our list parser
    % works perfectly for that, as long as we change the closing character to
    % be ")" instead of "]".
    case parse_list_loop(unknown_type(), Pos, String, ")", []) of
        {ok, {[Inner], false, _, NewPos, NewString}} ->
            % In Sophia, trailing commas are invalid, and so all singleton
            % tuples are unwrapped, and translated into the inner type.
            {ok, {Inner, NewPos, NewString}};
        {ok, {TermList, _, _, NewPos, NewString}} ->
            Result = {tuple, list_to_tuple(TermList)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple(Type, Pos, String) ->
    % Typed tuple parsing is quite complex, because we also want to support
    % normal parentheses for grouping. It's not strictly necessary for
    % inputting data, since we don't have any infix operators in simple
    % data/term notation, but the alternatives are to generate singleton tuples
    % naively, (which are impossible to generate from Sophia,) or to hard error
    % on singleton tuples! Being faithful to Sophia is clearly nice!

    % Count how many ambiguous parens there are, including the one we already
    % saw.
    case count_open_parens(Pos, String, 1) of
        {ok, {Count, Token, NewPos, NewString}} ->
            % Compare that to the amount of nesting tuple connectives are in
            % the type we are expected to produce.
            {ExcessCount, HeadType, Tails} =
                extract_tuple_type_info(Count, Type, []),
            % Now work out what to do with all this information.
            parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.

%% Count the run of consecutive "(" tokens, returning the first non-"("
%% token along with the total count.
count_open_parens(Pos, String, Count) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
            count_open_parens(NewPos, NewString, Count + 1);
        {ok, {Token, NewPos, NewString}} ->
            {ok, {Count, Token, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

%% Peel tuple layers off the expected type, one per open paren, accumulating
%% each layer's remaining element types (the "tails"). Whatever parens are
%% left over ("excess") must be grouping parens.
extract_tuple_type_info(ParenCount, {_, _, {tuple, [Head | Rest]}}, Tails)
  when ParenCount > 0 ->
    % Have an open paren, and a tuple type. We need to go deeper!
    extract_tuple_type_info(ParenCount - 1, Head, [Rest | Tails]);
extract_tuple_type_info(ParenCount, HeadType, Tails) ->
    % No parens, or no more (non-empty) tuples. Stop!
    {ParenCount, HeadType, Tails}.

parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String,
             {character, ")", _, Row, Col, _}) ->
    parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) ->
    % Finished with parentheses for now, try and parse an expression out, to
    % get our head term.
    case parse_expression2(HeadType, Pos, String, Token) of
        {ok, {Result, NewPos, NewString}} ->
            % Got a head term. Now try to build all the other tuple layers.
            parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString);
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason}
    end.

%% A ")" immediately followed the "(" run: either an empty tuple, or an
%% element-count error.
parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
    % There are zero excess parens, meaning all our parens are tuples. Get the
    % top one.
    [Tail | _] = Tails,
    % We expected some nonzero number of elements before the close paren, but
    % got zero.
    ExpectCount = 1 + length(Tail),
    {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
    % If we have some ambiguous parentheses left, we now know one of them is
    % this empty tuple.
    HeadTerm = {tuple, {}},
    NewExcessCount = ExcessCount - 1,
    % Now continue the loop as if it were an integer or something, in the head
    % position.
    parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % We were expecting a head term of a different type!
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.

%% Having parsed a head term, close grouping parens and/or build tuple
%% layers until no open parens and no tuple tails remain.
parse_tuple_tails(0, HeadTerm, [], Pos, String) ->
    % No open parens left, no tuples left to build, we are done!
    {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) ->
    % The ambiguous case, where we have a mix of tuple parens, and grouping
    % parens. We want to peek at the next token, to see if it closes a grouping
    % paren.
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} ->
            % It is grouping! Try closing a grouping paren.
            parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos,
                                    NewString, Row, Col);
        {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} ->
            % It is a real tuple! Try parsing a tuple.
            parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos,
                                    NewString, Row, Col);
        {ok, {Token, _, _}} ->
            % Anything else is just a boring parse error we can complain about.
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.

parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
    % A singleton tuple was expected, but a grouping paren was given. In theory
    % we could be permissive here, but we were asked to do type checking, and
    % this is a type error. The type error itself is a bit hard to reproduce,
    % but we do know exactly what the fix is, so let's report that instead.
    {error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
    % A tuple (of more than one elements) was expected, but a grouping paren
    % was given. Again, the type error is hard to produce, but the actual
    % solution is simple; add more elements.
    ExpectCount = length(Tail) + 1,
    GotCount = 1,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}};
parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % We were expecting some grouping parens, and now we know that one of them
    % was in fact grouping. Good.
    parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String).

parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
    % No more tuples, so commas are invalid. It's hard to describe the type
    % error that a comma would actually produce, so instead let's just give
    % the user the actual solution to their problems, which is to remove the
    % comma.
    {error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % If there are no tails then we would have exited into the "grouping parens
    % only" case, so we know this works:
    [TailTypes | ParentTails] = Tails,
    % Now we can parse this tuple as a tuple.
    case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of
        {ok, {Terms, NewPos, NewString}} ->
            NewHead = {tuple, list_to_tuple(Terms)},
            % Then continue the loop, with whatever parent tuple types this
            % tuple is meant to be a part of.
            parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos,
                              NewString);
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason}
    end.

%%% Unambiguous Tuple/Variant Parsing

%% Parse a fixed-arity, comma-separated, ")"-terminated sequence, one
%% expected element type at a time. Acc may be seeded with already-parsed
%% head terms (reversed).
parse_multivalue(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2,
                                         Start2, Acc);
        {ok, {Token, NewPos, NewString}} ->
            parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.

parse_multivalue2([Next | Rest], Pos, String, Acc, Token) ->
    case parse_expression2(Next, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end;
parse_multivalue2([], Pos, String, Acc, Token) ->
    % Ran out of expected element types, but another element follows.
    count_multivalue_excess(Pos, String, Acc, Token).

parse_multivalue3(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2,
                                         Start2, Acc);
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_multivalue(ElemTypes, NewPos, NewString, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.
%% The expected arity was exhausted but more elements remain: parse the rest
%% untyped, just to find out how many there were, then report both counts.
count_multivalue_excess(Pos, String, TypedAcc, Token) ->
    ExpectedLen = length(TypedAcc),
    case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of
        {ok, {TermList, _, {Row, Col}, _, _}} ->
            ActualLen = length(TermList),
            {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}};
        {error, Reason} ->
            {error, Reason}
    end.

%% A ")" was found; complain if any expected element types were not consumed.
check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
    {ok, {lists:reverse(Acc), Pos, String}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
    GotCount = length(Got),
    ExpectCount = length(Remaining) + GotCount,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}.

%%% Variant parsing

%% Match an identifier path against an expected variant type. Constructors
%% may optionally be qualified by their own type's namespace.
parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident],
              Row, Start, End) ->
    parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Pos, String,
              [Namespace, Constructor], Row, Start, End) ->
    case get_typename(O, N) of
        [Namespace, _] ->
            parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".",
                           Constructor, Row, Start, End);
        _ ->
            FullName = Namespace ++ "." ++ Constructor,
            {error, {invalid_constructor, O, N, FullName, Row, Start, End}}
    end;
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In normal code, identifiers can have many meanings, which can result in
    % lots of different errors. In constant/immediate/normalized Sophia terms
    % we know identifiers are always variants, so we can type error if any
    % other type was expected.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.

%% Pick whichever of the original/normalized type descriptions carries the
%% actual name, and split it into a namespace path.
get_typename(O, already_normalized) -> get_typename(O);
get_typename(_, N) -> get_typename(N).

get_typename({Name, _}) -> string:split(Name, ".", all);
get_typename(Name) -> string:split(Name, ".", all).
%% Look up the constructor among the declared variants, then parse its
%% (possibly empty) argument list.
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor,
               Row, Start, End) ->
    case lookup_variant(Constructor, Variants, 0) of
        {ok, {Tag, ElemTypes}} ->
            % FATE variant values carry the arity of every constructor.
            Arities = [length(Ts) || {_, Ts} <- Variants],
            parse_variant3(Arities, Tag, ElemTypes, Pos, String);
        error ->
            {error, {invalid_constructor, O, N, Prefix ++ Constructor,
                     Row, Start, End}}
    end.

parse_variant3(Arities, Tag, [], Pos, String) ->
    % Parsing of 0-arity variants is different: no parenthesized arguments.
    {ok, {{variant, Arities, Tag, {}}, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "(");
        {error, Reason} ->
            {error, Reason}
    end.

%% Parse the constructor's argument tuple and build the FATE variant value.
parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
    case parse_multivalue(ElemTypes, Pos, String, []) of
        {ok, {Terms, NewPos, NewString}} ->
            Result = {variant, Arities, Tag, list_to_tuple(Terms)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

%% Find a constructor by name, returning its zero-based tag and its element
%% types.
lookup_variant(_, [], _) ->
    error;
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
    {ok, {Tag, ElemTypes}};
lookup_variant(Ident, [_ | Rest], Tag) ->
    lookup_variant(Ident, Rest, Tag + 1).
%%% Record parsing

%% A "{" was consumed; dispatch to map or record parsing depending on the
%% expected type. With an unknown type, peek ahead: "}" means empty map,
%% "[" means map entry, and a bare identifier would be a record field, which
%% cannot be resolved without a type.
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
    parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
    parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {#{}, NewPos, NewString}};
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
        {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
            {error, {unresolved_record, Row, Start, End}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, map, Row, Start, Start}}.

%% Parse zero or more `field = value` entries, accumulating them by name.
parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{alphanum, Ident, _, Row, Start, End}, NewPos, NewString}} ->
            parse_record2(Fields, NewPos, NewString, Acc, Ident,
                          Row, Start, End);
        {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
            parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

%% The field name must be one the record type declares.
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        {_, Type} ->
            parse_record3(Fields, Pos, String, Acc, Ident,
                          Row, Start, End, Type);
        false ->
            {error, {invalid_field, Ident, Row, Start, End}}
    end.

%% Each field may only be given once.
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        false ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type);
        true ->
            {error, {field_already_present, Ident, Row, Start, End}}
    end.
%% After the field name: expect "=" before the field's value.
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {ok, {NewPos, NewString}} ->
            parse_record5(Fields, NewPos, NewString, Acc, Ident, Type);
        {error, Reason} ->
            {error, Reason}
    end.

%% Parse the field's value against its declared type and record it.
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewPos, NewString, NewAcc);
        {error, Reason} ->
            % BUG FIX: re-tag the wrapped reason as an error tuple.
            % wrap_error/2 returns a (possibly contextualised) reason,
            % not an {error, _} tuple, so returning it bare leaked an
            % untagged reason out of the parser, which would crash
            % callers such as parse_literal/2 that match only
            % {ok, _} | {error, _}.
            {error, wrap_error(Reason, {record_field, Ident})}
    end.

%% After a field: "," continues with more fields, "}" ends the record.
parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_record(Fields, NewPos, NewString, Acc);
        {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
            parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

%% Close off the record: assemble the parsed fields in declaration
%% order, attaching the "}"'s position to any missing-field error.
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {ok, Result} ->
            {ok, {Result, Pos, String}};
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}}
    end.

%% Pull each declared field's value out of the parsed map, failing on
%% the first field that was never assigned.
parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
    case maps:find(Name, FieldValues) of
        {ok, Value} ->
            parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
        error ->
            {error, {missing_field, Name}}
    end;
parse_record_final_loop([], _, [Field]) ->
    % Singleton records are type-checked in Sophia, but unwrapped in the
    % resulting FATE.
    {ok, Field};
parse_record_final_loop([], _, FieldsReverse) ->
    Fields = lists:reverse(FieldsReverse),
    Tuple = list_to_tuple(Fields),
    {ok, {tuple, Tuple}}.
%%% Map Parsing

%% Parse map entries "{[K] = V, ...}", accumulating parsed Key => Value
%% pairs in Acc. Also accepts an immediate "}" to finish the map.
parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

%% Parse the key expression following "[".
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result);
        {error, Reason} ->
            % BUG FIX: re-tag the wrapped reason as an error tuple.
            % wrap_error/2 returns a (possibly contextualised) reason,
            % not an {error, _} tuple, so returning it bare leaked an
            % untagged reason out of the parser, which would crash
            % callers such as parse_literal/2 that match only
            % {ok, _} | {error, _}.
            {error, wrap_error(Reason, {map_key, maps:size(Acc)})}
    end.

%% Expect "] =" between the key and the value.
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {ok, {NewPos, NewString}} ->
            parse_map4(KeyType, ValueType, NewPos, NewString, Acc, Key);
        {error, Reason} ->
            {error, Reason}
    end.

%% Parse the value expression and store the completed pair.
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Key, Result, Acc),
            parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc);
        {error, Reason} ->
            {error, Reason}
    end.

%% After an entry: "," continues with more entries, "}" ends the map.
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_map(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

%% Attach parse context (e.g. {record_field, Name} or {map_key, N}) to
%% a nested parse failure's reason.
%% TODO: actually fold Context into the reason term. For now the reason
%% is passed through unchanged; callers are responsible for re-tagging
%% the result with {error, _}.
wrap_error(Reason, _) -> Reason.

%%% Tests

%% Assert that parsing Sophia (with the given type info) yields exactly
%% the expected FATE term; raises to_fate_failed otherwise.
check_sophia_to_fate(Type, Sophia, Fate) ->
    case parse_literal(Type, Sophia) of
        {ok, Fate} ->
            ok;
        {ok, FateActual} ->
            erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
        {error, Reason} ->
            erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
    end.
%% Compile Source with the Sophia compiler and extract both the FATE
%% value returned by the named Entrypoint and that entrypoint's AACI
%% type. This gives the tests a ground-truth (value, type) pair to
%% compare our parser against.
compile_entrypoint_value_and_type(Source, Entrypoint) ->
    {ok, #{fate_code := FateCode, aci := ACI}} =
        so_compiler:from_string(Source, [{aci, json}]),
    % Find the fcode for the correct entrypoint.
    {fcode, Bodies, NamesMap, _} = FateCode,
    Names = maps:to_list(NamesMap),
    Name = unicode:characters_to_binary(Entrypoint),
    {Hash, Name} = lists:keyfind(Name, 2, Names),
    {_, _, Code} = maps:get(Hash, Bodies),
    FATE = extract_return_value(Code),
    % Generate the AACI, and get the AACI type info for the correct
    % entrypoint.
    % BUG FIX: this lookup previously hard-coded "f" rather than using
    % the Entrypoint argument (which the fcode lookup above already
    % uses), so asking for any other entrypoint would have fetched the
    % wrong signature. Every current caller passes "f", so behaviour is
    % unchanged for them.
    AACI = hz_aaci:prepare(ACI),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, Entrypoint),
    {FATE, Type}.

%% A compiled literal is a single basic block returning an immediate;
%% anything else means the entrypoint body was not a plain literal.
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> FATE;
extract_return_value(Code) -> erlang:exit({invalid_literal_fcode, Code}).

%% Round-trip a Sophia literal: compile it for ground truth, then check
%% that our parser agrees, both without and with type information.
check_parser(Sophia) ->
    % Compile the literal using the compiler, to check that it is valid Sophia
    % syntax, and to get an AACI object to pass to the parser.
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    % Check that when we parse the term we get the same value as the Sophia
    % compiler.
    check_sophia_to_fate(unknown_type(), Sophia, Fate),
    % Then, once we know that the term is correct, make sure that it is still
    % accepted *with* type info.
    check_sophia_to_fate(Type, Sophia, Fate).

%% Like check_parser/1, but with user type definitions in scope, so only
%% a typed parse is attempted.
check_parser_with_typedef(Typedef, Sophia) ->
    % Compile the type definitions alongside the usual literal expression.
    Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    % Do a typed parse, as usual, but there are probably record/variant
    % definitions in the AACI, so untyped parses probably don't work.
    check_sophia_to_fate(Type, Sophia, Fate).

%% Literals whose types involve no user-defined names.
anon_types_test() ->
    % Integers.
    check_parser("123"),
    check_parser("1_2_3"),
    check_parser("-123"),
    % Booleans.
    check_parser("true"),
    check_parser("false"),
    check_parser("[true, false]"),
    % Bytes.
    check_parser("#DEAD000BEEF"),
    check_parser("#DE_AD0_00B_EEF"),
    % Strings.
    check_parser("\"hello world\""),
    % The Sophia compiler doesn't handle this right, but we should still.
    %check_parser("\"ÿ\""),
    %check_parser("\"♣\""),
    % Characters.
    check_parser("'A'"),
    check_parser("['a', ' ', '[']"),
    %check_parser("'ÿ'"),
    %check_parser("'♣'"),
    % List of integers.
    check_parser("[1, 2, 3]"),
    % List of lists.
    check_parser("[[], [1], [2, 3]]"),
    % Tuple.
    check_parser("(1, [2, 3], (4, 5))"),
    % Map.
    check_parser("{[1] = 2, [3] = 4}"),
    ok.

%% Escape sequences inside string and character literals.
string_escape_codes_test() ->
    check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
    check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
    check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
    check_parser("\"'\""),
    check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
    check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"),
    check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"),
    check_parser("'\"'"),
    ok.

%% Record literals require a record definition to be in scope.
records_test() ->
    TypeDef = "record pair = {x: int, y: int}",
    Sophia = "{x = 1, y = 2}",
    check_parser_with_typedef(TypeDef, Sophia),
    % The above won't run an untyped parse on the expression, but we can. It
    % will error, though.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).

%% Variant literals at each arity, with and without a type-path prefix.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    check_parser_with_typedef(TypeDef, "Zero"),
    check_parser_with_typedef(TypeDef, "One(0)"),
    check_parser_with_typedef(TypeDef, "Two(0, 1)"),
    check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
    check_parser_with_typedef(TypeDef, "C.Zero"),
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
    ok.

%% Constructors that collide with the contract name "C" must still work,
%% including in prefixed form.
ambiguous_variant_test() ->
    TypeDef = "datatype mytype = C | D",
    check_parser_with_typedef(TypeDef, "C"),
    check_parser_with_typedef(TypeDef, "D"),
    check_parser_with_typedef(TypeDef, "C.C"),
    check_parser_with_typedef(TypeDef, "C.D"),
    ok.
%% Variants declared in a separate namespace, referenced with an N.
%% prefix from the contract's entrypoint.
namespace_variant_test() ->
    Term = "[N.A, N.B]",
    Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
    {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(VariantType, Term, Fate),
    ok.

%% Chain-object literals: account addresses, signatures, and contract
%% addresses.
chain_objects_test() ->
    % Address,
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % Two different forms of signature,
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),
    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because the
    % compiler demands that contract addresses be type checked according to the
    % logic of "contract oriented programming", including covariance, etc. and
    % "contract oriented programming" is not very compatible with ML style type
    % inference.
    Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ Contract,
    {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(ContractType, Contract, Fate),
    check_sophia_to_fate(unknown_type(), Contract, Fate),
    ok.

%% Bits literals: the named constants parse untyped, and in a typed
%% (bits) context integers and byte literals become {bits, N} terms.
bits_test() ->
    check_parser("Bits.all"),
    check_parser("Bits.none"),
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"),
    check_sophia_to_fate(Type, "5", {bits, 5}),
    check_sophia_to_fate(Type, "-5", {bits, -5}),
    check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}),
    ok.

%% Single-field records are unwrapped in FATE (see
%% parse_record_final_loop), including when nested.
singleton_records_test() ->
    TypeDef = "record singleton('a) = {it: 'a}",
    check_parser_with_typedef(TypeDef, "{it = 123}"),
    check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
    check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),
    ok.
singleton_variants_test() ->
    % Similar tests to the singleton records, but this time there isn't
    % actually a special case; singleton variants are in fact wrapped in the
    % FATE too.
    TypeDef = "datatype wrapped('a) = Wrap('a)",
    check_parser_with_typedef(TypeDef, "Wrap(123)"),
    check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
    check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),
    ok.

excess_parens_test() ->
    % 'singleton' parens are another special case, but unlike singleton
    % records, which exist in the type system, singleton parens aren't tuples
    % at all! They are just grouping, for arithmetic. For example.
    check_parser("(123)"),
    check_parser("[1, (2), ((3))]"),
    % Where this gets tricky, though, is when grouping parens are mixed with
    % tuple parens. E.g. this list of three tuples should all parse to the same
    % result.
    check_parser("[((1, 2)), ((1), 2), (((1), 2))]"),
    % Including multiple nestings of tuples and grouping, interleaved.
    check_parser("((((1), ((2, 3)))), 4)"),
    % Also empty tuples exist!
    check_parser("()"),
    check_parser("(((((), ())), ()))"),
    ok.

%% The error tuples below carry {Row, StartCol, EndCol} (1-based,
%% inclusive) produced by the tokenizer in next_token/2.
lexer_offset_test() ->
    % Test that various tokens report their position correctly.
    {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
    {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
    {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
    {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
    {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
    {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),
    % Check that the tokenizer knows its position correctly *after* various
    % tokens.
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
    ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
    {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),
    % Check that the tokenizer accounts for various line separators correctly.
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),
    ok.

%% Positions reported by the *parser* (tuple arity mismatches), rather
%% than by the tokenizer.
parser_offset_test() ->
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"),
    {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"),
    {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"),
    {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"),
    {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"),
    ok.

singleton_test() ->
    % The Sophia compiler would never generate this, but it is a valid type
    % within the FATE virtual machine, and it is possible to represent within
    % the ACI itself.
    SingletonACI = #{tuple => [<<"int">>]},
    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>, arguments => [], stateful => false, payable => false, returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function], name => <<"C">>, kind => contract_main, payable => false, typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),
    % Now let's do some testing with this weird type, to see if we handle it
    % correctly.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    % Some ambiguous nesting parens, for fun.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % No trailing comma should give an error.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % All of the above should behave the same in untyped contexts:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),
    % Also if we wanted an integer, the singleton is NOT dropped, so is also an
    % error.
    % NOTE(review): 'alread_normalized' below looks like a typo for
    % 'already_normalized' (cf. ABCType in lexer_offset_test). It is
    % presumably harmless if the middle element of the type triple is
    % unused here — confirm before normalising it.
    {error, {expected_close_paren, 1, 3}} = parse_literal({integer, alread_normalized, integer}, "(1,)"),
    ok.