Compare commits

...

3 Commits

Author SHA1 Message Date
Jarvis Carroll
a695c21fc9 Parse address literals.
Also signatures.
2026-02-03 06:00:40 +00:00
Jarvis Carroll
493bdb990c Fix lexer row/column calculations. 2026-02-03 01:42:17 +00:00
Jarvis Carroll
17f635af61 Parse long hex escape codes
This doesn't work super consistently in the compiler, for codepoints above 127, but it should work fine for us, so, oh well!
2026-02-03 00:41:00 +00:00

View File

@ -4,10 +4,13 @@
-copyright("Jarvis Carroll <spiveehere@gmail.com>"). -copyright("Jarvis Carroll <spiveehere@gmail.com>").
-license("GPL-3.0-or-later"). -license("GPL-3.0-or-later").
-export([check_parser/1]). -export([parse_literal/1, parse_literal/2, check_parser/1]).
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
%% Parses String as a literal without an expected type, by delegating to
%% parse_literal/2 with the value of unknown_type() as the type.
parse_literal(String) ->
parse_literal(unknown_type(), String).
parse_literal(Type, String) -> parse_literal(Type, String) ->
case parse_expression(Type, {tk, 1, 1}, String) of case parse_expression(Type, {tk, 1, 1}, String) of
{ok, {Result, NewTk, NewString}} -> {ok, {Result, NewTk, NewString}} ->
@ -29,7 +32,9 @@ parse_literal2(Result, Tk, String) ->
%%% Tokenizer %%% Tokenizer
-define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))). -define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
@ -37,55 +42,55 @@ parse_literal2(Result, Tk, String) ->
next_token({tk, Row, Col}, []) -> next_token({tk, Row, Col}, []) ->
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
next_token({tk, Row, Col}, " " ++ Rest) -> next_token({tk, Row, Col}, " " ++ Rest) ->
next_token({tk, Row + 1, Col}, Rest); next_token({tk, Row, Col + 1}, Rest);
next_token({tk, Row, Col}, "\t" ++ Rest) -> next_token({tk, Row, Col}, "\t" ++ Rest) ->
next_token({tk, Row + 1, Col}, Rest); next_token({tk, Row, Col + 1}, Rest);
next_token({tk, _, Col}, "\r\n" ++ Rest) -> next_token({tk, Row, _}, "\r\n" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest); next_token({tk, Row + 1, 1}, Rest);
next_token({tk, _, Col}, "\r" ++ Rest) -> next_token({tk, Row, _}, "\r" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest); next_token({tk, Row + 1, 1}, Rest);
next_token({tk, _, Col}, "\n" ++ Rest) -> next_token({tk, Row, _}, "\n" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest); next_token({tk, Row + 1, 1}, Rest);
next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) ->
alphanum_token(Tk, Tk, String, []); alphanum_token(Tk, Tk, String, []);
next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
num_token(Tk, Tk, String, [], 0); num_token(Tk, Tk, String, [], 0);
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []); bytes_token({tk, Row, Col}, {tk, Row, Col + 1}, [C | Rest], "#", []);
next_token({tk, Row, Col}, "\"" ++ Rest) -> next_token({tk, Row, Col}, "\"" ++ Rest) ->
string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>); string_token({tk, Row, Col}, {tk, Row, Col + 1}, Rest, "\"", <<>>);
next_token({tk, Row, Col}, [Char | Rest]) -> next_token({tk, Row, Col}, [Char | Rest]) ->
Token = {character, [Char], Char, Row, Col, Col}, Token = {character, [Char], Char, Row, Col, Col},
{ok, {Token, {tk, Row + 1, Col}, Rest}}. {ok, {Token, {tk, Row, Col + 1}, Rest}}.
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); alphanum_token(Start, {tk, Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
AlphaString = lists:reverse(Acc), AlphaString = lists:reverse(Acc),
Token = {alphanum, AlphaString, AlphaString, Row, Start, End}, Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {tk, Row, End}, String}}.
num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0), NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], NewValue); num_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], NewValue);
num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0), NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Chars], NewValue); num_token(Start, {tk, Row, Col + 2}, Rest, [C, $_ | Chars], NewValue);
num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) ->
NumString = lists:reverse(Chars), NumString = lists:reverse(Chars),
Token = {integer, NumString, Value, Row, Start, End}, Token = {integer, NumString, Value, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {tk, Row, End}, String}}.
bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C), Digit = convert_digit(C),
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], [Digit | Digits]); bytes_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C), Digit = convert_digit(C),
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Chars], [Digit | Digits]); bytes_token(Start, {tk, Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) ->
BytesString = lists:reverse(Chars), BytesString = lists:reverse(Chars),
Value = reverse_combine_nibbles(Digits, <<>>), Value = reverse_combine_nibbles(Digits, <<>>),
Token = {bytes, BytesString, Value, Row, Start, End}, Token = {bytes, BytesString, Value, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {tk, Row, End}, String}}.
convert_digit(C) when C >= $0, C =< $9 -> convert_digit(C) when C >= $0, C =< $9 ->
@ -103,35 +108,54 @@ reverse_combine_nibbles([D1], Acc) ->
reverse_combine_nibbles([], Acc) -> reverse_combine_nibbles([], Acc) ->
Acc. Acc.
string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) -> string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) ->
case escape_hex_code(A, B) of case escape_hex_code({tk, Row, Col}, {tk, Row, Col + 2}, String, "x\\" ++ SourceChars) of
{ok, ByteVal} -> {ok, {Codepoint, NewSourceChars, NewTk, NewString}} ->
string_token(Start, {tk, Row + 4, Col}, Rest, [B, A, $x, $\ | SourceChars], <<Value/binary, ByteVal>>); NewValue = <<Value/binary, Codepoint/utf8>>,
error -> string_token(Start, NewTk, NewString, NewSourceChars, NewValue);
{error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}} {error, Reason} ->
{error, Reason}
end; end;
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
case escape_char(C) of case escape_char(C) of
{ok, ByteVal} -> {ok, ByteVal} ->
string_token(Start, {tk, Row + 2, Col}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>); string_token(Start, {tk, Row, Col + 2}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>);
error -> error ->
{error, {invalid_escape_code, [C], Row, Col}} {error, {invalid_escape_code, [C], Row, Col}}
end; end;
string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) -> string_token({tk, _, Start}, {tk, Row, Col}, [$" | Rest], SourceChars, Value) ->
SourceStr = lists:reverse([$" | SourceChars]), SourceStr = lists:reverse([$" | SourceChars]),
Token = {string, SourceStr, Value, Row, Start, End}, Token = {string, SourceStr, Value, Row, Start, Col},
{ok, {Token, {tk, Row, End}, Rest}}; {ok, {Token, {tk, Row, Col + 1}, Rest}};
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <<Value/binary, C>>). % TODO: ERTS probably had to convert this FROM utf8 at some point, so why
% bother, if we need to convert it back? I guess we could accept iolists if
% we really wanted to waste time on this point...
string_token(Start, {tk, Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) -> escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) ->
escape_long_hex_code(Start, {tk, Row, Col + 1}, String, "{" ++ SourceChars, 0);
escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
% As of writing this, the Sophia compiler will convert this byte from % As of writing this, the Sophia compiler will convert this byte from
% extended ASCII to unicode... But it really shouldn't. The literal parser % extended ASCII to unicode... But it really shouldn't. The literal parser
% does what the compiler should do. % does what the compiler should do.
Byte = convert_digit(A) * 16 + convert_digit(B), Byte = convert_digit(A) * 16 + convert_digit(B),
{ok, Byte}; {ok, {Byte, [B, A | SourceChars], {tk, Row, Col + 2}, String}};
escape_hex_code(_, _) -> escape_hex_code({tk, Row1, Col1}, _, _, _) ->
error. {error, {invalid_escape_code, "\\x", Row1, Col1}}.
%% Consumes the body of a long hex escape, i.e. the part of "\x{HHHH}" that
%% follows the opening brace, folding the hex digits into a single codepoint.
%%
%% Arguments, in order:
%%   - the tokenizer position handed in by the caller for the start of the
%%     escape sequence; it is only used for reporting an invalid escape,
%%   - the current tokenizer position, as {tk, Row, Col},
%%   - the remaining input,
%%   - the source characters consumed so far, most recent first,
%%   - the numeric value accumulated so far.
%%
%% Returns {ok, {Codepoint, NewSourceChars, NewTk, RemainingInput}} on
%% success, {error, {invalid_hexadecimal, [C], Row, Col}} when a non-hex
%% character is found inside the braces, and
%% {error, {invalid_escape_code, "\\x", Row, Col}} when the digits do not form
%% a codepoint that can be encoded as UTF-8.
%%
%% The range check matters because a caller that appends the result to a
%% binary with a /utf8 segment would raise badarg for surrogates
%% (16#D800..16#DFFF) and for anything above 16#10FFFF, instead of producing
%% an error tuple.
escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value)
        when Value =< 16#D7FF; Value >= 16#E000, Value =< 16#10FFFF ->
    {ok, {Value, "}" ++ SourceChars, {tk, Row, Col + 1}, String}};
escape_long_hex_code({tk, StartRow, StartCol}, _, "}" ++ _, _, _) ->
    % The escape is well formed, but it does not name an encodable codepoint.
    {error, {invalid_escape_code, "\\x", StartRow, StartCol}};
escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
    NewSourceChars = [C | SourceChars],
    NewValue = 16 * Value + convert_digit(C),
    escape_long_hex_code(Start, {tk, Row, Col + 1}, String, NewSourceChars, NewValue);
escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) ->
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Tk, [], SourceChars, Value)
        when Value =< 16#D7FF; Value >= 16#E000, Value =< 16#10FFFF ->
    % The input ended before the closing brace. Return as if the escape code
    % were closed, so that the string parser is the one that deals with the
    % unterminated string.
    {ok, {Value, SourceChars, Tk, []}};
escape_long_hex_code({tk, StartRow, StartCol}, _, [], _, _) ->
    % Unterminated AND out of range: report the bad escape rather than hand
    % back a value that cannot be encoded.
    {error, {invalid_escape_code, "\\x", StartRow, StartCol}}.
escape_char($b) -> {ok, $\b}; escape_char($b) -> {ok, $\b};
escape_char($e) -> {ok, $\e}; escape_char($e) -> {ok, $\e};
@ -209,8 +233,8 @@ parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start); parse_tuple(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start); parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) ->
parse_variant(Type, Tk, String, Ident, Row, Start, End); parse_alphanum(Type, Tk, String, S, Row, Start, End);
parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> parse_expression2(_, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}. {error, {unexpected_token, S, Row, Start, End}}.
@ -227,6 +251,69 @@ expect_tokens([Str | Rest], Tk, String) ->
{error, {unexpected_token, Actual, Row, Start, End}} {error, {unexpected_token, Actual, Row, Start, End}}
end. end.
%%% Ambiguous Chain Object vs Identifier Parsing
%% Decides what an alphanumeric token means when it appears as a literal.
%%
%% A token starting with an uppercase Latin letter is handed to
%% parse_variant/7. Anything else is decoded as a chain object with
%% gmser_api_encoder:decode/1; accounts, contracts and signatures are passed
%% on to their typecheck_* function, and every other outcome (including any
%% exception raised along the way) is reported as
%% {error, {unexpected_identifier, Name, Row, Start, End}}.
parse_alphanum(Type, Tk, String, [Initial | _] = Name, Row, Start, End) when ?IS_LATIN_UPPER(Initial) ->
    % Chain object prefixes such as ak_ are lowercase, so an uppercase
    % initial can only be a variant constructor.
    parse_variant(Type, Tk, String, Name, Row, Start, End);
parse_alphanum(Type, Tk, String, Name, Row, Start, End) ->
    % Constructors were handled above, which leaves chain objects. Sophia only
    % recognizes a few of them; the rest read as plain identifiers, so they
    % get the same kind of error as any other unexpected identifier.
    Unexpected = {error, {unexpected_identifier, Name, Row, Start, End}},
    try
        Encoded = unicode:characters_to_binary(Name),
        case gmser_api_encoder:decode(Encoded) of
            {account_pubkey, Data} ->
                typecheck_address(Type, Tk, String, Data, Row, Start, End);
            {contract_pubkey, Data} ->
                typecheck_contract(Type, Tk, String, Data, Row, Start, End);
            {signature, Data} ->
                typecheck_signature(Type, Tk, String, Data, Row, Start, End);
            {_, _} ->
                Unexpected
        end
    catch
        _:_ -> Unexpected
    end.
%% Checks a decoded account address against the expected type.
%%
%% The expected type is a 3-tuple whose last element is the type itself. An
%% address is accepted where an address or an unknown type is expected, and is
%% relabelled as a contract where a contract is expected (the compiler would
%% reject that, but this parser is deliberately lenient). Any other expected
%% type yields {error, {wrong_type, O, N, address, Row, Start, End}}.
typecheck_address({_, _, Expected}, Tk, String, Data, _, _, _)
        when Expected =:= address; Expected =:= unknown_type ->
    {ok, {{address, Data}, Tk, String}};
typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Tk, String}};
typecheck_address({Original, Normalized, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Original, Normalized, address, Row, Start, End}}.
%% Checks a decoded contract address against the expected type.
%%
%% The expected type is a 3-tuple whose last element is the type itself. A
%% contract is accepted where a contract or an unknown type is expected, and
%% is relabelled as an address where an address is expected (the compiler
%% would reject that, but this parser is deliberately lenient). Any other
%% expected type yields {error, {wrong_type, O, N, contract, Row, Start, End}}.
typecheck_contract({_, _, Expected}, Tk, String, Data, _, _, _)
        when Expected =:= contract; Expected =:= unknown_type ->
    {ok, {{contract, Data}, Tk, String}};
typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) ->
    {ok, {{address, Data}, Tk, String}};
typecheck_contract({Original, Normalized, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Original, Normalized, contract, Row, Start, End}}.
%% Checks a decoded signature against the expected type.
%%
%% The expected type is a 3-tuple whose last element is the type itself. A
%% signature is returned as {bytes, Data} when the expected type is signature,
%% {bytes, [64]}, {bytes, [any]} or unknown_type. The two bytes cases are a
%% leniency: the compiler would probably reject them. Any other expected type
%% yields {error, {wrong_type, O, N, signature, Row, Start, End}}.
typecheck_signature({_, _, Expected}, Tk, String, Data, _, _, _)
        when Expected =:= signature;
             Expected =:= {bytes, [64]};
             Expected =:= {bytes, [any]};
             Expected =:= unknown_type ->
    {ok, {{bytes, Data}, Tk, String}};
typecheck_signature({Original, Normalized, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Original, Normalized, signature, Row, Start, End}}.
%%% List Parsing %%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
@ -675,7 +762,7 @@ check_sophia_to_fate(Type, Sophia, Fate) ->
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
end. end.
compile_entrypoint_code_and_type(Source, Entrypoint) -> compile_entrypoint_value_and_type(Source, Entrypoint) ->
{ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]), {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),
% Find the fcode for the correct entrypoint. % Find the fcode for the correct entrypoint.
@ -684,12 +771,13 @@ compile_entrypoint_code_and_type(Source, Entrypoint) ->
Name = unicode:characters_to_binary(Entrypoint), Name = unicode:characters_to_binary(Entrypoint),
{Hash, Name} = lists:keyfind(Name, 2, Names), {Hash, Name} = lists:keyfind(Name, 2, Names),
{_, _, Code} = maps:get(Hash, Bodies), {_, _, Code} = maps:get(Hash, Bodies),
FATE = extract_return_value(Code),
% Generate the AACI, and get the AACI type info for the correct entrypoint. % Generate the AACI, and get the AACI type info for the correct entrypoint.
AACI = hz_aaci:prepare_aaci(ACI), AACI = hz_aaci:prepare_aaci(ACI),
{ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
{Code, Type}. {FATE, Type}.
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
FATE; FATE;
@ -700,11 +788,10 @@ check_parser(Sophia) ->
% Compile the literal using the compiler, to check that it is valid Sophia % Compile the literal using the compiler, to check that it is valid Sophia
% syntax, and to get an AACI object to pass to the parser. % syntax, and to get an AACI object to pass to the parser.
Source = "contract C = entrypoint f() = " ++ Sophia, Source = "contract C = entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Check that when we parse the term we get the same value as the Sophia % Check that when we parse the term we get the same value as the Sophia
% compiler. % compiler.
Fate = extract_return_value(Code),
check_sophia_to_fate(unknown_type(), Sophia, Fate), check_sophia_to_fate(unknown_type(), Sophia, Fate),
% Then, once we know that the term is correct, make sure that it is still % Then, once we know that the term is correct, make sure that it is still
@ -714,11 +801,7 @@ check_parser(Sophia) ->
check_parser_with_typedef(Typedef, Sophia) -> check_parser_with_typedef(Typedef, Sophia) ->
% Compile the type definitions alongside the usual literal expression. % Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
Fate = extract_return_value(Code),
% Check the FATE term as usual.
gmb_fate_encoding:serialize(Fate),
% Do a typed parse, as usual, but there are probably record/variant % Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work. % definitions in the AACI, so untyped parses probably don't work.
@ -747,7 +830,7 @@ anon_types_test() ->
string_escape_codes_test() -> string_escape_codes_test() ->
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
check_parser("\"\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\""), check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
ok. ok.
records_test() -> records_test() ->
@ -758,6 +841,38 @@ records_test() ->
% will error, though. % will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
%% Variant literals parse when the datatype definition is available, and an
%% untyped parse of a bare constructor reports an unresolved variant.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",

    % One literal per constructor arity, plus one with list arguments.
    Literals = ["Zero", "One(0)", "Two(0, 1)", "Two([], [1, 2, 3])"],
    lists:foreach(
        fun(Literal) -> check_parser_with_typedef(TypeDef, Literal) end,
        Literals
    ),

    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),

    ok.
%% Chain object literals: an account address, signatures, and a contract
%% address.
chain_objects_test() ->
    % An account address.
    Address = "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    check_parser(Address),

    % A list holding a signature in both of its forms: the sg_ encoding and a
    % hex bytes literal.
    Signatures = "[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]",
    check_parser(Signatures),

    % Contract addresses need a hand-written contract rather than
    % check_parser/1, so that we get an AACI and a return value to compare
    % against. The compiler insists on type checking contract addresses by
    % the rules of "contract oriented programming" (covariance and so on),
    % which does not sit well with ML style type inference, so the entrypoint
    % carries an explicit return type.
    ContractAddress = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ ContractAddress,
    {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(ContractType, ContractAddress, Fate),
    check_sophia_to_fate(unknown_type(), ContractAddress, Fate),

    ok.
singleton_records_test() -> singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}", TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"), check_parser_with_typedef(TypeDef, "{it = 123}"),
@ -795,16 +910,28 @@ excess_parens_test() ->
ok. ok.
variant_test() -> lexer_offset_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", % Test that various tokens report their position correctly.
{error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
{error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
{error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
{error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
{error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
{error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),
check_parser_with_typedef(TypeDef, "Zero"), % Check that the tokenizer knows its position correctly *after* various
check_parser_with_typedef(TypeDef, "One(0)"), % tokens.
check_parser_with_typedef(TypeDef, "Two(0, 1)"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
{error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), % Check that the tokenizer accounts for various line separators correctly.
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),
ok. ok.