Parse address literals.

Also parse signature literals.
This commit is contained in:
Jarvis Carroll 2026-02-03 06:00:40 +00:00
parent 493bdb990c
commit a695c21fc9

View File

@ -32,7 +32,9 @@ parse_literal2(Result, Tk, String) ->
%%% Tokenizer %%% Tokenizer
-define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))). -define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
@ -231,8 +233,8 @@ parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start); parse_tuple(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start); parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) ->
parse_variant(Type, Tk, String, Ident, Row, Start, End); parse_alphanum(Type, Tk, String, S, Row, Start, End);
parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> parse_expression2(_, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}. {error, {unexpected_token, S, Row, Start, End}}.
@ -249,6 +251,69 @@ expect_tokens([Str | Rest], Tk, String) ->
{error, {unexpected_token, Actual, Row, Start, End}} {error, {unexpected_token, Actual, Row, Start, End}}
end. end.
%%% Ambiguous Chain Object vs Identifier Parsing
%% Dispatch an alphanumeric token to either the variant-constructor parser or
%% the chain-object (address/contract/signature) parser, based on the case of
%% its first character.
parse_alphanum(Type, Tk, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Chain objects start with lowercase prefixes, like ak_, so clearly this is
    % a variant constructor.
    parse_variant(Type, Tk, String, S, Row, Start, End);
parse_alphanum(Type, Tk, String, S, Row, Start, End) ->
    % Inversely, variant constructors are always uppercase, so now that we have
    % handled that case, only chain objects are left.
    %
    % Only the decode call itself is protected: decode crashes on anything
    % that is not a valid chain-object encoding, and we report that the same
    % way the compiler would. The `of` branches (and the typecheck_* helpers
    % they call) are deliberately *outside* the protected region, so a bug in
    % them surfaces as a crash rather than being masked as an identifier error.
    try gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
        {account_pubkey, Data} ->
            typecheck_address(Type, Tk, String, Data, Row, Start, End);
        {contract_pubkey, Data} ->
            typecheck_contract(Type, Tk, String, Data, Row, Start, End);
        {signature, Data} ->
            typecheck_signature(Type, Tk, String, Data, Row, Start, End);
        {_, _} ->
            % Only a few chain objects are recognized by Sophia. The rest
            % are interpreted as identifiers, so we might as well give the
            % same sort of error that the compiler would give.
            {error, {unexpected_identifier, S, Row, Start, End}}
    catch
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end.
%% Check a decoded account pubkey against the expected type. An `address` or
%% unconstrained (`unknown_type`) slot accepts it as an address; a `contract`
%% slot is tolerated leniently even though the compiler would reject it.
typecheck_address({_, _, Declared}, Tk, String, Data, _, _, _)
        when Declared =:= address; Declared =:= unknown_type ->
    {ok, {{address, Data}, Tk, String}};
typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{contract, Data}, Tk, String}};
typecheck_address({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, address, Row, Start, End}}.
%% Check a decoded contract pubkey against the expected type. A `contract` or
%% unconstrained (`unknown_type`) slot accepts it as a contract; an `address`
%% slot is tolerated leniently even though the compiler would reject it.
typecheck_contract({_, _, Declared}, Tk, String, Data, _, _, _)
        when Declared =:= contract; Declared =:= unknown_type ->
    {ok, {{contract, Data}, Tk, String}};
typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{address, Data}, Tk, String}};
typecheck_contract({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, contract, Row, Start, End}}.
%% Check a decoded signature against the expected type. Signatures are
%% represented as 64-byte bytes values, so `signature`, `bytes(64)`,
%% unsized `bytes`, and unconstrained (`unknown_type`) slots all accept it;
%% anything else is a type mismatch. (The compiler would probably reject the
%% bytes cases, but we are lenient here.)
typecheck_signature({_, _, Declared}, Tk, String, Data, _, _, _) when
        Declared =:= signature;
        Declared =:= {bytes, [64]};
        Declared =:= {bytes, [any]};
        Declared =:= unknown_type ->
    {ok, {{bytes, Data}, Tk, String}};
typecheck_signature({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, signature, Row, Start, End}}.
%%% List Parsing %%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
@ -697,7 +762,7 @@ check_sophia_to_fate(Type, Sophia, Fate) ->
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
end. end.
compile_entrypoint_code_and_type(Source, Entrypoint) -> compile_entrypoint_value_and_type(Source, Entrypoint) ->
{ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]), {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),
% Find the fcode for the correct entrypoint. % Find the fcode for the correct entrypoint.
@ -706,12 +771,13 @@ compile_entrypoint_code_and_type(Source, Entrypoint) ->
Name = unicode:characters_to_binary(Entrypoint), Name = unicode:characters_to_binary(Entrypoint),
{Hash, Name} = lists:keyfind(Name, 2, Names), {Hash, Name} = lists:keyfind(Name, 2, Names),
{_, _, Code} = maps:get(Hash, Bodies), {_, _, Code} = maps:get(Hash, Bodies),
FATE = extract_return_value(Code),
% Generate the AACI, and get the AACI type info for the correct entrypoint. % Generate the AACI, and get the AACI type info for the correct entrypoint.
AACI = hz_aaci:prepare_aaci(ACI), AACI = hz_aaci:prepare_aaci(ACI),
{ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
{Code, Type}. {FATE, Type}.
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
FATE; FATE;
@ -722,11 +788,10 @@ check_parser(Sophia) ->
% Compile the literal using the compiler, to check that it is valid Sophia % Compile the literal using the compiler, to check that it is valid Sophia
% syntax, and to get an AACI object to pass to the parser. % syntax, and to get an AACI object to pass to the parser.
Source = "contract C = entrypoint f() = " ++ Sophia, Source = "contract C = entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Check that when we parse the term we get the same value as the Sophia % Check that when we parse the term we get the same value as the Sophia
% compiler. % compiler.
Fate = extract_return_value(Code),
check_sophia_to_fate(unknown_type(), Sophia, Fate), check_sophia_to_fate(unknown_type(), Sophia, Fate),
% Then, once we know that the term is correct, make sure that it is still % Then, once we know that the term is correct, make sure that it is still
@ -736,11 +801,7 @@ check_parser(Sophia) ->
check_parser_with_typedef(Typedef, Sophia) -> check_parser_with_typedef(Typedef, Sophia) ->
% Compile the type definitions alongside the usual literal expression. % Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
Fate = extract_return_value(Code),
% Check the FATE term as usual.
gmb_fate_encoding:serialize(Fate),
% Do a typed parse, as usual, but there are probably record/variant % Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work. % definitions in the AACI, so untyped parses probably don't work.
@ -780,6 +841,38 @@ records_test() ->
% will error, though. % will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
%% Variant constructors round-trip through the parser when the datatype is
%% declared, and fail to resolve when no type information is available.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Literals = ["Zero", "One(0)", "Two(0, 1)", "Two([], [1, 2, 3])"],
    lists:foreach(
        fun(Literal) -> check_parser_with_typedef(TypeDef, Literal) end,
        Literals),
    % Without a typedef in scope the constructor cannot be resolved.
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
    ok.
%% Chain-object literals (account addresses, signatures, contract addresses)
%% parse to the same FATE values the compiler produces.
chain_objects_test() ->
    % An account address literal.
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % A signature, in both chain-object and raw-bytes notation.
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),
    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because the
    % compiler demands that contract addresses be type checked according to the
    % logic of "contract oriented programming", including covariance, etc. and
    % "contract oriented programming" is not very compatible with ML style type
    % inference.
    ContractLiteral = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    ContractSource = "contract C = entrypoint f(): C = " ++ ContractLiteral,
    {FateValue, ContractType} = compile_entrypoint_value_and_type(ContractSource, "f"),
    % The literal must round-trip both with and without type information.
    check_sophia_to_fate(ContractType, ContractLiteral, FateValue),
    check_sophia_to_fate(unknown_type(), ContractLiteral, FateValue),
    ok.
singleton_records_test() -> singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}", TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"), check_parser_with_typedef(TypeDef, "{it = 123}"),
@ -817,23 +910,11 @@ excess_parens_test() ->
ok. ok.
variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"),
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
ok.
lexer_offset_test() -> lexer_offset_test() ->
% Test that various tokens report their position correctly. % Test that various tokens report their position correctly.
{error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"), {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
{error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"), {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
{error, {unexpected_token, "abc", 1, 5, 7}} = parse_literal("123 abc"), {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
{error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"), {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
{error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""), {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
{error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""), {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),
@ -841,16 +922,16 @@ lexer_offset_test() ->
% Check that the tokenizer knows its position correctly *after* various % Check that the tokenizer knows its position correctly *after* various
% tokens. % tokens.
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
ABCType = {"mytype", already_normalized, {variant, [{"abc", []}]}}, ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "abc 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
{error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"), {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),
% Check that the tokenizer accounts for various line separators correctly. % Check that the tokenizer accounts for various line separators correctly.
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\nabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\r\nabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\rabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),
ok. ok.