Parse address literals.

Also parse signature literals.
This commit is contained in:
Jarvis Carroll 2026-02-03 06:00:40 +00:00
parent 493bdb990c
commit a695c21fc9

View File

@ -32,7 +32,9 @@ parse_literal2(Result, Tk, String) ->
%%% Tokenizer %%% Tokenizer
-define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))). -define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
@ -231,8 +233,8 @@ parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start); parse_tuple(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start); parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) ->
parse_variant(Type, Tk, String, Ident, Row, Start, End); parse_alphanum(Type, Tk, String, S, Row, Start, End);
parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> parse_expression2(_, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}. {error, {unexpected_token, S, Row, Start, End}}.
@ -249,6 +251,69 @@ expect_tokens([Str | Rest], Tk, String) ->
{error, {unexpected_token, Actual, Row, Start, End}} {error, {unexpected_token, Actual, Row, Start, End}}
end. end.
%%% Ambiguous Chain Object vs Identifier Parsing
%% Dispatch an alphanumeric token to either the variant-constructor parser or
%% the chain-object (address/contract/signature) parser, based on the case of
%% its first character.
parse_alphanum(Type, Tk, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Chain objects start with lowercase prefixes, like ak_, so clearly this is
    % a variant constructor.
    parse_variant(Type, Tk, String, S, Row, Start, End);
parse_alphanum(Type, Tk, String, S, Row, Start, End) ->
    % Inversely, variant constructors are always uppercase, so now that we have
    % handled that case, only chain objects are left.
    %
    % Only the decode call itself is protected: decode crashes on anything
    % that is not a valid chain-object encoding, and we report that the same
    % way the compiler would. The `of` branches (and the typecheck_* helpers
    % they call) are deliberately *outside* the protected region, so a bug in
    % them surfaces as a crash rather than being masked as an identifier error.
    try gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
        {account_pubkey, Data} ->
            typecheck_address(Type, Tk, String, Data, Row, Start, End);
        {contract_pubkey, Data} ->
            typecheck_contract(Type, Tk, String, Data, Row, Start, End);
        {signature, Data} ->
            typecheck_signature(Type, Tk, String, Data, Row, Start, End);
        {_, _} ->
            % Only a few chain objects are recognized by Sophia. The rest
            % are interpreted as identifiers, so we might as well give the
            % same sort of error that the compiler would give.
            {error, {unexpected_identifier, S, Row, Start, End}}
    catch
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end.
%% Check a decoded account pubkey against the expected type. An `address` or
%% unconstrained (`unknown_type`) slot accepts it as an address; a `contract`
%% slot is tolerated leniently even though the compiler would reject it.
typecheck_address({_, _, Declared}, Tk, String, Data, _, _, _)
        when Declared =:= address; Declared =:= unknown_type ->
    {ok, {{address, Data}, Tk, String}};
typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{contract, Data}, Tk, String}};
typecheck_address({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, address, Row, Start, End}}.
%% Check a decoded contract pubkey against the expected type. A `contract` or
%% unconstrained (`unknown_type`) slot accepts it as a contract; an `address`
%% slot is tolerated leniently even though the compiler would reject it.
typecheck_contract({_, _, Declared}, Tk, String, Data, _, _, _)
        when Declared =:= contract; Declared =:= unknown_type ->
    {ok, {{contract, Data}, Tk, String}};
typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{address, Data}, Tk, String}};
typecheck_contract({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, contract, Row, Start, End}}.
%% Check a decoded signature against the expected type. Signatures are
%% represented as 64-byte bytes values, so `signature`, `bytes(64)`,
%% unsized `bytes`, and unconstrained (`unknown_type`) slots all accept it;
%% anything else is a type mismatch. (The compiler would probably reject the
%% bytes cases, but we are lenient here.)
typecheck_signature({_, _, Declared}, Tk, String, Data, _, _, _) when
        Declared =:= signature;
        Declared =:= {bytes, [64]};
        Declared =:= {bytes, [any]};
        Declared =:= unknown_type ->
    {ok, {{bytes, Data}, Tk, String}};
typecheck_signature({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, signature, Row, Start, End}}.
%%% List Parsing %%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
@ -697,7 +762,7 @@ check_sophia_to_fate(Type, Sophia, Fate) ->
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
end. end.
compile_entrypoint_code_and_type(Source, Entrypoint) -> compile_entrypoint_value_and_type(Source, Entrypoint) ->
{ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]), {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),
% Find the fcode for the correct entrypoint. % Find the fcode for the correct entrypoint.
@ -706,12 +771,13 @@ compile_entrypoint_code_and_type(Source, Entrypoint) ->
Name = unicode:characters_to_binary(Entrypoint), Name = unicode:characters_to_binary(Entrypoint),
{Hash, Name} = lists:keyfind(Name, 2, Names), {Hash, Name} = lists:keyfind(Name, 2, Names),
{_, _, Code} = maps:get(Hash, Bodies), {_, _, Code} = maps:get(Hash, Bodies),
FATE = extract_return_value(Code),
% Generate the AACI, and get the AACI type info for the correct entrypoint. % Generate the AACI, and get the AACI type info for the correct entrypoint.
AACI = hz_aaci:prepare_aaci(ACI), AACI = hz_aaci:prepare_aaci(ACI),
{ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
{Code, Type}. {FATE, Type}.
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
FATE; FATE;
@ -722,11 +788,10 @@ check_parser(Sophia) ->
% Compile the literal using the compiler, to check that it is valid Sophia % Compile the literal using the compiler, to check that it is valid Sophia
% syntax, and to get an AACI object to pass to the parser. % syntax, and to get an AACI object to pass to the parser.
Source = "contract C = entrypoint f() = " ++ Sophia, Source = "contract C = entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Check that when we parse the term we get the same value as the Sophia % Check that when we parse the term we get the same value as the Sophia
% compiler. % compiler.
Fate = extract_return_value(Code),
check_sophia_to_fate(unknown_type(), Sophia, Fate), check_sophia_to_fate(unknown_type(), Sophia, Fate),
% Then, once we know that the term is correct, make sure that it is still % Then, once we know that the term is correct, make sure that it is still
@ -736,11 +801,7 @@ check_parser(Sophia) ->
check_parser_with_typedef(Typedef, Sophia) -> check_parser_with_typedef(Typedef, Sophia) ->
% Compile the type definitions alongside the usual literal expression. % Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"), {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
Fate = extract_return_value(Code),
% Check the FATE term as usual.
gmb_fate_encoding:serialize(Fate),
% Do a typed parse, as usual, but there are probably record/variant % Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work. % definitions in the AACI, so untyped parses probably don't work.
@ -780,6 +841,38 @@ records_test() ->
% will error, though. % will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
%% Variant constructors round-trip through the parser when the datatype is
%% declared, and fail to resolve when no type information is available.
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Literals = ["Zero", "One(0)", "Two(0, 1)", "Two([], [1, 2, 3])"],
    lists:foreach(
        fun(Literal) -> check_parser_with_typedef(TypeDef, Literal) end,
        Literals),
    % Without a typedef in scope the constructor cannot be resolved.
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
    ok.
%% Chain-object literals (account addresses, signatures, contract addresses)
%% parse to the same FATE values the compiler produces.
chain_objects_test() ->
    % An account address literal.
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % A signature, in both chain-object and raw-bytes notation.
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),
    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because the
    % compiler demands that contract addresses be type checked according to the
    % logic of "contract oriented programming", including covariance, etc. and
    % "contract oriented programming" is not very compatible with ML style type
    % inference.
    ContractLiteral = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    ContractSource = "contract C = entrypoint f(): C = " ++ ContractLiteral,
    {FateValue, ContractType} = compile_entrypoint_value_and_type(ContractSource, "f"),
    % The literal must round-trip both with and without type information.
    check_sophia_to_fate(ContractType, ContractLiteral, FateValue),
    check_sophia_to_fate(unknown_type(), ContractLiteral, FateValue),
    ok.
singleton_records_test() -> singleton_records_test() ->
TypeDef = "record singleton('a) = {it: 'a}", TypeDef = "record singleton('a) = {it: 'a}",
check_parser_with_typedef(TypeDef, "{it = 123}"), check_parser_with_typedef(TypeDef, "{it = 123}"),
@ -817,23 +910,11 @@ excess_parens_test() ->
ok. ok.
variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"),
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
ok.
lexer_offset_test() -> lexer_offset_test() ->
% Test that various tokens report their position correctly. % Test that various tokens report their position correctly.
{error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"), {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
{error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"), {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
{error, {unexpected_token, "abc", 1, 5, 7}} = parse_literal("123 abc"), {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
{error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"), {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
{error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""), {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
{error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""), {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),
@ -841,16 +922,16 @@ lexer_offset_test() ->
% Check that the tokenizer knows its position correctly *after* various % Check that the tokenizer knows its position correctly *after* various
% tokens. % tokens.
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
ABCType = {"mytype", already_normalized, {variant, [{"abc", []}]}}, ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "abc 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
{error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"), {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),
% Check that the tokenizer accounts for various line separators correctly. % Check that the tokenizer accounts for various line separators correctly.
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\nabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\r\nabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
{error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\rabc"), {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),
ok. ok.