Parse qualified names.

This seemed like it was going to be insanely complex, but
then it turns out the compiler doesn't accept spaces in qualified
names, so I can just dump periods in the lexer and hit it with
string:split/3. Easy.
This commit is contained in:
Jarvis Carroll 2026-02-05 07:13:25 +00:00
parent d014ae0982
commit 3838a7e3c5

View File

@ -36,7 +36,7 @@ parse_literal2(Result, Pos, String) ->
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
next_token({Row, Col}, []) ->
@ -67,7 +67,8 @@ alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({_, Start}, {Row, End}, String, Acc) ->
AlphaString = lists:reverse(Acc),
Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1},
Path = string:split(AlphaString, ".", all),
Token = {alphanum, AlphaString, Path, Row, Start, End - 1},
{ok, {Token, {Row, End}, String}}.
num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
@ -233,8 +234,8 @@ parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) ->
parse_alphanum(Type, Pos, String, S, Row, Start, End);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
{error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
@ -269,15 +270,11 @@ unexpected_token({_, S, _, Row, Start, End}) ->
%%% Ambiguous Chain Object vs Identifier Parsing
parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) ->
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
% From a programming perspective, we are trying to parse a constant, so
% an alphanum token can really only be a constructor, or a chain object.
% Chain objects start with lowercase prefixes, like ak_, so clearly this is
% a variant constructor.
parse_variant(Type, Pos, String, S, Row, Start, End);
parse_alphanum(Type, Pos, String, S, Row, Start, End) ->
% Inversely, variant constructors are always uppercase, so now that we have
% handled that case, only chain objects are left.
% Constructors start with uppercase characters, so lowercase can only be a
% chain object.
try
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
{account_pubkey, Data} ->
@ -294,7 +291,11 @@ parse_alphanum(Type, Pos, String, S, Row, Start, End) ->
end
catch
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
end.
end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
% Inversely, chain object prefixes are always lowercase, so any other path
% must be a variant constructor, or invalid.
parse_variant(Type, Pos, String, Path, Row, Start, End).
typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
{ok, {{address, Data}, Pos, String}};
@ -570,26 +571,42 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
%%% Variant parsing
parse_variant({_, _, {variant, Variants}}, Pos, String, Ident, Row, Start, End) ->
parse_variant2(Variants, Pos, String, Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) ->
parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) ->
case get_typename(O, N) of
[Namespace, _] ->
parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End);
_ ->
{error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
end;
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
% In normal code, identifiers can have many meanings, which can result in
% lots of different errors. In this Sophia 'object notation', identifiers
% can only ever be variant constructors, (sort of like the Sophia version
% of atoms,) and so immediately lead to a type error if we aren't expecting
% a variant.
% lots of different errors. In constant/immediate/normalized Sophia terms
% we know identifiers are always variants, so we can type error if any
% other type was expected.
{error, {wrong_type, O, N, variant, Row, Start, End}}.
parse_variant2(Variants, Pos, String, Ident, Row, Start, End) ->
case lookup_variant(Ident, Variants, 0) of
% Return the name of the expected type as a list of its dot-separated
% segments, e.g. "N.mytype" becomes ["N", "mytype"].
% If the second argument is the atom already_normalized, the name is taken
% from the first argument; otherwise it is taken from the second argument.
% NOTE(review): O and N look like the original and normalized forms of the
% expected type, as carried in the {O, N, Type} triples -- confirm.
get_typename(O, already_normalized) ->
get_typename(O);
get_typename(_, N) ->
get_typename(N).
% Accepts either a two-element tuple whose first element is the name
% (presumably {Name, TypeArgs} -- confirm), or the bare name itself. Either
% way the name is split at every "." so callers can match on the path shape.
get_typename({Name, _}) ->
string:split(Name, ".", all);
get_typename(Name) ->
string:split(Name, ".", all).
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
case lookup_variant(Constructor, Variants, 0) of
{ok, {Tag, ElemTypes}} ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
Arities = lists:map(GetArity, Variants),
parse_variant3(Arities, Tag, ElemTypes, Pos, String);
error ->
{error, {invalid_constructor, Ident, Row, Start, End}}
{error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
end.
parse_variant3(Arities, Tag, [], Pos, String) ->
@ -878,11 +895,20 @@ variant_test() ->
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
check_parser_with_typedef(TypeDef, "C.Zero"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
ok.
% Exercises namespace-qualified constructors: the term "[N.A, N.B]" names the
% constructors A and B of a datatype declared inside namespace N, and is
% embedded as the body of entrypoint f in contract C.
% NOTE(review): compile_entrypoint_value_and_type/2 and check_sophia_to_fate/3
% are defined outside this view; presumably the former compiles Source and
% returns the FATE value and type of f, and the latter checks that parsing
% Term at that type yields the same FATE value -- confirm.
namespace_variant_test() ->
Term = "[N.A, N.B]",
Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
{Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
check_sophia_to_fate(VariantType, Term, Fate),
ok.
chain_objects_test() ->
% Address,
check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),