diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 3ae94d5..4ac67f2 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -36,7 +36,7 @@ parse_literal2(Result, Pos, String) -> -define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))). -define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). --define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). +-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). next_token({Row, Col}, []) -> @@ -67,7 +67,8 @@ alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]); alphanum_token({_, Start}, {Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), - Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1}, + Path = string:split(AlphaString, ".", all), + Token = {alphanum, AlphaString, Path, Row, Start, End - 1}, {ok, {Token, {Row, End}, String}}. num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> @@ -233,8 +234,8 @@ parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> parse_tuple(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); -parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) -> - parse_alphanum(Type, Pos, String, S, Row, Start, End); +parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) -> + parse_alphanum(Type, Pos, String, Path, Row, Start, End); parse_expression2(_, _, _, {eof, _, _, _, _, _}) -> {error, unexpected_end_of_file}; parse_expression2(_, _, _, Token) -> @@ -269,15 +270,11 @@ unexpected_token({_, S, _, Row, Start, End}) -> %%% Ambiguous Chain Object vs Identifier Parsing -parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> +parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. - % Chain objects start with lowercase prefixes, like ak_, so clearly this is - % a variant constructor. - parse_variant(Type, Pos, String, S, Row, Start, End); -parse_alphanum(Type, Pos, String, S, Row, Start, End) -> - % Inversely, variant constructors are always uppercase, so now that we have - % handled that case, only chain objects are left. + % Constructors start with uppercase characters, so lowercase can only be a + % chain object. try case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of {account_pubkey, Data} -> @@ -294,7 +291,11 @@ parse_alphanum(Type, Pos, String, S, Row, Start, End) -> end catch _:_ -> {error, {unexpected_identifier, S, Row, Start, End}} - end. + end; +parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> + % Inversely, chain object prefixes are always lowercase, so any other path + % must be a variant constructor, or invalid. + parse_variant(Type, Pos, String, Path, Row, Start, End). typecheck_address({_, _, address}, Pos, String, Data, _, _, _) -> {ok, {{address, Data}, Pos, String}}; @@ -570,26 +571,42 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> %%% Variant parsing -parse_variant({_, _, {variant, Variants}}, Pos, String, Ident, Row, Start, End) -> - parse_variant2(Variants, Pos, String, Ident, Row, Start, End); +parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) -> + parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End); +parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) -> + case get_typename(O, N) of + [Namespace, _] -> + parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End); + _ -> + {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}} + end; parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> {error, {unresolved_variant, Row, Start, End}}; parse_variant({O, N, _}, _, _, _, Row, Start, End) -> % In normal code, identifiers can have many meanings, which can result in - % lots of different errors. In this Sophia 'object notation', identifiers - % can only ever be variant constructors, (sort of like the Sophia version - % of atoms,) and so immediately lead to a type error if we aren't expecting - % a variant. + % lots of different errors. In constant/immediate/normalized Sophia terms + % we know identifiers are always variants, so we can type error if any + % other type was expected. {error, {wrong_type, O, N, variant, Row, Start, End}}. -parse_variant2(Variants, Pos, String, Ident, Row, Start, End) -> - case lookup_variant(Ident, Variants, 0) of +get_typename(O, already_normalized) -> + get_typename(O); +get_typename(_, N) -> + get_typename(N). + +get_typename({Name, _}) -> + string:split(Name, ".", all); +get_typename(Name) -> + string:split(Name, ".", all). + +parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) -> + case lookup_variant(Constructor, Variants, 0) of {ok, {Tag, ElemTypes}} -> GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, Arities = lists:map(GetArity, Variants), parse_variant3(Arities, Tag, ElemTypes, Pos, String); error -> - {error, {invalid_constructor, Ident, Row, Start, End}} + {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}} end. parse_variant3(Arities, Tag, [], Pos, String) -> @@ -878,11 +895,20 @@ variant_test() -> check_parser_with_typedef(TypeDef, "One(0)"), check_parser_with_typedef(TypeDef, "Two(0, 1)"), check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), + check_parser_with_typedef(TypeDef, "C.Zero"), {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), ok. +namespace_variant_test() -> + Term = "[N.A, N.B]", + Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, + {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"), + check_sophia_to_fate(VariantType, Term, Fate), + + ok. + chain_objects_test() -> % Address, check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),