From 4f2a3c6c6f85422e03c3c0737b2d0a46afe55945 Mon Sep 17 00:00:00 2001
From: Jarvis Carroll
Date: Fri, 23 Jan 2026 06:18:39 +0000
Subject: [PATCH] Variant parsing

---
 src/hz_sophia.erl | 82 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl
index 0e491a5..d63d1be 100644
--- a/src/hz_sophia.erl
+++ b/src/hz_sophia.erl
@@ -104,6 +104,8 @@ parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
     parse_tuple(Type, Tk, String, Row, Start);
 parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) ->
     parse_record_or_map(Type, Tk, String, Row, Start);
+parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) ->
+    parse_variant(Type, Tk, String, Ident, Row, Start, End);
 parse_expression2(_, _, _, {_, S, Row, Start, End}) ->
     {error, {unexpected_token, S, Row, Start, End}}.
 
@@ -225,6 +227,70 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
     ExpectCount = length(Remaining) + GotCount,
     {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}.
 
+%%% Variant parsing
+
+% Parse a variant literal whose constructor name (Ident) has just been read.
+% Only a resolved variant type can accept a constructor name.
+parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) ->
+    parse_variant2(Variants, Tk, String, Ident, Row, Start, End);
+parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
+    {error, {unresolved_variant, Row, Start, End}};
+parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
+    % In normal code, identifiers can have many meanings, which can result in
+    % lots of different errors. In this Sophia 'object notation', identifiers
+    % can only ever be variant constructors, (sort of like the Sophia version
+    % of atoms,) and so immediately lead to a type error if we aren't expecting
+    % a variant.
+    {error, {wrong_type, O, N, variant, Row, Start, End}}.
+
+% Look up the constructor, then collect the arity of every constructor in the
+% type, since the resulting variant term carries the full list of arities.
+parse_variant2(Variants, Tk, String, Ident, Row, Start, End) ->
+    case lookup_variant(Ident, Variants, 0) of
+        {ok, {Tag, ElemTypes}} ->
+            GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
+            Arities = lists:map(GetArity, Variants),
+            parse_variant3(Arities, Tag, ElemTypes, Tk, String);
+        error ->
+            {error, {invalid_constructor, Ident, Row, Start, End}}
+    end.
+
+% Constructors without arguments are complete as soon as the name is read.
+% Any other constructor must be followed by "(" and its arguments.
+parse_variant3(Arities, Tag, [], Tk, String) ->
+    % Parsing of 0-arity variants is different.
+    Result = {variant, Arities, Tag, {}},
+    {ok, {Result, Tk, String}};
+parse_variant3(Arities, Tag, ElemTypes, Tk, String) ->
+    case next_token(Tk, String) of
+        {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} ->
+            parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start);
+        % Anything other than "(" is a syntax error. The token arrives wrapped
+        % with the new tokenizer state and remaining input, unused here.
+        {ok, {{_, Actual, Row, Start, End}, _, _}} ->
+            {error, {unexpected_token, Actual, Row, Start, End}}
+    end.
+
+% Parse the arguments that follow "(" with parse_multivalue, and pack the
+% resulting list of terms into the tuple stored in the variant term.
+parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) ->
+    case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
+        {ok, {Terms, NewTk, NewString}} ->
+            Result = {variant, Arities, Tag, list_to_tuple(Terms)},
+            {ok, {Result, NewTk, NewString}};
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+% Find a constructor by name. On success, also return its position in the
+% list, counted upwards from the Tag that the caller passes in.
+lookup_variant(_, [], _) ->
+    error;
+lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
+    {ok, {Tag, ElemTypes}};
+lookup_variant(Ident, [_ | Rest], Tag) ->
+    lookup_variant(Ident, Rest, Tag + 1).
+
 %%% Record parsing
 
 parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) ->
@@ -436,3 +502,19 @@ records_test() ->
     % will error, though.
     {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
 
+variant_test() ->
+    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
+    % Check each literal against the typedef, and also check that it is
+    % rejected as an unresolved variant when the type is unknown.
+    TestFn = fun(Sophia, Fate) ->
+        check_parser_with_typedef(TypeDef, Sophia, Fate),
+        {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), Sophia)
+    end,
+
+    TestFn("Zero", {variant, [0, 1, 2], 0, {}}),
+    TestFn("One(0)", {variant, [0, 1, 2], 1, {0}}),
+    TestFn("Two(0, 1)", {variant, [0, 1, 2], 2, {0, 1}}),
+    TestFn("Two([], [1, 2, 3])", {variant, [0, 1, 2], 2, {[], [1, 2, 3]}}),
+    ok.
+
+