Singleton record/tuple parsing.
Singleton records are a simple case to detect and handle correctly. Singleton tuples, on the other hand, required an entire rewrite of the small tuple-parsing section of the code.
This commit is contained in:
parent
49cd8b6687
commit
272ed01fdc
@ -223,7 +223,7 @@ expect_tokens([Str | Rest], Tk, String) ->
|
||||
case next_token(Tk, String) of
|
||||
{ok, {{_, Str, _, _, _, _}, NewTk, NewString}} ->
|
||||
expect_tokens(Rest, NewTk, NewString);
|
||||
{ok, {{_, Actual, _, Row, Start, End}}} ->
|
||||
{ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||
end.
|
||||
|
||||
@ -268,33 +268,158 @@ parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
|
||||
choose_list_error_wrapper("]") -> list_element;
|
||||
choose_list_error_wrapper(")") -> tuple_element.
|
||||
|
||||
%%% Tuple Parsing
|
||||
%%% Ambiguous Parenthesis Parsing
|
||||
|
||||
parse_tuple({_, _, {tuple, Types}}, Tk, String, Row, Start) ->
|
||||
case parse_multivalue(Types, Tk, String, Row, Start, []) of
|
||||
{ok, {TermList, NewTk, NewString}} ->
|
||||
Result = {tuple, list_to_tuple(TermList)},
|
||||
{ok, {Result, NewTk, NewString}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) ->
|
||||
% An untyped tuple is a list of untyped terms, and weirdly our list parser
|
||||
% works perfectly for that, as long as we change the closing character to
|
||||
% be ")" instead of "]".
|
||||
case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of
|
||||
{ok, {[Inner], NewTk, NewString}} ->
|
||||
% In Sophia, singleton tuples are unwrapped, and given the inner
|
||||
% type.
|
||||
{ok, {Inner, NewTk, NewString}};
|
||||
{ok, {TermList, NewTk, NewString}} ->
|
||||
Result = {tuple, list_to_tuple(TermList)},
|
||||
{ok, {Result, NewTk, NewString}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_tuple({O, N, _}, _, _, Row, Start) ->
|
||||
{error, {wrong_type, O, N, tuple, Row, Start, Start}}.
|
||||
parse_tuple({O, N, T}, Tk, String, _, _) ->
    % Entry point for parsing a parenthesised term against the type
    % {O, N, T}. One open paren has already been seen when we get here.
    %
    % Parens are ambiguous: they may open a tuple, or they may only group a
    % single term. Plain data notation has no infix operators, so grouping is
    % not strictly needed, but the alternatives are worse: naively building
    % singleton tuples (which Sophia itself can never produce), or rejecting
    % them with a hard error. Staying faithful to Sophia is the nicer option.
    %
    % Strategy: count every consecutive open paren (starting from 1 for the
    % one already seen), then compare that count with how deeply tuple types
    % are nested in the type we are expected to produce.
    case count_open_parens(Tk, String, 1) of
        {error, Reason} ->
            {error, Reason};
        {ok, {ParenCount, FirstToken, Tk2, String2}} ->
            % Peel one tuple layer off the expected type per paren, keeping
            % track of any parens that no tuple type accounts for.
            {Excess, HeadType, Tails} =
                extract_tuple_type_info(ParenCount, {O, N, T}, []),
            % Hand everything over to the stage that decides what to do with
            % the first non-paren token.
            parse_tuple2(O, N, Excess, HeadType, Tails, Tk2, String2, FirstToken)
    end.
|
||||
|
||||
count_open_parens(Tk, String, Count) ->
    % Consume tokens for as long as they are "(" characters, adding one to
    % Count for each. Stops at the first token that is anything else and
    % returns it alongside the final count and the tokenizer state after it.
    % Tokenizer errors are passed through unchanged.
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{character, "(", _, _, _, _}, Tk2, String2}} ->
            count_open_parens(Tk2, String2, Count + 1);
        {ok, {Other, Tk2, String2}} ->
            {ok, {Count, Other, Tk2, String2}}
    end.
|
||||
|
||||
extract_tuple_type_info(Remaining, {_, _, {tuple, [First | Others]}}, Stack) when Remaining > 0 ->
    % There is still an unexplained open paren, and the expected type is a
    % non-empty tuple: attribute the paren to this tuple, remember the
    % element types that follow the first one, and descend into the first
    % element's type.
    extract_tuple_type_info(Remaining - 1, First, [Others | Stack]);
extract_tuple_type_info(Remaining, Type, Stack) ->
    % Either the parens have run out, or the type is not a non-empty tuple.
    % Return the number of parens left unexplained, the type reached, and
    % the saved tails (innermost tuple first).
    {Remaining, Type, Stack}.
|
||||
|
||||
parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) ->
    % At least one tuple layer was peeled off the expected type, but the
    % innermost head type is unknown. This combination is not supported yet.
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(O, N, Excess, HeadType, Tails, Tk, String, {character, ")", _, Row, Col, _}) ->
    % The first token after the run of open parens is a close paren, so the
    % innermost pair of parens is empty.
    parse_empty_tuple(O, N, Excess, HeadType, Tails, Tk, String, Row, Col);
parse_tuple2(O, N, Excess, HeadType, Tails, Tk, String, FirstToken) ->
    % No more open parens for now. Parse an expression of the head type,
    % starting from the token we already pulled out of the tokenizer.
    case parse_expression2(HeadType, Tk, String, FirstToken) of
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason};
        {ok, {HeadTerm, Tk2, String2}} ->
            % With the head term in hand, build the enclosing tuple layers
            % around it.
            parse_tuple_tails(O, N, Excess, HeadTerm, Tails, Tk2, String2)
    end.
|
||||
|
||||
parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) ->
    % Zero excess parens: every paren we saw belongs to a tuple type, so the
    % empty pair of parens stands for the innermost of those tuples. That
    % tuple wanted a head element plus everything in its tail, but got
    % nothing at all before the close paren.
    [InnermostTail | _] = Tails,
    {error, {not_enough_elements, length(InnermostTail) + 1, 0, Row, Col}};
parse_empty_tuple(O, N, Excess, {_, _, {tuple, []}}, Tails, Tk, String, _, _) ->
    % There are ambiguous parens left over and the head type is the empty
    % tuple, so one of those parens must have been this empty tuple. Spend
    % one excess paren on it and carry on exactly as if the head term had
    % been an integer or any other ordinary value.
    parse_tuple_tails(O, N, Excess - 1, {tuple, {}}, Tails, Tk, String);
parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % The head term was supposed to have some other type, but we found an
    % empty pair of parens instead.
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
|
||||
|
||||
parse_tuple_tails(_, _, 0, Term, [], Tk, String) ->
    % Every open paren has been closed and every tuple layer has been built:
    % Term is the finished result.
    {ok, {Term, Tk, String}};
parse_tuple_tails(O, N, 0, Term, [ElemTypes | OuterTails], Tk, String) ->
    % Tuple layers remain, but there are no ambiguous parens to worry about.
    % Parse the rest of this tuple naively, entering the multivalue loop at
    % the point where a term has just been read and a comma (or the close
    % paren) is expected next. -1 is passed for the row and start position.
    case parse_multivalue3(ElemTypes, Tk, String, -1, -1, [Term]) of
        {error, Reason} ->
            % TODO: More error wrapping?
            {error, Reason};
        {ok, {Elems, Tk2, String2}} ->
            Wrapped = {tuple, list_to_tuple(Elems)},
            parse_tuple_tails(O, N, 0, Wrapped, OuterTails, Tk2, String2)
    end;
parse_tuple_tails(O, N, Excess, Term, Tails, Tk, String) ->
    % The ambiguous case: some of the open parens are grouping parens rather
    % than tuple parens. Look at the next token to find out whether the paren
    % surrounding Term was only there for grouping.
    case next_token(Tk, String) of
        {error, Reason} ->
            {error, Reason};
        {ok, {{character, ")", _, _, _, _}, Tk2, String2}} ->
            % Closed straight away, so it was grouping. That accounts for one
            % excess paren.
            parse_tuple_tails(O, N, Excess - 1, Term, Tails, Tk2, String2);
        {ok, {{character, ",", _, _, _, _}, Tk2, String2}} ->
            % A comma means this really is a tuple; use the tuple logic.
            parse_tuple_tails2(O, N, Excess, Term, Tails, Tk2, String2);
        {ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
            % Any other token is an ordinary parse error.
            {error, {unexpected_token, Actual, Row, Start, End}}
    end.
|
||||
|
||||
parse_tuple_tails2(O, N, Excess, Term, [ElemTypes | OuterTails], Tk, String) ->
    % A comma has just been consumed after Term, and there is a tuple layer
    % waiting to be built: parse the remaining elements of that layer, wrap
    % them up together with Term, and go back to resolving the parens that
    % are still open. -1 is passed for the row and start position.
    case parse_multivalue(ElemTypes, Tk, String, -1, -1, [Term]) of
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason};
        {ok, {Elems, Tk2, String2}} ->
            Wrapped = {tuple, list_to_tuple(Elems)},
            parse_tuple_tails(O, N, Excess, Wrapped, OuterTails, Tk2, String2)
    end;
parse_tuple_tails2(O, N, _, _, [], _, _) ->
    % A comma was found, but the expected type has no tuple layers left to
    % put it in. For a simple input such as (1, 2) where int was expected,
    % reporting "wrong type, got a tuple" is a fine message.
    %
    % It is less ideal when, say, int * int was expected and the input was
    % ((1, 2), 3), which has type (int * int) * int. Since ((1, 2)) on its
    % own would have been valid, it is the second comma that reveals the
    % problem, not the first.
    %
    % TODO: Row/col
    % TODO: Generate better error messages in the cases where N *is* a tuple,
    % but the first thing inside that tuple is the problem.
    {error, {wrong_type, O, N, tuple, -1, -1, -1}}.
|
||||
|
||||
%%% Unambiguous Tuple/Variant Parsing
|
||||
|
||||
parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) ->
|
||||
case next_token(Tk, String) of
|
||||
{ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
|
||||
{ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} ->
|
||||
check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
|
||||
{ok, {Token, NewTk, NewString}} ->
|
||||
parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token)
|
||||
@ -310,7 +435,7 @@ parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) ->
|
||||
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
|
||||
{error, Wrapped}
|
||||
end;
|
||||
parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) ->
|
||||
parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _, _}) ->
|
||||
{ok, {lists:reverse(Acc), Tk, String}};
|
||||
parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) ->
|
||||
{error, {unexpected_token, S, Row, Start, End}}.
|
||||
@ -321,6 +446,8 @@ parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) ->
|
||||
check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
|
||||
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} ->
|
||||
parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc);
|
||||
{ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unexpected_token, Actual, Row, Start, End}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
@ -364,7 +491,7 @@ parse_variant3(Arities, Tag, ElemTypes, Tk, String) ->
|
||||
case next_token(Tk, String) of
|
||||
{ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} ->
|
||||
parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start);
|
||||
{ok, {{_, Actual, _, Row, Start, End}}} ->
|
||||
{ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||
end.
|
||||
|
||||
@ -476,6 +603,10 @@ parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
|
||||
error ->
|
||||
{error, {missing_field, Name}}
|
||||
end;
|
||||
parse_record_final_loop([], _, [Field]) ->
|
||||
% Singleton records are type-checked in Sophia, but unwrapped in the
|
||||
% resulting FATE.
|
||||
{ok, Field};
|
||||
parse_record_final_loop([], _, FieldsReverse) ->
|
||||
Fields = lists:reverse(FieldsReverse),
|
||||
Tuple = list_to_tuple(Fields),
|
||||
@ -490,7 +621,7 @@ parse_map(KeyType, ValueType, Tk, String, Acc) ->
|
||||
parse_map2(KeyType, ValueType, NewTk, NewString, Acc);
|
||||
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} ->
|
||||
{ok, {Acc, NewTk, NewString}};
|
||||
{ok, {{_, S, _, Row, Start, End}}} ->
|
||||
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unexpected_token, S, Row, Start, End}}
|
||||
end.
|
||||
|
||||
@ -525,7 +656,7 @@ parse_map5(KeyType, ValueType, Tk, String, Acc) ->
|
||||
parse_map(KeyType, ValueType, NewTk, NewString, Acc);
|
||||
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} ->
|
||||
{ok, {Acc, NewTk, NewString}};
|
||||
{ok, {{_, S, _, Row, Start, End}}} ->
|
||||
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unexpected_token, S, Row, Start, End}}
|
||||
end.
|
||||
|
||||
@ -570,15 +701,15 @@ check_parser(Sophia) ->
|
||||
% syntax, and to get an AACI object to pass to the parser.
|
||||
Source = "contract C = entrypoint f() = " ++ Sophia,
|
||||
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"),
|
||||
|
||||
% Check that when we parse the term we get the same value as the Sophia
|
||||
% compiler.
|
||||
Fate = extract_return_value(Code),
|
||||
check_sophia_to_fate(unknown_type(), Sophia, Fate),
|
||||
|
||||
% Also check that the FATE term is valid, by running it through gmb.
|
||||
gmb_fate_encoding:serialize(Fate),
|
||||
|
||||
% Now check that our parser produces that output.
|
||||
check_sophia_to_fate(Type, Sophia, Fate),
|
||||
% Also check that it can be parsed without type information.
|
||||
check_sophia_to_fate(unknown_type(), Sophia, Fate).
|
||||
% Then, once we know that the term is correct, make sure that it is still
|
||||
% accepted *with* type info.
|
||||
check_sophia_to_fate(Type, Sophia, Fate).
|
||||
|
||||
check_parser_with_typedef(Typedef, Sophia) ->
|
||||
% Compile the type definitions alongside the usual literal expression.
|
||||
@ -602,8 +733,6 @@ anon_types_test() ->
|
||||
check_parser("#DE_AD0_00B_EEF"),
|
||||
% Strings.
|
||||
check_parser("\"hello world\""),
|
||||
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||
check_parser("\"\\x00\\x11\\x77\""),
|
||||
% List of integers.
|
||||
check_parser("[1, 2, 3]"),
|
||||
% List of lists.
|
||||
@ -615,6 +744,12 @@ anon_types_test() ->
|
||||
|
||||
ok.
|
||||
|
||||
string_escape_codes_test() ->
    % Run each string literal through check_parser/1, in order: first the
    % single-character escapes, then fixed-width hex escapes (both letter
    % cases), then braced hex escapes with varying numbers of leading zeros.
    Literals = [
        "\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \"",
        "\"\\x00\\x11\\x77\\x4a\\x4A\"",
        "\"\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\""
    ],
    lists:foreach(fun check_parser/1, Literals),
    ok.
|
||||
|
||||
records_test() ->
|
||||
TypeDef = "record pair = {x: int, y: int}",
|
||||
Sophia = "{x = 1, y = 2}",
|
||||
@ -623,6 +758,43 @@ records_test() ->
|
||||
% will error, though.
|
||||
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
|
||||
|
||||
singleton_records_test() ->
    % Check single-field record literals against a polymorphic one-field
    % record type: on their own, nested inside each other, and as list
    % elements.
    TypeDef = "record singleton('a) = {it: 'a}",
    Literals = [
        "{it = 123}",
        "{it = {it = {it = 5}}}",
        "[{it = 1}, {it = 2}, {it = 3}]"
    ],
    lists:foreach(
        fun(Sophia) -> check_parser_with_typedef(TypeDef, Sophia) end,
        Literals
    ),

    ok.
|
||||
|
||||
singleton_variants_test() ->
    % The same shape of test as for singleton records. Unlike records, there
    % is no special case to exercise here: a variant with a single
    % constructor stays wrapped in the FATE as well.
    TypeDef = "datatype wrapped('a) = Wrap('a)",
    Literals = [
        "Wrap(123)",
        "Wrap(Wrap(123))",
        "[Wrap(1), Wrap(2), Wrap(3)]"
    ],
    lists:foreach(
        fun(Sophia) -> check_parser_with_typedef(TypeDef, Sophia) end,
        Literals
    ),

    ok.
|
||||
|
||||
excess_parens_test() ->
    % Parens around a single term are yet another special case. Singleton
    % records at least exist in the type system; "singleton parens" are not
    % tuples at all, only grouping, as used in arithmetic.
    Literals = [
        % Pure grouping, alone and inside a list.
        "(123)",
        "[1, (2), ((3))]",
        % The tricky part: grouping parens mixed with tuple parens. All three
        % tuples in this list should parse to the same result.
        "[((1, 2)), ((1), 2), (((1), 2))]",
        % Several nestings of tuples and grouping, interleaved.
        "((((1), ((2, 3)))), 4)",
        % Empty tuples exist too!
        "()",
        "(((((), ())), ()))"
    ],
    lists:foreach(fun check_parser/1, Literals),

    ok.
|
||||
|
||||
variant_test() ->
|
||||
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user