There are four major fixes here: 1. some eof tokens were being pattern-matched with the wrong arity; 2. tuples that are too long now speculatively parse as an untyped tuple, and then complain that there were too many elements; 3. singleton tuples with a trailing comma are now handled differently from grouping parentheses, consistently between the typed and untyped logic; 4. the extra return values used to detect untyped singleton tuples are also used to pass the close-paren position, so that too_many_elements can report the correct file position too. Point 4 also completely removes the need for the open-paren position tracking that I was doing, and that I thought I would need to do even more of in the ambiguous-open-paren-stack case.
1076 lines
45 KiB
Erlang
1076 lines
45 KiB
Erlang
-module(hz_sophia).
|
|
-vsn("0.8.2").
|
|
-author("Jarvis Carroll <spiveehere@gmail.com>").
|
|
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
|
-license("GPL-3.0-or-later").
|
|
|
|
-export([parse_literal/1, parse_literal/2, check_parser/1]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
-spec parse_literal(String) -> Result
    when String :: string(),
         Result :: {ok, gmb_fate_data:fate_type()}
                 | {error, Reason :: term()}.

%% Parse a Sophia literal with no expected type information; the term's
%% type is inferred from its syntax alone and type checks are skipped.
parse_literal(String) ->
    parse_literal(unknown_type(), String).
|
|
|
|
%% Parse a Sophia literal, checking it against the expected Type.
%% Parsing starts at position {Row = 1, Col = 1}.
parse_literal(Type, String) ->
    case parse_expression(Type, {1, 1}, String) of
        {ok, {Term, EndPos, Remaining}} ->
            parse_literal2(Term, EndPos, Remaining);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% A full expression parsed; accept only if nothing but EOF follows.
parse_literal2(Result, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{eof, _, _, _, _, _}, _, _}} ->
            {ok, Result};
        {ok, {Trailing, _, _}} ->
            unexpected_token(Trailing);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%%% Tokenizer
|
|
|
|
%% Character-class macros for the tokenizer. All are guard-safe.
-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
%% Identifier head: latin letter or underscore.
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
%% Identifier tail: also allows '.', so qualified names lex as one token.
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
|
|
|
|
%% Lex one token from String at position {Row, Col}.
%% Returns {ok, {Token, NextPos, Rest}} where Token is a 6-tuple
%% {Kind, SourceText, Value, Row, StartCol, EndCol}.
next_token({Row, Col}, []) ->
    %% End of input is a real token so parsers can pattern match on it.
    {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Rest) ->
    next_token({Row, Col + 1}, Rest);
next_token({Row, Col}, "\t" ++ Rest) ->
    %% A tab advances the column by one; no tab-stop expansion.
    next_token({Row, Col + 1}, Rest);
next_token({Row, _}, "\r\n" ++ Rest) ->
    %% CRLF is a single newline, so Row advances only once.
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\r" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\n" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
    alphanum_token(Pos, Pos, String, []);
next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
    num_token(Pos, Pos, String, [], 0);
next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
    %% Bytes literal, e.g. #deadbeef; the '#' is consumed here.
    bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
next_token({Row, Col}, "\"" ++ Rest) ->
    string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
next_token({Row, Col}, [Char | Rest]) ->
    %% Anything else becomes a one-character punctuation token.
    Token = {character, [Char], Char, Row, Col, Col},
    {ok, {Token, {Row, Col + 1}, Rest}}.
|
|
|
|
%% Accumulate identifier characters. Start is the token's opening
%% position; Acc holds the characters seen so far, reversed.
alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
    alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({_, Start}, {Row, End}, String, Acc) ->
    AlphaString = lists:reverse(Acc),
    %% Qualified names lex as one token; the token value is the
    %% dot-separated path, e.g. "Mod.Con" -> ["Mod", "Con"].
    Path = string:split(AlphaString, ".", all),
    Token = {alphanum, AlphaString, Path, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.
|
|
|
|
%% Accumulate a decimal integer literal, computing its value as we go.
%% A single '_' may separate digits (e.g. 1_000_000); separators are
%% kept in the source text but do not affect the value.
num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
    NewValue = Value * 10 + (C - $0),
    num_token(Start, {Row, Col + 1}, Rest, [C | Chars], NewValue);
num_token(Start, {Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
    %% Two characters consumed ('_' plus a digit), so Col advances by 2.
    NewValue = Value * 10 + (C - $0),
    num_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], NewValue);
num_token({_, Start}, {Row, End}, String, Chars, Value) ->
    NumString = lists:reverse(Chars),
    Token = {integer, NumString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.
|
|
|
|
%% Accumulate a bytes literal's hex digits. Chars holds the raw source
%% (reversed); Digits holds the decoded nibble values (reversed). A
%% single '_' may separate digits, as in #dead_beef.
bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    %% BUG FIX: this clause consumes two characters ('_' plus a digit),
    %% so the column must advance by 2 -- as num_token/5 does -- not 1.
    %% Otherwise every separator shifts later positions on the row.
    bytes_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
    BytesString = lists:reverse(Chars),
    Value = reverse_combine_nibbles(Digits, <<>>),
    Token = {bytes, BytesString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.
|
|
|
|
%% Convert one hex digit character to its numeric value (0..15).
%% The letter guards previously ran to $z/$Z and silently returned
%% out-of-range values for g..z; restricted to real hex digits so a bad
%% call fails fast with function_clause instead. All existing call
%% sites already guard with ?IS_HEX, so valid behavior is unchanged.
convert_digit(C) when C >= $0, C =< $9 ->
    C - $0;
convert_digit(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_digit(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
|
|
|
%% Rebuild a binary from hex nibble values accumulated in reverse order
%% (most recently lexed digit first). Bytes are prepended, so the
%% result reads in source order. An odd leading nibble is zero-padded:
%% e.g. the digits of "#abc" decode to <<16#0A, 16#BC>>.
reverse_combine_nibbles([Low, High | More], Bin) ->
    reverse_combine_nibbles(More, <<High:4, Low:4, Bin/binary>>);
reverse_combine_nibbles([Low], Bin) ->
    <<0:4, Low:4, Bin/binary>>;
reverse_combine_nibbles([], Bin) ->
    Bin.
|
|
|
|
%% Accumulate a double-quoted string literal into a UTF-8 binary.
%% SourceChars holds the raw source seen so far (reversed, including
%% the opening quote); Value holds the decoded contents.
string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) ->
    %% Hex escape: \xNN or \x{N...}.
    case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of
        {ok, {Codepoint, NewSourceChars, NewPos, NewString}} ->
            NewValue = <<Value/binary, Codepoint/utf8>>,
            string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
        {error, Reason} ->
            {error, Reason}
    end;
string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
    %% Single-character escape such as \n or \".
    case escape_char(C) of
        {ok, ByteVal} ->
            %% BUG FIX: record the consumed backslash ($\\) in the raw
            %% source, not a space ($\ ) -- consistent with the "x\\"
            %% recorded by the \x clause above.
            string_token(Start, {Row, Col + 2}, Rest, [C, $\\ | SourceChars], <<Value/binary, ByteVal>>);
        error ->
            {error, {invalid_escape_code, [C], Row, Col}}
    end;
string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
    %% Closing quote: emit the finished string token.
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) ->
    % TODO: ERTS probably had to convert this FROM utf8 at some point, so why
    % bother, if we need to convert it back? I guess we could accept iolists if
    % we really wanted to waste time on this point...
    string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
|
|
|
|
%% Decode the body of a \x escape: either the braced form \x{HEX...}
%% or exactly two hex digits \xHH. Start is the escape's position,
%% kept only for error reporting.
escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) ->
    escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0);
escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
    % As of writing this, the Sophia compiler will convert this byte from
    % extended ASCII to unicode... But it really shouldn't. The literal parser
    % does what the compiler should do.
    Byte = convert_digit(A) * 16 + convert_digit(B),
    {ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}};
escape_hex_code({Row1, Col1}, _, _, _) ->
    {error, {invalid_escape_code, "\\x", Row1, Col1}}.
|
|
|
|
%% Decode the braced escape form \x{HEX...}; Value accumulates the
%% codepoint one hex digit at a time until the closing brace.
escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
    {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
    NewSourceChars = [C | SourceChars],
    NewValue = 16 * Value + convert_digit(C),
    escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
    %% Non-hex character before the closing brace.
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
    % Just return as if the escape code were closed, and let the string parser
    % produce an unclosed string error instead.
    {ok, {Value, SourceChars, Pos, []}}.
|
|
|
|
%% Map a single-character escape (the C in "\C") to its byte value, or
%% error for characters that are not valid escapes.
escape_char(C) ->
    Escapes = [{$b, $\b}, {$e, $\e}, {$f, $\f}, {$n, $\n}, {$r, $\r},
               {$t, $\t}, {$v, $\v}, {$", $\"}, {$\\, $\\}],
    case lists:keyfind(C, 1, Escapes) of
        {C, Value} -> {ok, Value};
        false -> error
    end.
|
|
|
|
%%% Sophia Literal Parser
|
|
|
|
%%% This parser is a simple recursive descent parser, written explicitly in
|
|
%%% erlang.
|
|
%%%
|
|
%%% There are no infix operators in the subset we want to parse, so recursive
|
|
%%% descent is fine with no special tricks, no shunting yard algorithm, no
|
|
%%% parser generators, etc.
|
|
%%%
|
|
%%% If we were writing this in C then we might want to work iteratively with an
|
|
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
|
|
%%% using recursion. This is a tried and true method of making fast parsers.
|
|
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
|
|
%%% rather than writing confusing iterative code in Erlang, to simulate a
|
|
%%% pushdown automaton inside another simulated stack machine... we should just
|
|
%%% write the recursive code, thus programming the BEAM to implement the
|
|
%%% pushdown automaton that we want.
|
|
|
|
%% Lex one token and hand it to the token-directed expression parser.
parse_expression(Type, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {Token, TokenPos, Rest}} ->
            parse_expression2(Type, TokenPos, Rest, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse one expression whose first token is already lexed. Type is a
%% {Original, Normalized, Shape} triple; the Shape element directs the
%% type checking done here and in the helpers.
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, integer} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, integer, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            %% bytes() of unspecified size accepts any length.
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [Len]}} ->
            %% Len is bound above, so this clause is an exact size match.
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, unknown_type} ->
            {ok, {Result, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, string} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, string, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
    parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
    %% Parens are ambiguous (tuple vs grouping); see parse_tuple/3.
    parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
    parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
    %% An identifier: chain object or variant constructor.
    parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
    {error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
    unexpected_token(Token).
|
|
|
|
%% The type descriptor used when no type information is available. The
%% triple mirrors the {Original, Normalized, Shape} descriptors matched
%% throughout the parser; here all three slots are placeholders.
unknown_type() ->
    {unknown_type, already_normalized, unknown_type}.
|
|
|
|
%% Consume the given token texts from the input, in order. Succeeds
%% with the position and remaining input after the last one; fails on
%% the first token whose source text does not match.
expect_tokens([], Pos, String) ->
    {ok, {Pos, String}};
expect_tokens([Str | More], Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{_, Str, _, _, _, _}, NextPos, Rest}} ->
            expect_tokens(More, NextPos, Rest);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, Str);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Report a parse failure at Token. The Expected argument is currently
%% unused: knowing the one or two tokens that would have been accepted
%% might someday make for simpler non-technical error messages, but no
%% format for that exists yet.
unexpected_token(Token, _Expected) ->
    unexpected_token(Token).

%% EOF gets a dedicated error so callers can say "input ended early"
%% instead of pointing at a phantom token.
unexpected_token({eof, _, _, _, _, _}) ->
    {error, expression_incomplete};
unexpected_token({_, Text, _, Row, First, Last}) ->
    {error, {unexpected_token, Text, Row, First, Last}}.
|
|
|
|
%%% Ambiguous Chain Object vs Identifier Parsing
|
|
|
|
%% Decide whether an identifier names a chain object (address,
%% contract, signature, ...) or a variant constructor, then parse it.
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Constructors start with uppercase characters, so lowercase can only be a
    % chain object.
    try
        case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
            {account_pubkey, Data} ->
                typecheck_address(Type, Pos, String, Data, Row, Start, End);
            {contract_pubkey, Data} ->
                typecheck_contract(Type, Pos, String, Data, Row, Start, End);
            {signature, Data} ->
                typecheck_signature(Type, Pos, String, Data, Row, Start, End);
            {_, _} ->
                % Only a few chain objects are recognized by Sophia. The rest
                % are interpreted as identifiers, so we might as well give the
                % same sort of error that the compiler would give.
                {error, {unexpected_identifier, S, Row, Start, End}}
        end
    catch
        %% Deliberate boundary: decode/1 raises on malformed input, and
        %% that is reported the same as an unrecognized identifier
        %% rather than crashing the parser.
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
    % Inversely, chain object prefixes are always lowercase, so any other path
    % must be a variant constructor, or invalid.
    parse_variant(Type, Pos, String, Path, Row, Start, End).
|
|
|
|
%% Type-check a decoded account pubkey against the expected type.
%% address and contract are deliberately interchangeable: the compiler
%% would reject the mismatch, but this parser stays lenient.
typecheck_address({_, _, address}, Pos, String, Pubkey, _, _, _) ->
    {ok, {{address, Pubkey}, Pos, String}};
typecheck_address({_, _, contract}, Pos, String, Pubkey, _, _, _) ->
    %% Lenient: the compiler would type-error here.
    {ok, {{contract, Pubkey}, Pos, String}};
typecheck_address({_, _, unknown_type}, Pos, String, Pubkey, _, _, _) ->
    {ok, {{address, Pubkey}, Pos, String}};
typecheck_address({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, address, Row, Start, End}}.
|
|
|
|
%% Type-check a decoded contract pubkey against the expected type.
%% Mirror image of typecheck_address/7: contract and address are
%% accepted interchangeably on purpose.
typecheck_contract({_, _, contract}, Pos, String, Pubkey, _, _, _) ->
    {ok, {{contract, Pubkey}, Pos, String}};
typecheck_contract({_, _, address}, Pos, String, Pubkey, _, _, _) ->
    %% Lenient: the compiler would type-error here.
    {ok, {{address, Pubkey}, Pos, String}};
typecheck_contract({_, _, unknown_type}, Pos, String, Pubkey, _, _, _) ->
    {ok, {{contract, Pubkey}, Pos, String}};
typecheck_contract({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, contract, Row, Start, End}}.
|
|
|
|
%% Type-check a decoded signature against the expected type. A
%% signature is 64 bytes, so bytes(64) and bytes() are accepted too.
%% All successful cases yield a plain bytes value.
typecheck_signature({_, _, signature}, Pos, String, Sig, _, _, _) ->
    {ok, {{bytes, Sig}, Pos, String}};
typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Sig, _, _, _) ->
    %% The compiler would probably type-error, but whatever.
    {ok, {{bytes, Sig}, Pos, String}};
typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Sig, _, _, _) ->
    %% The compiler would probably type-error, but whatever.
    {ok, {{bytes, Sig}, Pos, String}};
typecheck_signature({_, _, unknown_type}, Pos, String, Sig, _, _, _) ->
    {ok, {{bytes, Sig}, Pos, String}};
typecheck_signature({Orig, Norm, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, Orig, Norm, signature, Row, Start, End}}.
|
|
|
|
|
|
%%% List Parsing
|
|
|
|
%% Dispatch list parsing on the expected type: a list type carries its
%% element type, an unknown type parses elements untyped, and anything
%% else is a type error anchored at the opening bracket.
parse_list({_, _, {list, [ElemType]}}, Pos, String, _, _) ->
    parse_list2(ElemType, Pos, String);
parse_list({_, _, unknown_type}, Pos, String, _, _) ->
    parse_list2(unknown_type(), Pos, String);
parse_list({Orig, Norm, _}, _, _, Row, Start) ->
    {error, {wrong_type, Orig, Norm, list, Row, Start, Start}}.
|
|
|
|
%% Run the element loop and strip the extra bookkeeping (trailing-comma
%% flag and close-bracket position) that list results do not need.
parse_list2(ElemType, Pos, String) ->
    case parse_list_loop(ElemType, Pos, String, "]", []) of
        {ok, {Elements, _TrailingComma, _ClosePos, NextPos, Rest}} ->
            {ok, {Elements, NextPos, Rest}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse a comma-separated element sequence up to CloseChar. This
%% clause is the "expecting an element" state, so it also accepts an
%% immediate close: an empty sequence, or one after a trailing comma.
%% Returns {ok, {Elements, ClosedEarly, ClosePos, NextPos, Rest}}:
%% ClosedEarly is true when the close arrived where an element was
%% expected, and ClosePos is the close token's position, used by
%% callers for error reporting.
parse_list_loop(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        %% CloseChar is already bound here, so this clause matches only
        %% the expected closing character.
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}};
        {ok, {Token, NewPos, NewString}} ->
            parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Parse one element from its first token, wrapping any failure with
%% the element's index so errors can say which element broke.
parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) ->
    case parse_expression2(Inner, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end.
|
|
|
|
%% After an element: expect either CloseChar (done, and the close did
%% not follow a comma, hence the false flag) or a comma (loop back for
%% the next element).
parse_list_loop3(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}};
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, CloseChar);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Pick the error-wrapper tag that matches the collection being
%% parsed, based on its closing character.
choose_list_error_wrapper(Close) ->
    case Close of
        "]" -> list_element;
        ")" -> tuple_element
    end.
|
|
|
|
%%% Ambiguous Parenthesis Parsing
|
|
|
|
%% Parse the contents of a "(" ... ")" construct, which may be a
%% tuple, a unit value, or plain grouping parentheses.
parse_tuple({_, _, unknown_type}, Pos, String) ->
    % An untyped tuple is a list of untyped terms, and weirdly our list parser
    % works perfectly for that, as long as we change the closing character to
    % be ")" instead of "]".
    case parse_list_loop(unknown_type(), Pos, String, ")", []) of
        {ok, {[Inner], false, _, NewPos, NewString}} ->
            % In Sophia, trailing commas are invalid, and so all singleton
            % tuples are unwrapped, and translated into the inner type.
            {ok, {Inner, NewPos, NewString}};
        {ok, {TermList, _, _, NewPos, NewString}} ->
            Result = {tuple, list_to_tuple(TermList)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple(Type, Pos, String) ->
    % Typed tuple parsing is quite complex, because we also want to support
    % normal parentheses for grouping. It's not strictly necessary for
    % inputting data, since we don't have any infix operators in simple
    % data/term notation, but the alternatives are to generate singleton tuples
    % naively, (which are impossible to generate from Sophia,) or to hard error
    % on singleton tuples! Being faithful to Sophia is clearly nice!

    % Count how many ambiguous parens there are, including the one we already
    % saw.
    case count_open_parens(Pos, String, 1) of
        {ok, {Count, Token, NewPos, NewString}} ->
            % Compare that to the amount of nesting tuple connectives are in
            % the type we are expected to produce.
            {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []),
            % Now work out what to do with all this information.
            parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Consume consecutive "(" tokens, adding them to the Count already
%% seen. Returns the total along with the first non-paren token.
count_open_parens(Pos, String, Count) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NextPos, Rest}} ->
            count_open_parens(NextPos, Rest, Count + 1);
        {ok, {Other, NextPos, Rest}} ->
            {ok, {Count, Other, NextPos, Rest}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Walk into nested tuple types, consuming one open paren per tuple
%% layer entered. Returns {Excess, HeadType, Tails}: the number of
%% parens with no matching tuple layer (which must therefore be
%% grouping parens), the innermost expected head type, and a stack of
%% the sibling element types for each layer entered.
extract_tuple_type_info(Parens, {_, _, {tuple, [First | Others]}}, Stack) when Parens > 0 ->
    extract_tuple_type_info(Parens - 1, First, [Others | Stack]);
extract_tuple_type_info(Parens, Innermost, Stack) ->
    %% Either no parens remain, or the type has no further (non-empty)
    %% tuple layer: stop here.
    {Parens, Innermost, Stack}.
|
|
|
|
%% Decide how to parse the tuple body, given ExcessCount (open parens
%% with no matching tuple type layer), HeadType (the type expected for
%% the innermost head term), and Tails (the per-layer remaining
%% element types).
parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
    %% Immediate close paren: only valid if an empty tuple fits here.
    parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) ->
    % Finished with parentheses for now, try and parse an expression out, to
    % get our head term.
    case parse_expression2(HeadType, Pos, String, Token) of
        {ok, {Result, NewPos, NewString}} ->
            % Got a head term. Now try to build all the other tuple layers.
            parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString);
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason}
    end.
|
|
|
|
%% Handle "()" appearing where a head term was expected.
parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
    % There are zero excess parens, meaning all our parens are tuples. Get the
    % top one.
    [Tail | _] = Tails,
    % We expected some nonzero number of elements before the close paren, but
    % got zero.
    ExpectCount = 1 + length(Tail),
    {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
    % If we have some ambiguous parentheses left, we now know one of them is
    % this empty tuple.
    HeadTerm = {tuple, {}},
    NewExcessCount = ExcessCount - 1,
    % Now continue the loop as if it were an integer or something, in the head
    % position.
    parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % We were expecting a head term of a different type!
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
|
|
|
|
%% With a head term in hand, close grouping parens and/or build the
%% enclosing tuple layers until both are exhausted.
parse_tuple_tails(0, HeadTerm, [], Pos, String) ->
    % No open parens left, no tuples left to build, we are done!
    {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) ->
    % The ambiguous case, where we have a mix of tuple parens, and grouping
    % parens. We want to peek at the next token, to see if it closes a grouping
    % paren.
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} ->
            % It is grouping! Try closing a grouping paren.
            parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} ->
            % It is a real tuple! Try parsing a tuple.
            parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {Token, _, _}} ->
            % Anything else is just a boring parse error we can complain about.
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% A ")" arrived while layers remain open. Closing is valid only for a
%% grouping paren; if only tuple layers remain, the term is too short.
parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
    % A singleton tuple was expected, but a grouping paren was given. In theory
    % we could be permissive here, but we were asked to do type checking, and
    % this is a type error. The type error itself is a bit hard to reproduce,
    % but we do know exactly what the fix is, so let's report that instead.
    {error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
    % A tuple (of more than one elements) was expected, but a grouping paren
    % was given. Again, the type error is hard to produce, but the actual
    % solution is simple; add more elements.
    ExpectCount = length(Tail) + 1,
    GotCount = 1,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}};
parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % We were expecting some grouping parens, and now we know that one of them
    % was in fact grouping. Good.
    parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String).
|
|
|
|
%% A "," arrived after the head term: the innermost open layer must be
%% a real tuple, so parse the remaining elements against its types.
parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
    % No more tuples, so commas are invalid. It's hard to describe the type
    % error that a comma would actually produce, so instead let's just give
    % the user the actual solution to their problems, which is to remove the
    % comma.
    {error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % If there are no tails then we would have exited into the "grouping parens
    % only" case, so we know this works:
    [TailTypes | ParentTails] = Tails,
    % Now we can parse this tuple as a tuple.
    case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of
        {ok, {Terms, NewPos, NewString}} ->
            NewHead = {tuple, list_to_tuple(Terms)},
            % Then continue the loop, with whatever parent tuple types this
            % tuple is meant to be a part of.
            parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString);
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason}
    end.
|
|
|
|
%%% Unambiguous Tuple/Variant Parsing
|
|
|
|
%% Parse a comma-separated sequence of typed elements up to ")".
%% ElemTypes lists the types still expected; Acc holds the terms parsed
%% so far, reversed. This clause is the "expecting an element" state,
%% so it also accepts an immediate close.
parse_multivalue(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {Token, NewPos, NewString}} ->
            parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Parse one element against the next expected type. When the type list
%% is already exhausted, hand over to the excess counter, which parses
%% the rest untyped purely to produce an accurate error.
parse_multivalue2([Next | Rest], Pos, String, Acc, Token) ->
    case parse_expression2(Next, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end;
parse_multivalue2([], Pos, String, Acc, Token) ->
    count_multivalue_excess(Pos, String, Acc, Token).
|
|
|
|
%% After an element: ")" closes the sequence (checking all expected
%% types were consumed), "," continues with the next element.
parse_multivalue3(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_multivalue(ElemTypes, NewPos, NewString, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% More elements were supplied than the type allows. Speculatively
%% parse the remainder as untyped terms so the error can report the
%% actual element count and the close paren's position.
count_multivalue_excess(Pos, String, TypedAcc, Token) ->
    ExpectedLen = length(TypedAcc),
    case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of
        {ok, {TermList, _, {Row, Col}, _, _}} ->
            ActualLen = length(TermList),
            {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}};
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% All expected element types were consumed: emit the terms in source
%% order.
check_multivalue_long_enough([], Pos, String, _Row, _Col, Acc) ->
    {ok, {lists:reverse(Acc), Pos, String}};
%% The close paren arrived while types were still pending: too few
%% elements, reported at the close paren's position.
check_multivalue_long_enough([_ | _] = Missing, _, _, Row, Col, Seen) ->
    Have = length(Seen),
    Want = Have + length(Missing),
    {error, {not_enough_elements, Want, Have, Row, Col}}.
|
|
|
|
%%% Variant parsing
|
|
|
|
%% Parse a variant constructor, optionally namespace-qualified, against
%% a variant type's constructor list.
parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) ->
    parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) ->
    %% Qualified constructor: the namespace must match the type's own
    %% name, as derived from the type descriptor.
    case get_typename(O, N) of
        [Namespace, _] ->
            parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End);
        _ ->
            {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
    end;
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    %% Without a variant type there is nothing to resolve the
    %% constructor's tag and arity against.
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In normal code, identifiers can have many meanings, which can result in
    % lots of different errors. In constant/immediate/normalized Sophia terms
    % we know identifiers are always variants, so we can type error if any
    % other type was expected.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
|
|
|
|
%% Given the original (O) and normalized (N) halves of a type
%% descriptor, pick whichever one carries the real name and split it
%% into its dot-separated path.
get_typename(O, already_normalized) ->
    get_typename(O);
get_typename(_, N) ->
    get_typename(N).

%% A named type is either a {Name, Params} pair or a bare name.
get_typename({Name, _}) ->
    string:split(Name, ".", all);
get_typename(Name) ->
    string:split(Name, ".", all).
|
|
|
|
%% Resolve the constructor name to its tag and element types. The full
%% arity list of every constructor is computed because the resulting
%% FATE variant value carries it.
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
    case lookup_variant(Constructor, Variants, 0) of
        {ok, {Tag, ElemTypes}} ->
            GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
            Arities = lists:map(GetArity, Variants),
            parse_variant3(Arities, Tag, ElemTypes, Pos, String);
        error ->
            %% Prefix restores the "Namespace." part for the error text.
            {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
    end.
|
|
|
|
%% Build the variant value. Nullary constructors take no parentheses;
%% every other arity requires a "(" argument list.
parse_variant3(Arities, Tag, [], Pos, String) ->
    % Parsing of 0-arity variants is different.
    Result = {variant, Arities, Tag, {}},
    {ok, {Result, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "(");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Parse the parenthesized argument list of a variant constructor and
%% wrap the terms into a FATE variant value.
parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
    case parse_multivalue(ElemTypes, Pos, String, []) of
        {ok, {Terms, NextPos, Rest}} ->
            Variant = {variant, Arities, Tag, list_to_tuple(Terms)},
            {ok, {Variant, NextPos, Rest}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Find Ident among the variant constructors, returning its tag (the
%% zero-based position, counted up from the initial Tag argument) and
%% its element types, or error when absent.
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
    {ok, {Tag, ElemTypes}};
lookup_variant(Ident, [_ | Others], Tag) ->
    lookup_variant(Ident, Others, Tag + 1);
lookup_variant(_, [], _) ->
    error.
|
|
|
|
%%% Record parsing
|
|
|
|
%% Dispatch a "{" construct: maps and records are driven by their
%% types; an untyped "{" is disambiguated by peeking at the next token.
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
    parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
    parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
    %% Untyped: "{}" is the empty map, "[" starts a map entry, and an
    %% identifier would begin a record -- but records cannot be parsed
    %% without their field types.
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {#{}, NewPos, NewString}};
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
        {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
            {error, {unresolved_record, Row, Start, End}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, map, Row, Start, Start}}.
|
|
|
|
%% Parse record fields ("name = value" pairs) until "}". Fields is the
%% declared field list; Acc maps already-parsed field names to values.
%% This clause also accepts "}" directly, i.e. an empty record body or
%% a trailing comma.
parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{alphanum, Ident, _, Row, Start, End}, NewPos, NewString}} ->
            parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End);
        {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
            parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.
|
|
|
|
%% Look the field name up among the record's declared fields; unknown
%% names are an error anchored at the identifier.
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, FieldType} ->
            parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, FieldType)
    end.
|
|
|
|
%% Reject a second assignment to a field that already has a value.
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case Acc of
        #{Ident := _} ->
            {error, {field_already_present, Ident, Row, Start, End}};
        _ ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type)
    end.
|
|
|
|
% Consume the "=" between the field name and its value expression.
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {Pos2, Rest}} ->
            parse_record5(Fields, Pos2, Rest, Acc, Ident, Type)
    end.
|
|
|
|
% Parse the field's value expression at the field's declared type, and
% record it in the accumulator under the field name.
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Value, Pos2, Rest}} ->
            parse_record6(Fields, Pos2, Rest, Acc#{Ident => Value});
        {error, Reason} ->
            wrap_error(Reason, {record_field, Ident})
    end.
|
|
|
|
% After a field assignment: a "," continues with the next field, a "}"
% closes the record literal.
parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, Pos2, Rest}} ->
            parse_record(Fields, Pos2, Rest, Acc);
        {ok, {{character, "}", _, Row, Start, End}, Pos2, Rest}} ->
            parse_record_end(Fields, Pos2, Rest, Acc, Row, Start, End);
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% The record literal has been closed; assemble the parsed fields into a
% FATE value, attaching the closing brace's position to any missing-field
% error.
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}};
        {ok, Result} ->
            {ok, {Result, Pos, String}}
    end.
|
|
|
|
% Walk the record definition in declaration order, pulling each field's
% parsed value out of the FieldValues map. Any field never assigned is a
% missing_field error.
parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
    case FieldValues of
        #{Name := Value} ->
            parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
        _ ->
            {error, {missing_field, Name}}
    end;
% Singleton records are type-checked in Sophia, but unwrapped in the
% resulting FATE, so a one-field record yields the bare field value.
parse_record_final_loop([], _, [OnlyField]) ->
    {ok, OnlyField};
% Otherwise the fields (accumulated in reverse) become a FATE tuple.
parse_record_final_loop([], _, Reversed) ->
    {ok, {tuple, list_to_tuple(lists:reverse(Reversed))}}.
|
|
|
|
|
|
%%% Map Parsing
|
|
|
|
% Parse the next map entry (which opens with "["), or accept an immediate
% "}" closing the map literal.
parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "[", _, _, _, _}, Pos2, Rest}} ->
            parse_map2(KeyType, ValueType, Pos2, Rest, Acc);
        {ok, {{character, "}", _, _, _, _}, Pos2, Rest}} ->
            {ok, {Acc, Pos2, Rest}};
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse the key expression of a map entry; failures are wrapped with which
% entry (by count) was being parsed.
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {error, Reason} ->
            wrap_error(Reason, {map_key, maps:size(Acc)});
        {ok, {Key, Pos2, Rest}} ->
            parse_map3(KeyType, ValueType, Pos2, Rest, Acc, Key)
    end.
|
|
|
|
% Consume the "] =" between a map entry's key and its value expression.
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {error, _} = Error ->
            Error;
        {ok, {Pos2, Rest}} ->
            parse_map4(KeyType, ValueType, Pos2, Rest, Acc, Key)
    end.
|
|
|
|
% Parse the value expression of a map entry and store it under Key.
% Failures are wrapped with which entry was being parsed, consistent with
% the {map_key, N} wrapping in parse_map2/5 (previously the value branch
% returned the reason unwrapped).
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Key, Result, Acc),
            parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc);
        {error, Reason} ->
            wrap_error(Reason, {map_value, maps:size(Acc)})
    end.
|
|
|
|
% After a map entry: a "," continues with the next entry, a "}" closes the
% map literal.
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, Pos2, Rest}} ->
            parse_map(KeyType, ValueType, Pos2, Rest, Acc);
        {ok, {{character, "}", _, _, _, _}, Pos2, Rest}} ->
            {ok, {Acc, Pos2, Rest}};
        {ok, {Other, _, _}} ->
            unexpected_token(Other, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% TODO: attach Context (e.g. {record_field, Name} or {map_key, N}) to the
% reason so nested parse errors report where in the literal they happened.
% For now the context is discarded and the reason passes through unchanged.
wrap_error(Reason, _Context) ->
    Reason.
|
|
|
|
%%% Tests
|
|
|
|
% Assert that parsing Sophia at Type yields exactly the FATE value Fate;
% any other outcome (wrong value or parse error) raises to_fate_failed
% carrying the full mismatched result.
check_sophia_to_fate(Type, Sophia, Fate) ->
    case parse_literal(Type, Sophia) of
        {ok, Fate} ->
            ok;
        Unexpected ->
            erlang:error({to_fate_failed, Sophia, Fate, Unexpected})
    end.
|
|
|
|
% Compile Source with the real Sophia compiler and return {FATE, Type}:
% the constant FATE value returned by Entrypoint, and that entrypoint's
% AACI return type.
compile_entrypoint_value_and_type(Source, Entrypoint) ->
    {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),

    % Find the fcode for the correct entrypoint.
    {fcode, Bodies, NamesMap, _} = FateCode,
    Names = maps:to_list(NamesMap),
    Name = unicode:characters_to_binary(Entrypoint),
    {Hash, Name} = lists:keyfind(Name, 2, Names),
    {_, _, Code} = maps:get(Hash, Bodies),
    FATE = extract_return_value(Code),

    % Generate the AACI, and get the AACI type info for the correct
    % entrypoint. (Fix: this previously looked up the literal "f", silently
    % ignoring the Entrypoint argument.)
    AACI = hz_aaci:prepare(ACI),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, Entrypoint),

    {FATE, Type}.
|
|
|
|
% Pull the constant return value out of compiled fcode: the body must be a
% single RETURNR of an immediate. Anything else means the test literal
% didn't compile to a constant, which is a hard failure.
extract_return_value(Code) ->
    case Code of
        #{0 := [{'RETURNR', {immediate, FATE}}]} ->
            FATE;
        _ ->
            erlang:exit({invalid_literal_fcode, Code})
    end.
|
|
|
|
% Compile the literal with the real compiler — confirming it is valid
% Sophia syntax and obtaining the type the compiler assigned — then check
% that our parser agrees with the compiler both without and with type
% information.
check_parser(Sophia) ->
    {Fate, Type} = compile_entrypoint_value_and_type(
        "contract C = entrypoint f() = " ++ Sophia, "f"),

    % An untyped parse must produce the compiler's FATE value...
    check_sophia_to_fate(unknown_type(), Sophia, Fate),
    % ...and once the term is known correct, it must still be accepted
    % *with* type info.
    check_sophia_to_fate(Type, Sophia, Fate).
|
|
|
|
% Like check_parser/1, but with extra type definitions compiled alongside
% the literal. Only the typed parse is checked: record/variant definitions
% in the AACI generally make untyped parses fail.
check_parser_with_typedef(Typedef, Sophia) ->
    Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Sophia, Fate).
|
|
|
|
% Literals of every anonymous (built-in) type round-trip through the
% parser without any type information.
anon_types_test() ->
    Literals = ["123", "1_2_3",                    % integers
                "#DEAD000BEEF", "#DE_AD0_00B_EEF", % bytes
                "\"hello world\"",                 % strings
                "[1, 2, 3]",                       % list of integers
                "[[], [1], [2, 3]]",               % list of lists
                "(1, [2, 3], (4, 5))",             % tuple
                "{[1] = 2, [3] = 4}"],             % map
    lists:foreach(fun check_parser/1, Literals),
    ok.
|
|
|
|
% String escapes: named escapes, two-digit hex, and braced hex of various
% widths.
string_escape_codes_test() ->
    lists:foreach(fun check_parser/1,
                  ["\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \"",
                   "\"\\x00\\x11\\x77\\x4a\\x4A\"",
                   "\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""]),
    ok.
|
|
|
|
records_test() ->
    Sophia = "{x = 1, y = 2}",
    check_parser_with_typedef("record pair = {x: int, y: int}", Sophia),
    % check_parser_with_typedef/2 never runs an untyped parse, but we can
    % do one ourselves; without the record definition it must error.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
|
|
|
|
variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Terms = ["Zero", "One(0)", "Two(0, 1)", "Two([], [1, 2, 3])", "C.Zero"],
    lists:foreach(fun(Term) -> check_parser_with_typedef(TypeDef, Term) end,
                  Terms),

    % Without type information a bare constructor cannot be resolved.
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),

    ok.
|
|
|
|
% The constructor C shares its name with the contract, so both the bare
% and the qualified spellings must still resolve.
ambiguous_variant_test() ->
    TypeDef = "datatype mytype = C | D",
    lists:foreach(fun(Term) -> check_parser_with_typedef(TypeDef, Term) end,
                  ["C", "D", "C.C", "C.D"]),
    ok.
|
|
|
|
% Constructors qualified by a namespace (rather than the contract itself)
% must also parse.
namespace_variant_test() ->
    Term = "[N.A, N.B]",
    Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Term, Fate),
    ok.
|
|
|
|
chain_objects_test() ->
    % An account address.
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % Two different forms of signature.
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),

    % Contract addresses need a totally custom contract: the compiler
    % type-checks them with the rules of "contract oriented programming"
    % (covariance, etc.), which doesn't mix with ML style type inference,
    % so check_parser/1's generated entrypoint can't be used here.
    Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ Contract,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Contract, Fate),
    check_sophia_to_fate(unknown_type(), Contract, Fate),

    ok.
|
|
|
|
% Singleton records type-check in Sophia but unwrap to their single field
% in FATE; these exercise that unwrap path, including nested and listed
% singletons.
singleton_records_test() ->
    TypeDef = "record singleton('a) = {it: 'a}",
    lists:foreach(fun(Term) -> check_parser_with_typedef(TypeDef, Term) end,
                  ["{it = 123}",
                   "{it = {it = {it = 5}}}",
                   "[{it = 1}, {it = 2}, {it = 3}]"]),
    ok.
|
|
|
|
% Similar tests to the singleton records, but singleton variants are NOT a
% special case: they stay wrapped in the FATE too.
singleton_variants_test() ->
    TypeDef = "datatype wrapped('a) = Wrap('a)",
    lists:foreach(fun(Term) -> check_parser_with_typedef(TypeDef, Term) end,
                  ["Wrap(123)",
                   "Wrap(Wrap(123))",
                   "[Wrap(1), Wrap(2), Wrap(3)]"]),
    ok.
|
|
|
|
% 'Singleton' parens are grouping, not data: unlike singleton records,
% which exist in the type system, they aren't tuples at all. The tricky
% part is mixing grouping parens with tuple parens, in nestings that are
% ambiguous until a comma (or lack of one) resolves them.
excess_parens_test() ->
    lists:foreach(fun check_parser/1,
                  [% Pure grouping.
                   "(123)",
                   "[1, (2), ((3))]",
                   % Three spellings of the same tuple.
                   "[((1, 2)), ((1), 2), (((1), 2))]",
                   % Tuples and grouping, interleaved at several depths.
                   "((((1), ((2, 3)))), 4)",
                   % Empty tuples exist too!
                   "()",
                   "(((((), ())), ()))"]),
    ok.
|
|
|
|
% Token positions: each case pairs the expected error (whose Row/Start/End
% encode the position) with the input that produces it.
lexer_offset_test() ->
    Expect = fun({Expected, Input}) -> Expected = parse_literal(Input) end,

    % Various tokens report their own position correctly.
    lists:foreach(Expect,
        [{{error, {unexpected_token, "456", 1, 5, 7}}, "123 456"},
         {{error, {unexpected_token, "[", 1, 5, 5}}, "123 [0]"},
         {{error, {unexpected_token, "ABC", 1, 5, 7}}, "123 ABC"},
         {{error, {unexpected_token, "#AA", 1, 5, 7}}, "123 #AA"},
         {{error, {unexpected_token, "\"x\"", 1, 5, 7}}, "123 \"x\""},
         {{error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}}, "123 \"\\x{123}\""}]),

    % The tokenizer also knows its position *after* various tokens.
    lists:foreach(Expect,
        [{{error, {unexpected_token, "123", 1, 5, 7}}, "[0] 123"},
         {{error, {unexpected_token, "123", 1, 5, 7}}, "#AA 123"},
         {{error, {unexpected_token, "123", 1, 5, 7}}, "\"x\" 123"},
         {{error, {unexpected_token, "123", 1, 11, 13}}, "\"\\x{123}\" 123"}]),
    % ...including after a typed variant constructor.
    ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),

    % Line separators (LF, CRLF, and bare CR) all advance the row count.
    lists:foreach(Expect,
        [{{error, {unexpected_token, "ABC", 2, 1, 3}}, "123\nABC"},
         {{error, {unexpected_token, "ABC", 2, 1, 3}}, "123\r\nABC"},
         {{error, {unexpected_token, "ABC", 2, 1, 3}}, "123\rABC"}]),

    ok.
|
|
|
|
% Parser-level (as opposed to lexer-level) error positions: tuple arity
% errors report the row and column of the offending tuple's closing paren.
parser_offset_test() ->
    % Expected type: a pair of pairs of ints.
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"),

    % A lone inner pair is one element short of the expected outer pair;
    % the error points at the outermost closing paren (column 8 / 10).
    {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"),
    {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"),
    % Too many elements in the outer tuple: error at its ")" (column 24).
    {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"),
    % Too many elements in an *inner* tuple: error at that tuple's ")"
    % (column 10), not at the end of the whole literal.
    {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"),

    ok.
|
|
|
|
% The Sophia compiler would never generate a 1-tuple, but it is a valid
% type within the FATE virtual machine, and it is possible to represent
% within the ACI itself — so build such an ACI by hand and check the
% trailing-comma singleton syntax against it.
singleton_test() ->
    SingletonACI = #{tuple => [<<"int">>]},

    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>,
                 arguments => [],
                 stateful => false,
                 payable => false,
                 returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function],
                           name => <<"C">>,
                           kind => contract_main,
                           payable => false,
                           typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),

    % A trailing comma marks a singleton tuple...
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    % ...even under ambiguous nesting parens...
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % ...and without the trailing comma, "(1)" is just a grouped integer,
    % which is an error when a 1-tuple is expected.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % All of the above should behave the same in untyped contexts:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),

    % Also if we wanted an integer, the singleton is NOT dropped, so is
    % also an error. (Fix: the middle atom of the type triple was the typo
    % 'alread_normalized'; normalized types elsewhere in this module spell
    % it 'already_normalized'.)
    {error, {expected_close_paren, 1, 3}} = parse_literal({integer, already_normalized, integer}, "(1,)"),

    ok.
|