%%% Sophia bitstrings aren't really something you initialize manually, so we
%%% have to make up a literal format for them. Failing that, we just accept
%%% arbitrary integers and bytearrays as bitstrings.
%%% Parser for Sophia literal expressions into FATE terms.
-module(hz_sophia).

-vsn("0.8.2").

-author("Jarvis Carroll <spiveehere@gmail.com>").

-copyright("Jarvis Carroll <spiveehere@gmail.com>").

-license("GPL-3.0-or-later").

%% parse_literal/1,2 parse one Sophia constant, optionally checking it
%% against an expected (normalized) type as it is parsed.
-export([parse_literal/1, parse_literal/2, check_parser/1]).

-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
-spec parse_literal(String) -> Result
    when String :: string(),
         Result :: {ok, gmb_fate_data:fate_type()}
                 | {error, Reason :: term()}.
%% @doc Parse a Sophia literal with no expected type information.

parse_literal(String) ->
    parse_literal(unknown_type(), String).

%% @doc Parse a Sophia literal, checking it against the given normalized
%% type as we go.
parse_literal(Type, String) ->
    case parse_expression(Type, {1, 1}, String) of
        {ok, {Term, Cursor, Remainder}} ->
            parse_literal2(Term, Cursor, Remainder);
        {error, _} = Error ->
            Error
    end.

%% A valid expression was parsed; the remaining input must now be empty
%% (modulo whitespace), otherwise there is trailing garbage.
parse_literal2(Term, Cursor, Remainder) ->
    case next_token(Cursor, Remainder) of
        {ok, {{eof, _, _, _, _, _}, _, _}} ->
            {ok, Term};
        {ok, {Token, _, _}} ->
            unexpected_token(Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%%% Tokenizer
|
|
|
|
%% Character-class macros for the tokenizer. These expand to guard-safe
%% boolean expressions over a single character code C.
-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
%% Identifier head characters: latin letters and underscore.
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
%% Identifier tail characters. '.' is included so qualified names such as
%% "Bits.all" tokenize as one alphanum token (split later on the dots).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
|
|
|
%% Fetch the next token from the input, skipping leading whitespace.
%% Positions are {Row, Col}, both 1-based. Tokens are 6-tuples:
%% {Kind, SourceText, Value, Row, StartCol, EndCol}.
next_token({Row, Col}, []) ->
    {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Tail) ->
    next_token({Row, Col + 1}, Tail);
next_token({Row, Col}, "\t" ++ Tail) ->
    next_token({Row, Col + 1}, Tail);
next_token({Row, _}, "\r\n" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token({Row, _}, "\r" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token({Row, _}, "\n" ++ Tail) ->
    next_token({Row + 1, 1}, Tail);
next_token(Cursor, [C | _] = Input) when ?IS_ALPHA(C) ->
    alphanum_token(Cursor, Cursor, Input, []);
next_token(Cursor, [C | _] = Input) when ?IS_NUM(C) ->
    num_token(Cursor, Cursor, Input, [], 0);
next_token({Row, Col}, [$#, C | Tail]) when ?IS_HEX(C) ->
    bytes_token({Row, Col}, {Row, Col + 1}, [C | Tail], "#", []);
next_token({Row, Col}, "\"" ++ Tail) ->
    string_token({Row, Col}, {Row, Col + 1}, Tail, "\"", <<>>);
next_token({Row, Col}, "'" ++ Tail) ->
    character_token({Row, Col}, {Row, Col + 1}, Tail, "'");
next_token({Row, Col}, [Char | Tail]) ->
    %% Any other single character becomes its own punctuation token.
    {ok, {{character, [Char], Char, Row, Col, Col}, {Row, Col + 1}, Tail}}.
|
|
|
|
%% Accumulate an identifier-like token. The raw text is returned alongside
%% its dot-separated path (e.g. "Bits.all" -> ["Bits", "all"]).
alphanum_token(Origin, {Row, Col}, [C | Tail], Acc) when ?IS_ALPHANUM(C) ->
    alphanum_token(Origin, {Row, Col + 1}, Tail, [C | Acc]);
alphanum_token({_, StartCol}, {Row, EndCol}, Input, Acc) ->
    Text = lists:reverse(Acc),
    Path = string:split(Text, ".", all),
    Token = {alphanum, Text, Path, Row, StartCol, EndCol - 1},
    {ok, {Token, {Row, EndCol}, Input}}.
|
|
|
|
%% Accumulate a decimal integer token. Underscores may separate digit
%% groups (e.g. 1_000_000); they are kept in the source text but ignored
%% in the computed value.
num_token(Origin, {Row, Col}, [C | Tail], Text, Acc) when ?IS_NUM(C) ->
    num_token(Origin, {Row, Col + 1}, Tail, [C | Text], Acc * 10 + (C - $0));
num_token(Origin, {Row, Col}, [$_, C | Tail], Text, Acc) when ?IS_NUM(C) ->
    num_token(Origin, {Row, Col + 2}, Tail, [C, $_ | Text], Acc * 10 + (C - $0));
num_token({_, StartCol}, {Row, EndCol}, Input, Text, Acc) ->
    Token = {integer, lists:reverse(Text), Acc, Row, StartCol, EndCol - 1},
    {ok, {Token, {Row, EndCol}, Input}}.
|
|
|
|
%% Accumulate a "#"-prefixed hexadecimal byte-array token. Underscores may
%% separate digit groups. Nibbles are collected in reverse order and folded
%% into a binary at the end; an odd nibble count is left-padded with zero
%% (so #123 denotes <<16#01, 16#23>>).
bytes_token(Origin, {Row, Col}, [C | Tail], Text, Nibbles) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Origin, {Row, Col + 1}, Tail, [C | Text], [Digit | Nibbles]);
bytes_token(Origin, {Row, Col}, [$_, C | Tail], Text, Nibbles) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    %% BUGFIX: two characters ('_' and the digit) are consumed here, so the
    %% column must advance by 2. It previously advanced by 1, skewing every
    %% subsequent column number in tokens and error reports (compare the
    %% matching underscore clause of num_token/5, which uses Col + 2).
    bytes_token(Origin, {Row, Col + 2}, Tail, [C, $_ | Text], [Digit | Nibbles]);
bytes_token({_, StartCol}, {Row, EndCol}, Input, Text, Nibbles) ->
    Value = reverse_combine_nibbles(Nibbles, <<>>),
    Token = {bytes, lists:reverse(Text), Value, Row, StartCol, EndCol - 1},
    {ok, {Token, {Row, EndCol}, Input}}.
|
|
|
|
%% Convert one hexadecimal digit character to its numeric value (0..15).
%% The letter guards are restricted to $A..$F / $a..$f; previously any
%% latin letter was accepted (e.g. $z -> 35), which would silently produce
%% out-of-range nibble values if a caller ever bypassed the ?IS_HEX guard.
convert_digit(C) when C >= $0, C =< $9 ->
    C - $0;
convert_digit(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_digit(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
|
|
|
%% Fold a reversed list of nibbles into a binary. The list arrives most
%% recently read nibble first, so each pair is flipped back as it is
%% prepended. A leftover single nibble gets a zero high nibble.
reverse_combine_nibbles([Low, High | More], Acc) ->
    reverse_combine_nibbles(More, <<High:4, Low:4, Acc/binary>>);
reverse_combine_nibbles([Low], Acc) ->
    <<0:4, Low:4, Acc/binary>>;
reverse_combine_nibbles([], Acc) ->
    Acc.
|
|
|
|
%% Accumulate a double-quoted string token as a UTF-8 binary. The literal
%% ends at the next unescaped '"'; reaching end of input or a line break
%% first is reported as an unclosed string.
string_token({_, StartCol}, {Row, Col}, [$" | Tail], Src, Value) ->
    Token = {string, lists:reverse([$" | Src]), Value, Row, StartCol, Col},
    {ok, {Token, {Row, Col + 1}, Tail}};
string_token({_, StartCol}, {Row, Col}, [], Src, _) ->
    {error, {unclosed_string_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
string_token({_, StartCol}, {Row, Col}, [$\r | _], Src, _) ->
    {error, {unclosed_string_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
string_token({_, StartCol}, {Row, Col}, [$\n | _], Src, _) ->
    {error, {unclosed_string_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
string_token(Origin, Cursor, Input, Src, Value) ->
    case parse_char(Origin, Cursor, Input, Src) of
        {ok, {Char, NewSrc, NewCursor, NewInput}} ->
            % TODO: ERTS probably had to convert this FROM utf8 at some point,
            % so why bother, if we need to convert it back? I guess we could
            % accept iolists if we really wanted to waste time on this point...
            string_token(Origin, NewCursor, NewInput, NewSrc, <<Value/binary, Char/utf8>>);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Accumulate a single-quoted character literal. End of input or a line
%% break before the character itself is an unclosed literal.
character_token({_, StartCol}, {Row, Col}, [], Src) ->
    {error, {unclosed_character_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
character_token({_, StartCol}, {Row, Col}, [$\r | _], Src) ->
    {error, {unclosed_character_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
character_token({_, StartCol}, {Row, Col}, [$\n | _], Src) ->
    {error, {unclosed_character_literal, lists:reverse(Src), StartCol, Row, Col - 1}};
character_token(Origin, Cursor, Input, Src) ->
    case parse_char(Origin, Cursor, Input, Src) of
        {ok, {Char, NewSrc, NewCursor, NewInput}} ->
            character_token2(Origin, NewCursor, NewInput, NewSrc, Char);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% After the character itself, a closing quote must follow immediately to
%% complete the char_literal token; anything else is an unclosed literal.
character_token2({_, StartCol}, {Row, Col}, [$' | Tail], Src, Value) ->
    Token = {char_literal, lists:reverse([$' | Src]), Value, Row, StartCol, Col},
    {ok, {Token, {Row, Col + 1}, Tail}};
character_token2({_, StartCol}, {Row, Col}, _, Src, _) ->
    {error, {unclosed_character_literal, lists:reverse(Src), StartCol, Row, Col - 1}}.
|
|
|
|
%% Parse one (possibly escaped) character of a string or char literal.
%% Returns {ok, {CharValue, NewSourceCharsRev, NewPos, RemainingInput}}.
%% Handles \x{...} long hex escapes, \xHH byte escapes, single-character
%% escapes, and plain characters, in that order.
parse_char(Origin, {Row, Col}, "\\x{" ++ Input, Src) ->
    escape_long_hex_code(Origin, {Row, Col + 3}, Input, "{x\\" ++ Src, 0);
parse_char(_, {Row, Col}, [$\\, $x, A, B | Input], Src) when ?IS_HEX(A), ?IS_HEX(B) ->
    Byte = convert_digit(A) * 16 + convert_digit(B),
    {ok, {Byte, [B, A, $x, $\\ | Src], {Row, Col + 4}, Input}};
parse_char({Row, StartCol}, {Row, Col}, [$\\, C | Input], Src) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            %% BUGFIX: record the backslash ($\\) in the source-character
            %% accumulator; previously an escaped space ($\ ) was stored,
            %% so the source text echoed back in later tokens and errors
            %% contained a space where the backslash belonged.
            {ok, {ByteVal, [C, $\\ | Src], {Row, Col + 2}, Input}};
        error ->
            {error, {invalid_escape_code, [$\\, C], Row, StartCol, Col + 1}}
    end;
parse_char(_, {Row, Col}, [C | Input], Src) ->
    {ok, {C, [C | Src], {Row, Col + 1}, Input}}.
|
|
|
|
%% Consume the hex digits of a "\x{...}" escape up to the closing brace,
%% accumulating the code point value as we go.
escape_long_hex_code(_, {Row, Col}, "}" ++ Input, Src, Value) ->
    {ok, {Value, "}" ++ Src, {Row, Col + 1}, Input}};
escape_long_hex_code(Origin, {Row, Col}, [C | Input], Src, Value) when ?IS_HEX(C) ->
    escape_long_hex_code(Origin, {Row, Col + 1}, Input, [C | Src], 16 * Value + convert_digit(C));
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Cursor, [], Src, Value) ->
    % Just return as if the escape code were closed, and let the string
    % parser produce an unclosed string error instead.
    {ok, {Value, Src, Cursor, []}}.
|
|
|
|
%% Map a single escape character (the C in "\C") to its character value,
%% or return error for an unknown escape.
%% Technically \" and \' are only valid inside their own quote characters,
%% not each other, but whatever, we will just be permissive here.
escape_char($b)  -> {ok, $\b};
escape_char($e)  -> {ok, $\e};
escape_char($f)  -> {ok, $\f};
escape_char($n)  -> {ok, $\n};
escape_char($r)  -> {ok, $\r};
escape_char($t)  -> {ok, $\t};
escape_char($v)  -> {ok, $\v};
escape_char($")  -> {ok, $"};
escape_char($')  -> {ok, $'};
escape_char($\\) -> {ok, $\\};
escape_char(_)   -> error.
|
|
|
|
%%% Sophia Literal Parser
|
|
|
|
%%% This parser is a simple recursive descent parser, written explicitly in
|
|
%%% erlang.
|
|
%%%
|
|
%%% There are no infix operators in the subset we want to parse, so recursive
|
|
%%% descent is fine with no special tricks, no shunting yard algorithm, no
|
|
%%% parser generators, etc.
|
|
%%%
|
|
%%% If we were writing this in C then we might want to work iteratively with an
|
|
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
|
|
%%% using recursion. This is a tried and true method of making fast parsers.
|
|
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
|
|
%%% rather than writing confusing iterative code in Erlang, to simulate a
|
|
%%% pushdown automaton inside another simulated stack machine... we should just
|
|
%%% write the recursive code, thus programming the BEAM to implement the
|
|
%%% pushdown automaton that we want.
|
|
|
|
%% Pull the next token and dispatch to the expression parser proper.
parse_expression(Type, Cursor, Input) ->
    case next_token(Cursor, Input) of
        {ok, {Token, NewCursor, NewInput}} ->
            parse_expression2(Type, NewCursor, NewInput, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse one expression given its first token. `Type` is a normalized
%% expected-type triple {Original, Normalized, Shape}; Shape drives the
%% type checks below. -- NOTE(review): the triple layout is inferred from
%% the {O, N, _} matches throughout this file; confirm against the type
%% normalization code elsewhere.
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
    typecheck_integer(Type, Pos, String, Value, Row, Start, End);
parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) ->
    %% A unary minus must be followed directly by an integer token.
    case next_token(Pos, String) of
        {ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} ->
            typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End);
        {ok, {Token, _, _}} ->
            %% BUGFIX: previously a non-integer token after "-" fell through
            %% no case clause and crashed with a case_clause error; report
            %% it as an unexpected token instead.
            unexpected_token(Token);
        {error, Reason} ->
            {error, Reason}
    end;
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
    %% Byte arrays check against bytes(n)/bytes(any); under a bits type the
    %% bytes are reinterpreted as a big-endian integer bitfield.
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [Len]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, bits} ->
            Size = bit_size(Value),
            <<IntValue:Size>> = Value,
            {ok, {{bits, IntValue}, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Result, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, string} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, string, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, char} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, char, Row, Start, End}}
    end;
%% Bracketed forms and identifiers each delegate to their own parser.
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
    parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
    parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
    parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
    parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
    {error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
    unexpected_token(Token).
|
|
|
|
%% The normalized type triple meaning "no expected type": accept anything.
unknown_type() ->
    {unknown_type, already_normalized, unknown_type}.
|
|
|
|
%% Require the given token source-texts to appear next, in order.
expect_tokens([], Cursor, Input) ->
    {ok, {Cursor, Input}};
expect_tokens([Expected | More], Cursor, Input) ->
    case next_token(Cursor, Input) of
        {ok, {{_, Expected, _, _, _, _}, NewCursor, NewInput}} ->
            expect_tokens(More, NewCursor, NewInput);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, Expected);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Report an unexpected token. The expected-token hint is currently
%% unused: sometimes there are only one or two tokens that could have
%% worked, which might make for simple non-technical error messages, but
%% there is no format for that yet.
unexpected_token(Token, _Expected) ->
    unexpected_token(Token).

unexpected_token({eof, _, _, _, _, _}) ->
    {error, expression_incomplete};
unexpected_token({_, Text, _, Row, StartCol, EndCol}) ->
    {error, {unexpected_token, Text, Row, StartCol, EndCol}}.
|
|
|
|
%%% Ambiguous Chain Object vs Identifier Parsing
|
|
|
|
%% Interpret an identifier-like token: reserved words (true/false and
%% Bits.all/Bits.none) map to their values, lowercase identifiers are
%% treated as chain-object literals, and any other path must be a variant
%% constructor.
parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, true, Row, Start, End);
parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, false, Row, Start, End);
%% Bits.all is the all-ones bitfield (-1); Bits.none is all zeros.
parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Constructors start with uppercase characters, so lowercase can only be a
    % chain object.
    try
        case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
            {account_pubkey, Data} ->
                typecheck_address(Type, Pos, String, Data, Row, Start, End);
            {contract_pubkey, Data} ->
                typecheck_contract(Type, Pos, String, Data, Row, Start, End);
            {signature, Data} ->
                typecheck_signature(Type, Pos, String, Data, Row, Start, End);
            {_, _} ->
                % Only a few chain objects are recognized by Sophia. The rest
                % are interpreted as identifiers, so we might as well give the
                % same sort of error that the compiler would give.
                {error, {unexpected_identifier, S, Row, Start, End}}
        end
    catch
        % Deliberate best-effort catch-all: any decode failure is reported
        % as an unknown identifier rather than crashing the parser.
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
    % Inversely, chain object prefixes are always lowercase, so any other path
    % must be a variant constructor, or invalid.
    parse_variant(Type, Pos, String, Path, Row, Start, End).
|
|
|
|
%% Accept an integer literal under an integer, bits, or unknown expected
%% type. Under a bits type the integer becomes a bitfield value.
typecheck_integer({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_integer({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, integer, Row, Start, End}}.

%% Accept a boolean literal under a boolean or unknown expected type.
typecheck_bool({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, boolean, Row, Start, End}}.

%% Accept a bitfield literal (Bits.all / Bits.none) under bits or unknown.
typecheck_bits({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, bits, Row, Start, End}}.
|
|
|
|
%% Accept an account pubkey under address (or, leniently, contract) types.
typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{contract, Data}, Pos, String}};
typecheck_address({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, address, Row, Start, End}}.

%% Accept a contract pubkey under contract (or, leniently, address) types.
typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{address, Data}, Pos, String}};
typecheck_contract({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, contract, Row, Start, End}}.

%% Accept a signature object under signature or bytes(64)/bytes(any) types.
typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, signature, Row, Start, End}}.
|
|
|
|
|
|
%%% List Parsing
|
|
|
|
%% Begin parsing "[...]" once the opening bracket has been consumed. The
%% expected type must be a list type (or unknown).
parse_list({_, _, {list, [Inner]}}, Cursor, Input, _, _) ->
    parse_list2(Inner, Cursor, Input);
parse_list({_, _, unknown_type}, Cursor, Input, _, _) ->
    parse_list2(unknown_type(), Cursor, Input);
parse_list({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, list, Row, Start, Start}}.

%% Run the shared element loop and strip off the extra bookkeeping that
%% only the tuple parser needs.
parse_list2(Inner, Cursor, Input) ->
    case parse_list_loop(Inner, Cursor, Input, "]", []) of
        {ok, {Elements, _, _, NewCursor, NewInput}} ->
            {ok, {Elements, NewCursor, NewInput}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Element loop shared between lists ("]") and untyped tuples (")").
%% Returns {ok, {Elements, TrailingClose, ClosePos, Cursor, Input}}, where
%% TrailingClose is true when the close character directly followed the
%% opener or a comma (i.e. an empty sequence or a trailing comma).
parse_list_loop(Inner, Cursor, Input, CloseChar, Acc) ->
    case next_token(Cursor, Input) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewCursor, NewInput}} ->
            {ok, {lists:reverse(Acc), true, {Row, Col}, NewCursor, NewInput}};
        {ok, {Token, NewCursor, NewInput}} ->
            parse_list_loop2(Inner, NewCursor, NewInput, CloseChar, Acc, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse one element; failures are wrapped with the element's position in
%% the sequence so nested errors can be located.
parse_list_loop2(Inner, Cursor, Input, CloseChar, Acc, Token) ->
    case parse_expression2(Inner, Cursor, Input, Token) of
        {ok, {Value, NewCursor, NewInput}} ->
            parse_list_loop3(Inner, NewCursor, NewInput, CloseChar, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            {error, wrap_error(Reason, {Wrapper, length(Acc)})}
    end.
|
|
|
|
%% After an element: the close character finishes the sequence, a comma
%% continues it, and anything else is a parse error.
parse_list_loop3(Inner, Cursor, Input, CloseChar, Acc) ->
    case next_token(Cursor, Input) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewCursor, NewInput}} ->
            {ok, {lists:reverse(Acc), false, {Row, Col}, NewCursor, NewInput}};
        {ok, {{character, ",", _, _, _, _}, NewCursor, NewInput}} ->
            parse_list_loop(Inner, NewCursor, NewInput, CloseChar, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, CloseChar);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Pick the error-wrapper tag matching the sequence delimiter.
choose_list_error_wrapper("]") -> list_element;
choose_list_error_wrapper(")") -> tuple_element.
|
|
|
|
%%% Ambiguous Parenthesis Parsing
|
|
|
|
%% Parse "(...)" after the opening paren has been consumed.
parse_tuple({_, _, unknown_type}, Cursor, Input) ->
    % An untyped tuple is a list of untyped terms, and weirdly our list
    % parser works perfectly for that, as long as we change the closing
    % character to be ")" instead of "]".
    case parse_list_loop(unknown_type(), Cursor, Input, ")", []) of
        {ok, {[Single], false, _, NewCursor, NewInput}} ->
            % In Sophia, trailing commas are invalid, and so all singleton
            % tuples are unwrapped, and translated into the inner type.
            {ok, {Single, NewCursor, NewInput}};
        {ok, {Terms, _, _, NewCursor, NewInput}} ->
            {ok, {{tuple, list_to_tuple(Terms)}, NewCursor, NewInput}};
        {error, _} = Error ->
            Error
    end;
parse_tuple(Type, Cursor, Input) ->
    % Typed tuple parsing is quite complex, because we also want to support
    % normal parentheses for grouping. It's not strictly necessary for
    % inputting data, since we don't have any infix operators in simple
    % data/term notation, but the alternatives are to generate singleton
    % tuples naively, (which are impossible to generate from Sophia,) or to
    % hard error on singleton tuples! Being faithful to Sophia is clearly
    % nice!
    %
    % Count how many ambiguous parens there are, including the one we
    % already saw.
    case count_open_parens(Cursor, Input, 1) of
        {ok, {Count, Token, NewCursor, NewInput}} ->
            % Compare that to the amount of nesting tuple connectives are
            % in the type we are expected to produce, then work out what to
            % do with all this information.
            {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []),
            parse_tuple2(ExcessCount, HeadType, Tails, NewCursor, NewInput, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Count consecutive "(" tokens on top of an initial count, returning the
%% first non-paren token along with the total.
count_open_parens(Cursor, Input, Count) ->
    case next_token(Cursor, Input) of
        {ok, {{character, "(", _, _, _, _}, NewCursor, NewInput}} ->
            count_open_parens(NewCursor, NewInput, Count + 1);
        {ok, {Token, NewCursor, NewInput}} ->
            {ok, {Count, Token, NewCursor, NewInput}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Descend one tuple-type level per open paren, collecting the sibling
%% element types (Tails) at each level. Stops when either the parens or
%% the (non-empty) tuple nesting runs out.
extract_tuple_type_info(ParenCount, {_, _, {tuple, [Head | Rest]}}, Tails) when ParenCount > 0 ->
    extract_tuple_type_info(ParenCount - 1, Head, [Rest | Tails]);
extract_tuple_type_info(ParenCount, HeadType, Tails) ->
    {ParenCount, HeadType, Tails}.
|
|
|
|
%% Decide what the first non-paren token means for a typed tuple.
parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(ExcessCount, HeadType, Tails, Cursor, Input, {character, ")", _, Row, Col, _}) ->
    %% An immediate close paren: either an empty tuple or a type error.
    parse_empty_tuple(ExcessCount, HeadType, Tails, Cursor, Input, Row, Col);
parse_tuple2(ExcessCount, HeadType, Tails, Cursor, Input, Token) ->
    %% Finished with parentheses for now; parse the head term, then build
    %% all the other tuple layers.
    case parse_expression2(HeadType, Cursor, Input, Token) of
        {ok, {Head, NewCursor, NewInput}} ->
            parse_tuple_tails(ExcessCount, Head, Tails, NewCursor, NewInput);
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason}
    end.
|
|
|
|
%% Handle "()" (a close paren directly after opening parens).
parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
    % Zero excess parens means every paren is a tuple, and the innermost
    % one expected a nonzero number of elements before this close paren.
    [Tail | _] = Tails,
    {error, {not_enough_elements, 1 + length(Tail), 0, Row, Col}};
parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Cursor, Input, _, _) ->
    % Some ambiguous parentheses remain, and we now know one of them is
    % this empty tuple. Continue the loop with unit in the head position,
    % as if it were an integer or something.
    parse_tuple_tails(ExcessCount - 1, {tuple, {}}, Tails, Cursor, Input);
parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % We were expecting a head term of a different type!
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
|
|
|
|
%% Build the enclosing tuple layers around an already-parsed head term.
parse_tuple_tails(0, Head, [], Cursor, Input) ->
    % No open parens left, no tuples left to build, we are done!
    {ok, {Head, Cursor, Input}};
parse_tuple_tails(ExcessCount, Head, Tails, Cursor, Input) ->
    % The ambiguous case: a mix of tuple parens and grouping parens. Peek
    % at the next token to see which one closes here.
    case next_token(Cursor, Input) of
        {ok, {{character, ")", _, Row, Col, _}, NewCursor, NewInput}} ->
            % It is grouping! Try closing a grouping paren.
            parse_tuple_tails_paren(ExcessCount, Head, Tails, NewCursor, NewInput, Row, Col);
        {ok, {{character, ",", _, Row, Col, _}, NewCursor, NewInput}} ->
            % It is a real tuple! Try parsing a tuple.
            parse_tuple_tails_comma(ExcessCount, Head, Tails, NewCursor, NewInput, Row, Col);
        {ok, {Token, _, _}} ->
            % Anything else is just a boring parse error we can complain
            % about.
            unexpected_token(Token, ")");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% A ")" was seen while tuple layers may still be pending.
parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
    % A singleton tuple was expected, but a grouping paren was given. In
    % theory we could be permissive here, but we were asked to do type
    % checking, and this is a type error. The error itself is hard to
    % reproduce, but the fix is known exactly, so report that instead.
    {error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
    % A tuple of more than one element was expected, but a grouping paren
    % was given. Again the type error is hard to produce, but the actual
    % solution is simple; add more elements.
    {error, {not_enough_elements, length(Tail) + 1, 1, Row, Col}};
parse_tuple_tails_paren(ExcessCount, Head, Tails, Cursor, Input, _, _) ->
    % We were expecting some grouping parens, and one of them has now
    % turned out to be exactly that. Good.
    parse_tuple_tails(ExcessCount - 1, Head, Tails, Cursor, Input).
|
|
|
|
%% A "," was seen while tuple layers may still be pending.
parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
    % No more tuples, so commas are invalid. It's hard to describe the
    % type error a comma would actually produce, so instead report the
    % actual solution, which is to remove the comma.
    {error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, Head, Tails, Cursor, Input, _, _) ->
    % If there were no tails we would have exited into the "grouping
    % parens only" clause above, so this match cannot fail:
    [TailTypes | ParentTails] = Tails,
    % Now we can parse this tuple as a tuple.
    case parse_multivalue(TailTypes, Cursor, Input, [Head]) of
        {ok, {Terms, NewCursor, NewInput}} ->
            NewHead = {tuple, list_to_tuple(Terms)},
            % Continue the loop with whatever parent tuple types this
            % tuple is meant to be a part of.
            parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewCursor, NewInput);
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason}
    end.
|
|
|
|
%%% Unambiguous Tuple/Variant Parsing
|
|
|
|
%% Parse a comma-separated, ")"-terminated sequence of typed elements,
%% starting with whatever was accumulated so far (reversed).
parse_multivalue(ElemTypes, Cursor, Input, Acc) ->
    case next_token(Cursor, Input) of
        {ok, {{character, ")", _, Row, Col, _}, NewCursor, NewInput}} ->
            check_multivalue_long_enough(ElemTypes, NewCursor, NewInput, Row, Col, Acc);
        {ok, {Token, NewCursor, NewInput}} ->
            parse_multivalue2(ElemTypes, NewCursor, NewInput, Acc, Token);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% Parse the next element against its expected type. Once the expected
%% types run out, any further elements are counted to report the excess.
parse_multivalue2([Next | Remaining], Cursor, Input, Acc, Token) ->
    case parse_expression2(Next, Cursor, Input, Token) of
        {ok, {Value, NewCursor, NewInput}} ->
            parse_multivalue3(Remaining, NewCursor, NewInput, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            {error, wrap_error(Reason, {Wrapper, length(Acc)})}
    end;
parse_multivalue2([], Cursor, Input, Acc, Token) ->
    count_multivalue_excess(Cursor, Input, Acc, Token).
|
|
|
|
%% After an element: ")" closes the sequence (checking no expected types
%% remain), "," continues it, and anything else is an error.
parse_multivalue3(ElemTypes, Cursor, Input, Acc) ->
    case next_token(Cursor, Input) of
        {ok, {{character, ")", _, Row, Col, _}, NewCursor, NewInput}} ->
            check_multivalue_long_enough(ElemTypes, NewCursor, NewInput, Row, Col, Acc);
        {ok, {{character, ",", _, _, _, _}, NewCursor, NewInput}} ->
            parse_multivalue(ElemTypes, NewCursor, NewInput, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, ")");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% All expected element types were consumed but more input remains. Parse
%% the remainder untyped purely to count and report how many were given.
count_multivalue_excess(Cursor, Input, TypedAcc, Token) ->
    ExpectedLen = length(TypedAcc),
    case parse_list_loop2(unknown_type(), Cursor, Input, ")", TypedAcc, Token) of
        {ok, {Terms, _, {Row, Col}, _, _}} ->
            {error, {too_many_elements, ExpectedLen, length(Terms), Row, Col}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
%% The close paren was reached; succeed only if no element types remain,
%% otherwise report how many elements were expected versus given.
check_multivalue_long_enough([], Cursor, Input, _, _, Acc) ->
    {ok, {lists:reverse(Acc), Cursor, Input}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
    GotCount = length(Got),
    {error, {not_enough_elements, length(Remaining) + GotCount, GotCount, Row, Col}}.
|
|
|
|
%%% Variant parsing
|
|
|
|
%% Parse a variant constructor, optionally qualified by its namespace.
parse_variant({O, N, {variant, Variants}}, Cursor, Input, [Ident], Row, Start, End) ->
    parse_variant2(O, N, Variants, Cursor, Input, "", Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Cursor, Input, [Namespace, Constructor], Row, Start, End) ->
    %% A qualified constructor must live in the namespace of the expected
    %% variant type.
    case get_typename(O, N) of
        [Namespace, _] ->
            parse_variant2(O, N, Variants, Cursor, Input, Namespace ++ ".", Constructor, Row, Start, End);
        _ ->
            {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
    end;
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    %% Without an expected type there is no constructor table to consult.
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In normal code, identifiers can have many meanings, which can result
    % in lots of different errors. In constant/immediate/normalized Sophia
    % terms we know identifiers are always variants, so we can type error
    % if any other type was expected.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.
|
|
|
|
%% Split a type's printed name on dots, preferring the normalized name
%% when one is present.
get_typename(Original, already_normalized) ->
    get_typename(Original);
get_typename(_, Normalized) ->
    get_typename(Normalized).

get_typename({Name, _}) ->
    string:split(Name, ".", all);
get_typename(Name) ->
    string:split(Name, ".", all).
|
|
|
|
% Resolve a constructor name to its tag and element types, then move on to
% parsing the constructor's arguments (if any).
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
    case lookup_variant(Constructor, Variants, 0) of
        {ok, {Tag, ElemTypes}} ->
            % FATE variant values carry the arity of every constructor.
            Arities = [length(Elems) || {_, Elems} <- Variants],
            parse_variant3(Arities, Tag, ElemTypes, Pos, String);
        error ->
            Name = Prefix ++ Constructor,
            {error, {invalid_constructor, O, N, Name, Row, Start, End}}
    end.
|
|
|
|
% A nullary constructor is a complete value on its own; any other arity must
% be followed by a parenthesised argument list.
parse_variant3(Arities, Tag, [], Pos, String) ->
    {ok, {{variant, Arities, Tag, {}}, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, Pos1, String1}} ->
            parse_variant4(Arities, Tag, ElemTypes, Pos1, String1);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "(");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse the constructor's arguments and wrap them up as a FATE variant value.
parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
    case parse_multivalue(ElemTypes, Pos, String, []) of
        {ok, {Terms, Pos1, String1}} ->
            Variant = {variant, Arities, Tag, list_to_tuple(Terms)},
            {ok, {Variant, Pos1, String1}};
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Find a constructor by name in an ordered constructor list, returning its
% zero-based tag and its element types, or error if it is not declared.
lookup_variant(Ident, Variants, Tag) ->
    case Variants of
        [] ->
            error;
        [{Ident, ElemTypes} | _] ->
            {ok, {Tag, ElemTypes}};
        [_ | Rest] ->
            lookup_variant(Ident, Rest, Tag + 1)
    end.
|
|
|
|
%%% Record parsing
|
% After an opening '{', decide whether a map or a record follows, based on
% the expected type. Without type information we can still disambiguate from
% the next token: '}' is the empty map, '[' starts a map key, and an
% identifier starts a record field (which cannot be resolved untyped).
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
    parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
    parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, Pos1, String1}} ->
            {ok, {#{}, Pos1, String1}};
        {ok, {{character, "[", _, _, _, _}, Pos1, String1}} ->
            parse_map2(unknown_type(), unknown_type(), Pos1, String1, #{});
        {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
            % Record field layout needs the record's definition.
            {error, {unresolved_record, Row, Start, End}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, _} = Error ->
            Error
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, map, Row, Start, Start}}.
|
|
|
|
% Parse one record field, or detect the closing brace of the record.
parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{alphanum, Ident, _, Row, Start, End}, Pos1, String1}} ->
            parse_record2(Fields, Pos1, String1, Acc, Ident, Row, Start, End);
        {ok, {{character, "}", _, Row, Start, End}, Pos1, String1}} ->
            parse_record_end(Fields, Pos1, String1, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Check that the field name is declared in the record definition.
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, Type} ->
            parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type)
    end.
|
|
|
|
% Reject duplicate fields within a single record literal.
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case Acc of
        #{Ident := _} ->
            {error, {field_already_present, Ident, Row, Start, End}};
        _ ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type)
    end.
|
|
|
|
% A field name must be followed by '='.
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {ok, {Pos1, String1}} ->
            parse_record5(Fields, Pos1, String1, Acc, Ident, Type);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse the field's value and store it under the field name.
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Value, Pos1, String1}} ->
            parse_record6(Fields, Pos1, String1, Acc#{Ident => Value});
        {error, Reason} ->
            % Tag the failure with the field it occurred in.
            wrap_error(Reason, {record_field, Ident})
    end.
|
|
|
|
% After a field, expect either ',' (more fields) or '}' (end of record).
parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, Pos1, String1}} ->
            parse_record(Fields, Pos1, String1, Acc);
        {ok, {{character, "}", _, Row, Start, End}, Pos1, String1}} ->
            parse_record_end(Fields, Pos1, String1, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% All given fields are parsed; order them as declared and check that none
% are missing, attaching the closing brace's position to any error.
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {ok, Result} ->
            {ok, {Result, Pos, String}};
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}}
    end.
|
|
|
|
% Arrange the parsed field values in declaration order, failing on the first
% declared field that was never given a value.
parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
    case FieldValues of
        #{Name := Value} ->
            parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
        _ ->
            {error, {missing_field, Name}}
    end;
parse_record_final_loop([], _, [Single]) ->
    % Singleton records are type-checked in Sophia, but unwrapped in the
    % resulting FATE.
    {ok, Single};
parse_record_final_loop([], _, Acc) ->
    {ok, {tuple, list_to_tuple(lists:reverse(Acc))}}.
|
|
|
|
|
|
%%% Map Parsing
|
% Parse one '[Key] = Value' map entry, or detect the end of the map.
parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "[", _, _, _, _}, Pos1, String1}} ->
            parse_map2(KeyType, ValueType, Pos1, String1, Acc);
        {ok, {{character, "}", _, _, _, _}, Pos1, String1}} ->
            {ok, {Acc, Pos1, String1}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse the key expression of a map entry.
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {ok, {Key, Pos1, String1}} ->
            parse_map3(KeyType, ValueType, Pos1, String1, Acc, Key);
        {error, Reason} ->
            % Tag the failure with the index of the entry whose key failed.
            wrap_error(Reason, {map_key, maps:size(Acc)})
    end.
|
|
|
|
% The key must be followed by ']' and then '='.
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {ok, {Pos1, String1}} ->
            parse_map4(KeyType, ValueType, Pos1, String1, Acc, Key);
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% Parse the value expression of a map entry and store the key/value pair.
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Key, Result, Acc),
            parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc);
        {error, Reason} ->
            % Tag the failure with which entry's *value* failed, mirroring
            % the {map_key, _} context added in parse_map2. (wrap_error is
            % currently the identity, so this is behavior-preserving today,
            % and becomes useful as soon as wrap_error is implemented.)
            wrap_error(Reason, {map_value, maps:size(Acc)})
    end.
|
|
|
|
% After an entry, expect either ',' (more entries) or '}' (end of map).
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, Pos1, String1}} ->
            parse_map(KeyType, ValueType, Pos1, String1, Acc);
        {ok, {{character, "}", _, _, _, _}, Pos1, String1}} ->
            {ok, {Acc, Pos1, String1}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, _} = Error ->
            Error
    end.
|
|
|
|
% TODO: attach the given context (e.g. {record_field, Ident} or
% {map_key, Index}) to the reason, so that errors deep inside nested
% literals can report where they occurred. For now the context is
% discarded and the reason is passed through unchanged.
wrap_error(Reason, _) -> Reason.
|
|
|
|
%%% Tests
|
% Assert that parsing the given Sophia literal yields exactly the given FATE
% term, crashing with a descriptive error term otherwise.
check_sophia_to_fate(Type, Sophia, Fate) ->
    case parse_literal(Type, Sophia) of
        {ok, Fate} ->
            ok;
        Other ->
            % Other is either {ok, WrongFate} or {error, Reason}; both are
            % embedded in the raised error for diagnosis.
            erlang:error({to_fate_failed, Sophia, Fate, Other})
    end.
|
|
|
|
% Compile a contract and extract both the constant return value of the given
% entrypoint (from the generated FATE code) and that entrypoint's AACI type.
compile_entrypoint_value_and_type(Source, Entrypoint) ->
    {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),

    % Find the fcode for the correct entrypoint.
    {fcode, Bodies, NamesMap, _} = FateCode,
    Names = maps:to_list(NamesMap),
    Name = unicode:characters_to_binary(Entrypoint),
    {Hash, Name} = lists:keyfind(Name, 2, Names),
    {_, _, Code} = maps:get(Hash, Bodies),
    FATE = extract_return_value(Code),

    % Generate the AACI, and get the AACI type info for the correct
    % entrypoint. (Bug fix: this previously hardcoded "f" instead of using
    % the Entrypoint argument; every current caller passes "f", so behavior
    % is unchanged for them.)
    AACI = hz_aaci:prepare(ACI),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, Entrypoint),

    {FATE, Type}.
|
|
|
|
% A constant entrypoint compiles to a single basic block (index 0) that
% returns an immediate value; extract that value or bail out loudly.
extract_return_value(Code) ->
    case Code of
        #{0 := [{'RETURNR', {immediate, FATE}}]} ->
            FATE;
        _ ->
            erlang:exit({invalid_literal_fcode, Code})
    end.
|
|
|
|
% Compile the literal with the real Sophia compiler, checking that it is
% valid Sophia and obtaining the reference FATE value and type; then verify
% that our parser agrees with the compiler, both without and with type info.
check_parser(Sophia) ->
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),

    % The untyped parse must produce the compiler's value...
    check_sophia_to_fate(unknown_type(), Sophia, Fate),
    % ...and the term must still be accepted *with* type information.
    check_sophia_to_fate(Type, Sophia, Fate).
|
|
|
|
% Like check_parser/1, but with extra type definitions compiled alongside
% the literal. Only the typed parse is checked: the definitions introduce
% records/variants that an untyped parse probably cannot resolve.
check_parser_with_typedef(Typedef, Sophia) ->
    Source = "contract C =\n  " ++ Typedef ++ "\n  entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Sophia, Fate).
|
|
|
|
% Literals whose types can be inferred without any annotations.
anon_types_test() ->
    Cases =
        [% Integers.
         "123",
         "1_2_3",
         "-123",
         % Booleans.
         "true",
         "false",
         "[true, false]",
         % Bytes.
         "#DEAD000BEEF",
         "#DE_AD0_00B_EEF",
         % Strings. (Non-ASCII strings like "ÿ" and "♣" are not handled
         % right by the Sophia compiler, so those cases stay disabled even
         % though we should still support them.)
         "\"hello world\"",
         % Characters.
         "'A'",
         "['a', ' ', '[']",
         % List of integers.
         "[1, 2, 3]",
         % List of lists.
         "[[], [1], [2, 3]]",
         % Tuple.
         "(1, [2, 3], (4, 5))",
         % Map.
         "{[1] = 2, [3] = 4}"],
    lists:foreach(fun check_parser/1, Cases),

    ok.
|
|
|
|
% Escape sequences inside string and character literals.
string_escape_codes_test() ->
    Cases =
        [% Strings.
         "\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \"",
         "\"\\x00\\x11\\x77\\x4a\\x4A\"",
         "\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\"",
         "\"'\"",
         % Characters.
         "['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']",
         "['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']",
         "['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']",
         "'\"'"],
    lists:foreach(fun check_parser/1, Cases),

    ok.
|
|
|
|
% Record literals require their record definition to parse.
records_test() ->
    Def = "record pair = {x: int, y: int}",
    Term = "{x = 1, y = 2}",
    check_parser_with_typedef(Def, Term),
    % check_parser_with_typedef won't run an untyped parse on the
    % expression, but we can; it is expected to error, since field layout
    % cannot be resolved without the definition.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Term).
|
|
|
|
% Variant constructors of various arities, plus namespace qualification.
variant_test() ->
    Def = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
    Terms = ["Zero",
             "One(0)",
             "Two(0, 1)",
             "Two([], [1, 2, 3])",
             "C.Zero"],
    lists:foreach(fun(T) -> check_parser_with_typedef(Def, T) end, Terms),

    % Constructors cannot be resolved without type information.
    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),

    ok.
|
|
|
|
% A constructor may share its name with the contract itself; both plain and
% qualified spellings must still parse.
ambiguous_variant_test() ->
    Def = "datatype mytype = C | D",
    lists:foreach(fun(T) -> check_parser_with_typedef(Def, T) end,
                  ["C", "D", "C.C", "C.D"]),

    ok.
|
|
|
|
% Constructors qualified by a namespace other than the contract's own name.
namespace_variant_test() ->
    Term = "[N.A, N.B]",
    Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Term, Fate),

    ok.
|
|
|
|
% Chain objects: account addresses, signatures, and contract addresses.
chain_objects_test() ->
    % Address,
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % Two different forms of signature,
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),

    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because
    % the compiler demands that contract addresses be type checked according
    % to the logic of "contract oriented programming", including covariance,
    % etc. and "contract oriented programming" is not very compatible with
    % ML style type inference.
    Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ Contract,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(Type, Contract, Fate),
    check_sophia_to_fate(unknown_type(), Contract, Fate),

    ok.
|
|
|
|
% Bitstrings have no Sophia literal syntax beyond Bits.all/Bits.none, so we
% also accept arbitrary integers and byte arrays in typed contexts.
bits_test() ->
    check_parser("Bits.all"),
    check_parser("Bits.none"),
    {_, BitsType} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"),
    check_sophia_to_fate(BitsType, "5", {bits, 5}),
    check_sophia_to_fate(BitsType, "-5", {bits, -5}),
    % #123 is the byte array <<1, 35>>, i.e. the integer 256 + 32 + 3.
    check_sophia_to_fate(BitsType, "#123", {bits, 256 + 32 + 3}),
    ok.
|
|
|
|
% Singleton records are unwrapped in FATE; check that nesting and lists of
% them still work.
singleton_records_test() ->
    Def = "record singleton('a) = {it: 'a}",
    lists:foreach(fun(T) -> check_parser_with_typedef(Def, T) end,
                  ["{it = 123}",
                   "{it = {it = {it = 5}}}",
                   "[{it = 1}, {it = 2}, {it = 3}]"]),

    ok.
|
|
|
|
% Similar tests to the singleton records, but this time there isn't actually
% a special case; singleton variants are in fact wrapped in the FATE too.
singleton_variants_test() ->
    Def = "datatype wrapped('a) = Wrap('a)",
    lists:foreach(fun(T) -> check_parser_with_typedef(Def, T) end,
                  ["Wrap(123)",
                   "Wrap(Wrap(123))",
                   "[Wrap(1), Wrap(2), Wrap(3)]"]),

    ok.
|
|
|
|
% Grouping parens vs. tuple parens.
excess_parens_test() ->
    Cases =
        [% 'Singleton' parens are another special case, but unlike singleton
         % records, which exist in the type system, singleton parens aren't
         % tuples at all! They are just grouping, for arithmetic.
         "(123)",
         "[1, (2), ((3))]",
         % Where this gets tricky is when grouping parens are mixed with
         % tuple parens: each element of this list of three tuples should
         % parse to the same result.
         "[((1, 2)), ((1), 2), (((1), 2))]",
         % Including multiple nestings of tuples and grouping, interleaved.
         "((((1), ((2, 3)))), 4)",
         % Also empty tuples exist!
         "()",
         "(((((), ())), ()))"],
    lists:foreach(fun check_parser/1, Cases),

    ok.
|
|
|
|
% Token positions: where tokens start and end, and that the lexer tracks its
% position correctly after tokens and across line separators.
lexer_offset_test() ->
    BadToken = fun(Input, Token, Row, Start, End) ->
        {error, {unexpected_token, Token, Row, Start, End}} = parse_literal(Input)
    end,

    % Positions reported *for* various tokens.
    BadToken("123 456", "456", 1, 5, 7),
    BadToken("123 [0]", "[", 1, 5, 5),
    BadToken("123 ABC", "ABC", 1, 5, 7),
    BadToken("123 #AA", "#AA", 1, 5, 7),
    BadToken("123 \"x\"", "\"x\"", 1, 5, 7),
    BadToken("123 \"\\x{123}\"", "\"\\x{123}\"", 1, 5, 13),

    % Positions known *after* various tokens.
    BadToken("[0] 123", "123", 1, 5, 7),
    ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
    BadToken("#AA 123", "123", 1, 5, 7),
    BadToken("\"x\" 123", "123", 1, 5, 7),
    BadToken("\"\\x{123}\" 123", "123", 1, 11, 13),

    % All three line separator conventions advance the row.
    BadToken("123\nABC", "ABC", 2, 1, 3),
    BadToken("123\r\nABC", "ABC", 2, 1, 3),
    BadToken("123\rABC", "ABC", 2, 1, 3),

    ok.
|
|
|
|
% Error positions reported by the parser for tuple arity mismatches.
parser_offset_test() ->
    {_, PairType} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"),

    {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(PairType, "((1, 2))"),
    {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(PairType, "(((1, 2)))"),
    {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(PairType, "((1, 2), (3, 4), (5, 6))"),
    {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(PairType, "((1, 2, 3), (4, 5))"),

    ok.
|
|
|
|
% The Sophia compiler would never generate a 1-tuple, but it is a valid type
% within the FATE virtual machine, and it is possible to represent within
% the ACI itself, so we must be able to parse it.
singleton_test() ->
    SingletonACI = #{tuple => [<<"int">>]},

    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>,
                 arguments => [],
                 stateful => false,
                 payable => false,
                 returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function],
                           name => <<"C">>,
                           kind => contract_main,
                           payable => false,
                           typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),

    % Now let's do some testing with this weird type, to see if we handle it
    % correctly. A trailing comma marks a 1-tuple...
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    % ...even under some ambiguous nesting parens, for fun.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % No trailing comma should give an error.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % All of the above should behave the same in untyped contexts:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),

    % Also if we wanted an integer, the singleton is NOT dropped, so is also
    % an error. (Typo fix: the marker atom is 'already_normalized', as used
    % everywhere else, not 'alread_normalized'.)
    {error, {expected_close_paren, 1, 3}} = parse_literal({integer, already_normalized, integer}, "(1,)"),

    ok.
|