Compare commits
22 Commits
grids3
...
3838a7e3c5
| Author | SHA1 | Date | |
|---|---|---|---|
| 3838a7e3c5 | |||
| d014ae0982 | |||
| bb4bcbb7de | |||
| a695c21fc9 | |||
| 493bdb990c | |||
| 17f635af61 | |||
| 272ed01fdc | |||
| 49cd8b6687 | |||
| 966b4b2748 | |||
| fe182a5233 | |||
| f1696e2b9e | |||
| 2bf384ca82 | |||
| 4f2a3c6c6f | |||
| 7df04a81be | |||
| 6f02d4c4e6 | |||
| 56e63051bc | |||
| 3f1c9bd626 | |||
| 97e32574c4 | |||
| 6f5525afcf | |||
| 4f1958b210 | |||
| 3da9bd570b | |||
| d2163c1ff8 |
+7
-1085
File diff suppressed because it is too large
Load Diff
+1184
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,993 @@
|
||||
-module(hz_sophia).
|
||||
-vsn("0.8.2").
|
||||
-author("Jarvis Carroll <spiveehere@gmail.com>").
|
||||
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
||||
-license("GPL-3.0-or-later").
|
||||
|
||||
-export([parse_literal/1, parse_literal/2, check_parser/1]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
parse_literal(String) ->
|
||||
parse_literal(unknown_type(), String).
|
||||
|
||||
parse_literal(Type, String) ->
|
||||
case parse_expression(Type, {1, 1}, String) of
|
||||
{ok, {Result, NewPos, NewString}} ->
|
||||
parse_literal2(Result, NewPos, NewString);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_literal2(Result, Pos, String) ->
|
||||
% We have parsed a valid expression. Now check that the string ends.
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{eof, _, _, _, _}, _, _}} ->
|
||||
{ok, Result};
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
%%% Tokenizer
|
||||
|
||||
-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
|
||||
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
|
||||
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
|
||||
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
|
||||
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
|
||||
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
|
||||
|
||||
next_token({Row, Col}, []) ->
|
||||
{ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}};
|
||||
next_token({Row, Col}, " " ++ Rest) ->
|
||||
next_token({Row, Col + 1}, Rest);
|
||||
next_token({Row, Col}, "\t" ++ Rest) ->
|
||||
next_token({Row, Col + 1}, Rest);
|
||||
next_token({Row, _}, "\r\n" ++ Rest) ->
|
||||
next_token({Row + 1, 1}, Rest);
|
||||
next_token({Row, _}, "\r" ++ Rest) ->
|
||||
next_token({Row + 1, 1}, Rest);
|
||||
next_token({Row, _}, "\n" ++ Rest) ->
|
||||
next_token({Row + 1, 1}, Rest);
|
||||
next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
|
||||
alphanum_token(Pos, Pos, String, []);
|
||||
next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
|
||||
num_token(Pos, Pos, String, [], 0);
|
||||
next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||
bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
|
||||
next_token({Row, Col}, "\"" ++ Rest) ->
|
||||
string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
|
||||
next_token({Row, Col}, [Char | Rest]) ->
|
||||
Token = {character, [Char], Char, Row, Col, Col},
|
||||
{ok, {Token, {Row, Col + 1}, Rest}}.
|
||||
|
||||
alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
|
||||
alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]);
|
||||
alphanum_token({_, Start}, {Row, End}, String, Acc) ->
|
||||
AlphaString = lists:reverse(Acc),
|
||||
Path = string:split(AlphaString, ".", all),
|
||||
Token = {alphanum, AlphaString, Path, Row, Start, End - 1},
|
||||
{ok, {Token, {Row, End}, String}}.
|
||||
|
||||
num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
|
||||
NewValue = Value * 10 + (C - $0),
|
||||
num_token(Start, {Row, Col + 1}, Rest, [C | Chars], NewValue);
|
||||
num_token(Start, {Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
|
||||
NewValue = Value * 10 + (C - $0),
|
||||
num_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], NewValue);
|
||||
num_token({_, Start}, {Row, End}, String, Chars, Value) ->
|
||||
NumString = lists:reverse(Chars),
|
||||
Token = {integer, NumString, Value, Row, Start, End - 1},
|
||||
{ok, {Token, {Row, End}, String}}.
|
||||
|
||||
bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
|
||||
Digit = convert_digit(C),
|
||||
bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
|
||||
bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
|
||||
Digit = convert_digit(C),
|
||||
bytes_token(Start, {Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]);
|
||||
bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
|
||||
BytesString = lists:reverse(Chars),
|
||||
Value = reverse_combine_nibbles(Digits, <<>>),
|
||||
Token = {bytes, BytesString, Value, Row, Start, End - 1},
|
||||
{ok, {Token, {Row, End}, String}}.
|
||||
|
||||
convert_digit(C) when C >= $0, C =< $9 ->
|
||||
C - $0;
|
||||
convert_digit(C) when C >= $A, C =< $Z ->
|
||||
C - $A + 10;
|
||||
convert_digit(C) when C >= $a, C =< $z ->
|
||||
C - $a + 10.
|
||||
|
||||
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
|
||||
NewAcc = <<D2:4, D1:4, Acc/binary>>,
|
||||
reverse_combine_nibbles(Rest, NewAcc);
|
||||
reverse_combine_nibbles([D1], Acc) ->
|
||||
<<0:4, D1:4, Acc/binary>>;
|
||||
reverse_combine_nibbles([], Acc) ->
|
||||
Acc.
|
||||
|
||||
string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) ->
|
||||
case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of
|
||||
{ok, {Codepoint, NewSourceChars, NewPos, NewString}} ->
|
||||
NewValue = <<Value/binary, Codepoint/utf8>>,
|
||||
string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
|
||||
case escape_char(C) of
|
||||
{ok, ByteVal} ->
|
||||
string_token(Start, {Row, Col + 2}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>);
|
||||
error ->
|
||||
{error, {invalid_escape_code, [C], Row, Col}}
|
||||
end;
|
||||
string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
|
||||
SourceStr = lists:reverse([$" | SourceChars]),
|
||||
Token = {string, SourceStr, Value, Row, Start, Col},
|
||||
{ok, {Token, {Row, Col + 1}, Rest}};
|
||||
string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) ->
|
||||
% TODO: ERTS probably had to convert this FROM utf8 at some point, so why
|
||||
% bother, if we need to convert it back? I guess we could accept iolists if
|
||||
% we really wanted to waste time on this point...
|
||||
string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
|
||||
|
||||
escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) ->
|
||||
escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0);
|
||||
escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
|
||||
% As of writing this, the Sophia compiler will convert this byte from
|
||||
% extended ASCII to unicode... But it really shouldn't. The literal parser
|
||||
% does what the compiler should do.
|
||||
Byte = convert_digit(A) * 16 + convert_digit(B),
|
||||
{ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}};
|
||||
escape_hex_code({Row1, Col1}, _, _, _) ->
|
||||
{error, {invalid_escape_code, "\\x", Row1, Col1}}.
|
||||
|
||||
escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
|
||||
{ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
|
||||
escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
|
||||
NewSourceChars = [C | SourceChars],
|
||||
NewValue = 16 * Value + convert_digit(C),
|
||||
escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
|
||||
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
|
||||
{error, {invalid_hexadecimal, [C], Row, Col}};
|
||||
escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
|
||||
% Just return as if the escape code were closed, and let the string parser
|
||||
% produce an unclosed string error instead.
|
||||
{ok, {Value, SourceChars, Pos, []}}.
|
||||
|
||||
escape_char($b) -> {ok, $\b};
|
||||
escape_char($e) -> {ok, $\e};
|
||||
escape_char($f) -> {ok, $\f};
|
||||
escape_char($n) -> {ok, $\n};
|
||||
escape_char($r) -> {ok, $\r};
|
||||
escape_char($t) -> {ok, $\t};
|
||||
escape_char($v) -> {ok, $\v};
|
||||
escape_char($") -> {ok, $\"};
|
||||
escape_char($\\) -> {ok, $\\};
|
||||
escape_char(_) -> error.
|
||||
|
||||
%%% Sophia Literal Parser
|
||||
|
||||
%%% This parser is a simple recursive descent parser, written explicitly in
|
||||
%%% erlang.
|
||||
%%%
|
||||
%%% There are no infix operators in the subset we want to parse, so recursive
|
||||
%%% descent is fine with no special tricks, no shunting yard algorithm, no
|
||||
%%% parser generators, etc.
|
||||
%%%
|
||||
%%% If we were writing this in C then we might want to work iteratively with an
|
||||
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
|
||||
%%% using recursion. This is a tried and true method of making fast parsers.
|
||||
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
|
||||
%%% rather than writing confusing iterative code in Erlang, to simulate a
|
||||
%%% pushdown automaton inside another simulated stack machine... we should just
|
||||
%%% write the recursive code, thus programming the BEAM to implement the
|
||||
%%% pushdown automaton that we want.
|
||||
|
||||
parse_expression(Type, Pos, String) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {Token, NewPos, NewString}} ->
|
||||
parse_expression2(Type, NewPos, NewString, Token);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
|
||||
case Type of
|
||||
{_, _, integer} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
|
||||
Len = byte_size(Value),
|
||||
Result = {bytes, Value},
|
||||
case Type of
|
||||
{_, _, {bytes, [any]}} ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{_, _, {bytes, [Len]}} ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{_, _, {bytes, [ExpectedLen]}} ->
|
||||
{error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
|
||||
case Type of
|
||||
{_, _, string} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, string, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
|
||||
parse_list(Type, Pos, String, Row, Start);
|
||||
parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) ->
|
||||
parse_tuple(Type, Pos, String, Row, Start);
|
||||
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
|
||||
parse_record_or_map(Type, Pos, String, Row, Start);
|
||||
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
|
||||
parse_alphanum(Type, Pos, String, Path, Row, Start, End);
|
||||
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
|
||||
{error, unexpected_end_of_file};
|
||||
parse_expression2(_, _, _, Token) ->
|
||||
unexpected_token(Token).
|
||||
|
||||
unknown_type() ->
|
||||
{unknown_type, already_normalized, unknown_type}.
|
||||
|
||||
expect_tokens([], Pos, String) ->
|
||||
{ok, {Pos, String}};
|
||||
expect_tokens([Str | Rest], Pos, String) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{_, Str, _, _, _, _}, NewPos, NewString}} ->
|
||||
expect_tokens(Rest, NewPos, NewString);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, Str);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
unexpected_token(Token, _Expected) ->
|
||||
% I don't know if this is a good idea, but sometimes there are only one or
|
||||
% two tokens that could have worked, which might make for simple
|
||||
% non-technical error messages. I don't know how to format that yet,
|
||||
% though.
|
||||
unexpected_token(Token).
|
||||
|
||||
unexpected_token({eof, _, _, _, _, _}) ->
|
||||
{error, expression_incomplete};
|
||||
unexpected_token({_, S, _, Row, Start, End}) ->
|
||||
{error, {unexpected_token, S, Row, Start, End}}.
|
||||
|
||||
%%% Ambiguous Chain Object vs Identifier Parsing
|
||||
|
||||
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
|
||||
% From a programming perspective, we are trying to parse a constant, so
|
||||
% an alphanum token can really only be a constructor, or a chain object.
|
||||
% Constructors start with uppercase characters, so lowercase can only be a
|
||||
% chain object.
|
||||
try
|
||||
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
|
||||
{account_pubkey, Data} ->
|
||||
typecheck_address(Type, Pos, String, Data, Row, Start, End);
|
||||
{contract_pubkey, Data} ->
|
||||
typecheck_contract(Type, Pos, String, Data, Row, Start, End);
|
||||
{signature, Data} ->
|
||||
typecheck_signature(Type, Pos, String, Data, Row, Start, End);
|
||||
{_, _} ->
|
||||
% Only a few chain objects are recognized by Sophia. The rest
|
||||
% are interpreted as identifiers, so we might as well give the
|
||||
% same sort of error that the compiler would give.
|
||||
{error, {unexpected_identifier, S, Row, Start, End}}
|
||||
end
|
||||
catch
|
||||
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
|
||||
end;
|
||||
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
|
||||
% Inversely, chain object prefixes are always lowercase, so any other path
|
||||
% must be a variant constructor, or invalid.
|
||||
parse_variant(Type, Pos, String, Path, Row, Start, End).
|
||||
|
||||
typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{address, Data}, Pos, String}};
|
||||
typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
|
||||
% The compiler would type error, but we should be lenient here.
|
||||
{ok, {{contract, Data}, Pos, String}};
|
||||
typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{address, Data}, Pos, String}};
|
||||
typecheck_address({O, N, _}, _, _, _, Row, Start, End) ->
|
||||
{error, {wrong_type, O, N, address, Row, Start, End}}.
|
||||
|
||||
typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{contract, Data}, Pos, String}};
|
||||
typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) ->
|
||||
% The compiler would type error, but we should be lenient here.
|
||||
{ok, {{address, Data}, Pos, String}};
|
||||
typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{contract, Data}, Pos, String}};
|
||||
typecheck_contract({O, N, _}, _, _, _, Row, Start, End) ->
|
||||
{error, {wrong_type, O, N, contract, Row, Start, End}}.
|
||||
|
||||
typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{bytes, Data}, Pos, String}};
|
||||
typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) ->
|
||||
% The compiler would probably type-error, but whatever.
|
||||
{ok, {{bytes, Data}, Pos, String}};
|
||||
typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) ->
|
||||
% The compiler would probably type-error, but whatever.
|
||||
{ok, {{bytes, Data}, Pos, String}};
|
||||
typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{bytes, Data}, Pos, String}};
|
||||
typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
|
||||
{error, {wrong_type, O, N, signature, Row, Start, End}}.
|
||||
|
||||
|
||||
%%% List Parsing
|
||||
|
||||
parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) ->
|
||||
parse_list_loop(Inner, Pos, String, "]", Row, Start, []);
|
||||
parse_list({_, _, unknown_type}, Pos, String, Row, Start) ->
|
||||
parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []);
|
||||
parse_list({O, N, _}, _, _, Row, Start) ->
|
||||
{error, {wrong_type, O, N, list, Row, Start, Start}}.
|
||||
|
||||
parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} ->
|
||||
{ok, {lists:reverse(Acc), NewPos, NewString}};
|
||||
{ok, {Token, NewPos, NewString}} ->
|
||||
parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) ->
|
||||
case parse_expression2(Inner, Pos, String, Token) of
|
||||
{ok, {Value, NewPos, NewString}} ->
|
||||
parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]);
|
||||
{error, Reason} ->
|
||||
Wrapper = choose_list_error_wrapper(CloseChar),
|
||||
% TODO: Are tuple indices off by one from list indices?
|
||||
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
|
||||
{error, Wrapped}
|
||||
end.
|
||||
|
||||
parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} ->
|
||||
{ok, {lists:reverse(Acc), NewPos, NewString}};
|
||||
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, CloseChar);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
choose_list_error_wrapper("]") -> list_element;
|
||||
choose_list_error_wrapper(")") -> tuple_element.
|
||||
|
||||
%%% Ambiguous Parenthesis Parsing
|
||||
|
||||
parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) ->
|
||||
% An untyped tuple is a list of untyped terms, and weirdly our list parser
|
||||
% works perfectly for that, as long as we change the closing character to
|
||||
% be ")" instead of "]".
|
||||
case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of
|
||||
{ok, {[Inner], NewPos, NewString}} ->
|
||||
% In Sophia, singleton tuples are unwrapped, and given the inner
|
||||
% type.
|
||||
{ok, {Inner, NewPos, NewString}};
|
||||
{ok, {TermList, NewPos, NewString}} ->
|
||||
Result = {tuple, list_to_tuple(TermList)},
|
||||
{ok, {Result, NewPos, NewString}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_tuple({O, N, T}, Pos, String, _, _) ->
|
||||
% Typed tuple parsing is quite complex, because we also want to support
|
||||
% normal parentheses for grouping. It's not strictly necessary for
|
||||
% inputting data, since we don't have any infix operators in simple
|
||||
% data/term notation, but the alternatives are to generate singleton tuples
|
||||
% naively, (which are impossible to generate from Sophia,) or to hard error
|
||||
% on singleton tuples! Being faithful to Sophia is clearly nice!
|
||||
|
||||
% Count how many ambiguous parens there are, including the one we already
|
||||
% saw.
|
||||
case count_open_parens(Pos, String, 1) of
|
||||
{ok, {Count, Token, NewPos, NewString}} ->
|
||||
% Compare that to the amount of nesting tuple connectives are in
|
||||
% the type we are expected to produce.
|
||||
{ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []),
|
||||
% Now work out what to do with all this information.
|
||||
parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
count_open_parens(Pos, String, Count) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
|
||||
count_open_parens(NewPos, NewString, Count + 1);
|
||||
{ok, {Token, NewPos, NewString}} ->
|
||||
{ok, {Count, Token, NewPos, NewString}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
extract_tuple_type_info(ParenCount, {_, _, {tuple, [Head | Rest]}}, Tails) when ParenCount > 0 ->
|
||||
% Have an open paren, and a tuple type. We need to go deeper!
|
||||
extract_tuple_type_info(ParenCount - 1, Head, [Rest | Tails]);
|
||||
extract_tuple_type_info(ParenCount, HeadType, Tails) ->
|
||||
% No parens, or no more (non-empty) tuples. Stop!
|
||||
{ParenCount, HeadType, Tails}.
|
||||
|
||||
parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) ->
|
||||
{error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
|
||||
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
|
||||
parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col);
|
||||
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) ->
|
||||
% Finished with parentheses for now, try and parse an expression out, to
|
||||
% get our head term.
|
||||
case parse_expression2(HeadType, Pos, String, Token) of
|
||||
{ok, {Result, NewPos, NewString}} ->
|
||||
% Got a head term. Now try to build all the other tuple layers.
|
||||
parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString);
|
||||
{error, Reason} ->
|
||||
% TODO: Wrap errors here too.
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) ->
|
||||
% There are zero excess parens, meaning all our parens are tuples. Get the
|
||||
% top one.
|
||||
[Tail | _] = Tails,
|
||||
% We expected some nonzero number of elements before the close paren, but
|
||||
% got zero.
|
||||
ExpectCount = 1 + length(Tail),
|
||||
{error, {not_enough_elements, ExpectCount, 0, Row, Col}};
|
||||
parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
|
||||
% If we have some ambiguous parentheses left, we now know one of them is
|
||||
% this empty tuple.
|
||||
HeadTerm = {tuple, {}},
|
||||
NewExcessCount = ExcessCount - 1,
|
||||
% Now continue the loop as if it were an integer or something, in the head
|
||||
% position.
|
||||
parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String);
|
||||
parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
|
||||
% We were expecting a head term of a different type!
|
||||
{error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
|
||||
|
||||
parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) ->
|
||||
% Tuples left to build, but no extra open parens to deal with, so we can
|
||||
% just parse multivalues naively, starting from the "we have a term,
|
||||
% waiting for a comma" stage of the loop.
|
||||
case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of
|
||||
{ok, {Terms, NewPos, NewString}} ->
|
||||
NewHead = {tuple, list_to_tuple(Terms)},
|
||||
parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString);
|
||||
{error, Reason} ->
|
||||
% TODO: More error wrapping?
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) ->
|
||||
% No open parens left, no tuples left to build, we are done!
|
||||
{ok, {HeadTerm, Pos, String}};
|
||||
parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) ->
|
||||
% The ambiguous case, where we have a mix of tuple parens, and grouping
|
||||
% parens. We want to peek at the next token, to see if it closes a grouping
|
||||
% paren.
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, ")", _, _, _, _}, NewPos, NewString}} ->
|
||||
% It is grouping! Close one excess paren, and continue.
|
||||
parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString);
|
||||
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
|
||||
% It is a real tuple! Try the normal logic, then.
|
||||
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString);
|
||||
{ok, {Token, _, _}} ->
|
||||
% Anything else is just a boring parse error we can complain about.
|
||||
unexpected_token(Token, ")");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) ->
|
||||
case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of
|
||||
{ok, {Terms, NewPos, NewString}} ->
|
||||
NewHead = {tuple, list_to_tuple(Terms)},
|
||||
parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString);
|
||||
{error, Reason} ->
|
||||
% TODO: wrap errors?
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_tuple_tails2(O, N, _, _, [], _, _) ->
|
||||
% This case is created when, for example, we want int * int, but instead we
|
||||
% get a term like ((1, 2), 3), of type (int * int) * int. The trouble is,
|
||||
% ((1, 2)) would have been valid, so it's actually the second comma that
|
||||
% tips us off to the error, not the first one.
|
||||
%
|
||||
% For simpler cases, like (1, 2) when int was expected, this error message
|
||||
% is fine:
|
||||
Err = {error, {wrong_type, O, N, tuple, -1, -1, -1}},
|
||||
% TODO: Row/col
|
||||
% TODO: Generate better error messages in the cases where N *is* a tuple,
|
||||
% but the first thing inside that tuple is the problem.
|
||||
Err.
|
||||
|
||||
%%% Unambiguous Tuple/Variant Parsing
|
||||
|
||||
parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
|
||||
check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
|
||||
{ok, {Token, NewPos, NewString}} ->
|
||||
parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) ->
|
||||
case parse_expression2(Next, Pos, String, Token) of
|
||||
{ok, {Value, NewPos, NewString}} ->
|
||||
parse_multivalue3(Rest, NewPos, NewString, Row, Start, [Value | Acc]);
|
||||
{error, Reason} ->
|
||||
Wrapper = choose_list_error_wrapper(")"),
|
||||
% TODO: Are tuple indices off by one from list indices?
|
||||
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
|
||||
{error, Wrapped}
|
||||
end;
|
||||
parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) ->
|
||||
{ok, {lists:reverse(Acc), Pos, String}};
|
||||
parse_multivalue2([], _, _, _, _, _, Token) ->
|
||||
unexpected_token(Token, ")").
|
||||
|
||||
parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
|
||||
check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
|
||||
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, ")");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
|
||||
{ok, {lists:reverse(Acc), Pos, String}};
|
||||
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
|
||||
GotCount = length(Got),
|
||||
ExpectCount = length(Remaining) + GotCount,
|
||||
{error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}.
|
||||
|
||||
%%% Variant parsing
|
||||
|
||||
parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) ->
|
||||
parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
|
||||
parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) ->
|
||||
case get_typename(O, N) of
|
||||
[Namespace, _] ->
|
||||
parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End);
|
||||
_ ->
|
||||
{error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
|
||||
end;
|
||||
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
|
||||
{error, {unresolved_variant, Row, Start, End}};
|
||||
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
|
||||
% In normal code, identifiers can have many meanings, which can result in
|
||||
% lots of different errors. In constant/immediate/normalized Sophia terms
|
||||
% we know identifiers are always variants, so we can type error if any
|
||||
% other type was expected.
|
||||
{error, {wrong_type, O, N, variant, Row, Start, End}}.
|
||||
|
||||
get_typename(O, already_normalized) ->
|
||||
get_typename(O);
|
||||
get_typename(_, N) ->
|
||||
get_typename(N).
|
||||
|
||||
get_typename({Name, _}) ->
|
||||
string:split(Name, ".", all);
|
||||
get_typename(Name) ->
|
||||
string:split(Name, ".", all).
|
||||
|
||||
parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
|
||||
case lookup_variant(Constructor, Variants, 0) of
|
||||
{ok, {Tag, ElemTypes}} ->
|
||||
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
|
||||
Arities = lists:map(GetArity, Variants),
|
||||
parse_variant3(Arities, Tag, ElemTypes, Pos, String);
|
||||
error ->
|
||||
{error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
|
||||
end.
|
||||
|
||||
parse_variant3(Arities, Tag, [], Pos, String) ->
|
||||
% Parsing of 0-arity variants is different.
|
||||
Result = {variant, Arities, Tag, {}},
|
||||
{ok, {Result, Pos, String}};
|
||||
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} ->
|
||||
parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "(");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) ->
|
||||
case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of
|
||||
{ok, {Terms, NewPos, NewString}} ->
|
||||
Result = {variant, Arities, Tag, list_to_tuple(Terms)},
|
||||
{ok, {Result, NewPos, NewString}};
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
lookup_variant(_, [], _) ->
|
||||
error;
|
||||
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
|
||||
{ok, {Tag, ElemTypes}};
|
||||
lookup_variant(Ident, [_ | Rest], Tag) ->
|
||||
lookup_variant(Ident, Rest, Tag + 1).
|
||||
|
||||
%%% Record parsing
|
||||
|
||||
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
|
||||
parse_map(KeyType, ValueType, Pos, String, #{});
|
||||
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
|
||||
parse_record(Fields, Pos, String, #{});
|
||||
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
|
||||
{ok, {#{}, NewPos, NewString}};
|
||||
{ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
|
||||
{ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
|
||||
{error, {unresolved_record, Row, Start, End}};
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "}");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
|
||||
{error, {wrong_type, O, N, map, Row, Start, Start}}.
|
||||
|
||||
parse_record(Fields, Pos, String, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{alphanum, Ident, _, Row, Start, End}, NewPos, NewString}} ->
|
||||
parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End);
|
||||
{ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
|
||||
parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "}");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
|
||||
case lists:keyfind(Ident, 1, Fields) of
|
||||
{_, Type} ->
|
||||
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type);
|
||||
false ->
|
||||
{error, {invalid_field, Ident, Row, Start, End}}
|
||||
end.
|
||||
|
||||
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
|
||||
case maps:is_key(Ident, Acc) of
|
||||
false ->
|
||||
parse_record4(Fields, Pos, String, Acc, Ident, Type);
|
||||
true ->
|
||||
{error, {field_already_present, Ident, Row, Start, End}}
|
||||
end.
|
||||
|
||||
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
|
||||
case expect_tokens(["="], Pos, String) of
|
||||
{ok, {NewPos, NewString}} ->
|
||||
parse_record5(Fields, NewPos, NewString, Acc, Ident, Type);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
|
||||
case parse_expression(Type, Pos, String) of
|
||||
{ok, {Result, NewPos, NewString}} ->
|
||||
NewAcc = maps:put(Ident, Result, Acc),
|
||||
parse_record6(Fields, NewPos, NewString, NewAcc);
|
||||
{error, Reason} ->
|
||||
wrap_error(Reason, {record_field, Ident})
|
||||
end.
|
||||
|
||||
parse_record6(Fields, Pos, String, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_record(Fields, NewPos, NewString, Acc);
|
||||
{ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
|
||||
parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "}");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
|
||||
case parse_record_final_loop(Fields, FieldValues, []) of
|
||||
{ok, Result} ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{error, {missing_field, Name}} ->
|
||||
{error, {missing_field, Name, Row, Start, End}}
|
||||
end.
|
||||
|
||||
parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
|
||||
case maps:find(Name, FieldValues) of
|
||||
{ok, Value} ->
|
||||
parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
|
||||
error ->
|
||||
{error, {missing_field, Name}}
|
||||
end;
|
||||
parse_record_final_loop([], _, [Field]) ->
|
||||
% Singleton records are type-checked in Sophia, but unwrapped in the
|
||||
% resulting FATE.
|
||||
{ok, Field};
|
||||
parse_record_final_loop([], _, FieldsReverse) ->
|
||||
Fields = lists:reverse(FieldsReverse),
|
||||
Tuple = list_to_tuple(Fields),
|
||||
{ok, {tuple, Tuple}}.
|
||||
|
||||
|
||||
%%% Map Parsing
|
||||
|
||||
parse_map(KeyType, ValueType, Pos, String, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_map2(KeyType, ValueType, NewPos, NewString, Acc);
|
||||
{ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
|
||||
{ok, {Acc, NewPos, NewString}};
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "}");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
|
||||
case parse_expression(KeyType, Pos, String) of
|
||||
{ok, {Result, NewPos, NewString}} ->
|
||||
parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result);
|
||||
{error, Reason} ->
|
||||
wrap_error(Reason, {map_key, maps:size(Acc)})
|
||||
end.
|
||||
|
||||
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
|
||||
case expect_tokens(["]", "="], Pos, String) of
|
||||
{ok, {NewPos, NewString}} ->
|
||||
parse_map4(KeyType, ValueType, NewPos, NewString, Acc, Key);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
|
||||
case parse_expression(ValueType, Pos, String) of
|
||||
{ok, {Result, NewPos, NewString}} ->
|
||||
NewAcc = maps:put(Key, Result, Acc),
|
||||
parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
|
||||
parse_map(KeyType, ValueType, NewPos, NewString, Acc);
|
||||
{ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
|
||||
{ok, {Acc, NewPos, NewString}};
|
||||
{ok, {Token, _, _}} ->
|
||||
unexpected_token(Token, "}");
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
% TODO
|
||||
wrap_error(Reason, _) -> Reason.
|
||||
|
||||
%%% Tests
|
||||
|
||||
check_sophia_to_fate(Type, Sophia, Fate) ->
|
||||
case parse_literal(Type, Sophia) of
|
||||
{ok, Fate} ->
|
||||
ok;
|
||||
{ok, FateActual} ->
|
||||
erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
|
||||
{error, Reason} ->
|
||||
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
|
||||
end.
|
||||
|
||||
compile_entrypoint_value_and_type(Source, Entrypoint) ->
|
||||
{ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),
|
||||
|
||||
% Find the fcode for the correct entrypoint.
|
||||
{fcode, Bodies, NamesMap, _} = FateCode,
|
||||
Names = maps:to_list(NamesMap),
|
||||
Name = unicode:characters_to_binary(Entrypoint),
|
||||
{Hash, Name} = lists:keyfind(Name, 2, Names),
|
||||
{_, _, Code} = maps:get(Hash, Bodies),
|
||||
FATE = extract_return_value(Code),
|
||||
|
||||
% Generate the AACI, and get the AACI type info for the correct entrypoint.
|
||||
AACI = hz_aaci:prepare_aaci(ACI),
|
||||
{ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
|
||||
|
||||
{FATE, Type}.
|
||||
|
||||
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
|
||||
FATE;
|
||||
extract_return_value(Code) ->
|
||||
erlang:exit({invalid_literal_fcode, Code}).
|
||||
|
||||
check_parser(Sophia) ->
|
||||
% Compile the literal using the compiler, to check that it is valid Sophia
|
||||
% syntax, and to get an AACI object to pass to the parser.
|
||||
Source = "contract C = entrypoint f() = " ++ Sophia,
|
||||
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
|
||||
|
||||
% Check that when we parse the term we get the same value as the Sophia
|
||||
% compiler.
|
||||
check_sophia_to_fate(unknown_type(), Sophia, Fate),
|
||||
|
||||
% Then, once we know that the term is correct, make sure that it is still
|
||||
% accepted *with* type info.
|
||||
check_sophia_to_fate(Type, Sophia, Fate).
|
||||
|
||||
check_parser_with_typedef(Typedef, Sophia) ->
|
||||
% Compile the type definitions alongside the usual literal expression.
|
||||
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
|
||||
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
|
||||
|
||||
% Do a typed parse, as usual, but there are probably record/variant
|
||||
% definitions in the AACI, so untyped parses probably don't work.
|
||||
check_sophia_to_fate(Type, Sophia, Fate).
|
||||
|
||||
anon_types_test() ->
|
||||
% Integers.
|
||||
check_parser("123"),
|
||||
check_parser("1_2_3"),
|
||||
% Bytes.
|
||||
check_parser("#DEAD000BEEF"),
|
||||
check_parser("#DE_AD0_00B_EEF"),
|
||||
% Strings.
|
||||
check_parser("\"hello world\""),
|
||||
% List of integers.
|
||||
check_parser("[1, 2, 3]"),
|
||||
% List of lists.
|
||||
check_parser("[[], [1], [2, 3]]"),
|
||||
% Tuple.
|
||||
check_parser("(1, [2, 3], (4, 5))"),
|
||||
% Map.
|
||||
check_parser("{[1] = 2, [3] = 4}"),
|
||||
|
||||
ok.
|
||||
|
||||
string_escape_codes_test() ->
|
||||
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
|
||||
check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
|
||||
ok.
|
||||
|
||||
records_test() ->
|
||||
TypeDef = "record pair = {x: int, y: int}",
|
||||
Sophia = "{x = 1, y = 2}",
|
||||
check_parser_with_typedef(TypeDef, Sophia),
|
||||
% The above won't run an untyped parse on the expression, but we can. It
|
||||
% will error, though.
|
||||
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
|
||||
|
||||
variant_test() ->
|
||||
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
|
||||
|
||||
check_parser_with_typedef(TypeDef, "Zero"),
|
||||
check_parser_with_typedef(TypeDef, "One(0)"),
|
||||
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
|
||||
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
|
||||
check_parser_with_typedef(TypeDef, "C.Zero"),
|
||||
|
||||
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
|
||||
|
||||
ok.
|
||||
|
||||
namespace_variant_test() ->
|
||||
Term = "[N.A, N.B]",
|
||||
Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
|
||||
{Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
|
||||
check_sophia_to_fate(VariantType, Term, Fate),
|
||||
|
||||
ok.
|
||||
|
||||
chain_objects_test() ->
|
||||
% Address,
|
||||
check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
|
||||
% Two different forms of signature,
|
||||
check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),
|
||||
|
||||
% We have to build a totally custom contract example in order to get an
|
||||
% AACI and return value for parsing contract addresses. This is because the
|
||||
% compiler demands that contract addresses be type checked according to the
|
||||
% logic of "contract oriented programming", including covariance, etc. and
|
||||
% "contract oriented programming" is not very compatible with ML style type
|
||||
% inference.
|
||||
Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
|
||||
Source = "contract C = entrypoint f(): C = " ++ Contract,
|
||||
{Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
|
||||
check_sophia_to_fate(ContractType, Contract, Fate),
|
||||
check_sophia_to_fate(unknown_type(), Contract, Fate),
|
||||
|
||||
ok.
|
||||
|
||||
singleton_records_test() ->
|
||||
TypeDef = "record singleton('a) = {it: 'a}",
|
||||
check_parser_with_typedef(TypeDef, "{it = 123}"),
|
||||
check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
|
||||
check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),
|
||||
|
||||
ok.
|
||||
|
||||
singleton_variants_test() ->
|
||||
% Similar tests to the singleton records, but this time there isn't
|
||||
% actually a special case; singleton variants are in fact wrapped in the
|
||||
% FATE too.
|
||||
TypeDef = "datatype wrapped('a) = Wrap('a)",
|
||||
check_parser_with_typedef(TypeDef, "Wrap(123)"),
|
||||
check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
|
||||
check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),
|
||||
|
||||
ok.
|
||||
|
||||
excess_parens_test() ->
|
||||
% 'singleton' parens are another special case, but unlike singleton
|
||||
% records, which exist in the type system, singleton parens aren't tuples
|
||||
% at all! They are just grouping, for arithmetic. For example.
|
||||
check_parser("(123)"),
|
||||
check_parser("[1, (2), ((3))]"),
|
||||
% Where this gets tricky, though, is when grouping parens are mixed with
|
||||
% tuple parens. E.g. this list of three tuples should all parse to the same
|
||||
% result.
|
||||
check_parser("[((1, 2)), ((1), 2), (((1), 2))]"),
|
||||
% Including multiple nestings of tuples and grouping, interleaved.
|
||||
check_parser("((((1), ((2, 3)))), 4)"),
|
||||
% Also empty tuples exist!
|
||||
check_parser("()"),
|
||||
check_parser("(((((), ())), ()))"),
|
||||
|
||||
ok.
|
||||
|
||||
lexer_offset_test() ->
|
||||
% Test that various tokens report their position correctly.
|
||||
{error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
|
||||
{error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
|
||||
{error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
|
||||
{error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
|
||||
{error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
|
||||
{error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),
|
||||
|
||||
% Check that the tokenizer knows its position correctly *after* various
|
||||
% tokens.
|
||||
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
|
||||
ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
|
||||
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
|
||||
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
|
||||
{error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
|
||||
{error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),
|
||||
|
||||
% Check that the tokenizer accounts for various line separators correctly.
|
||||
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
|
||||
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
|
||||
{error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),
|
||||
|
||||
ok.
|
||||
|
||||
Reference in New Issue
Block a user