15 Commits

Author SHA1 Message Date
Jarvis Carroll 49cd8b6687 Parse strings 2026-01-29 06:18:06 +00:00
Jarvis Carroll 966b4b2748 Calculate scalar values during lexing
This saves some effort and probably some performance for things like integers, but I'm mainly doing this in anticipation of string literals, because it would just be ridiculous to read code that lexes string literals twice.
2026-01-29 04:06:19 +00:00
Jarvis Carroll fe182a5233 Handle underscores in integers/bytes
This forces us to test for alpha/num/hex enough times that it's now worth making macros for these things.
2026-01-29 03:03:11 +00:00
Jarvis Carroll f1696e2b9e Bytes lexing
I don't handle underscores in bytes correctly... Nor in integers, for that matter.
2026-01-29 02:01:16 +00:00
Jarvis Carroll 2bf384ca82 Infer correct values for tests automatically
Now tests compare the literal parser against the output of the
compiler. The little example contracts we are compiling for the
AACI already had the FATE value in them, in the form of the
instruction
	{'RETURNR', {immediate, FateValue}}
so we just extract that and use it for the tests.
2026-01-27 06:42:55 +00:00
Jarvis Carroll 4f2a3c6c6f Variant parsing 2026-01-23 06:18:39 +00:00
Jarvis Carroll 7df04a81be Tuple parsing 2026-01-23 02:45:23 +00:00
Jarvis Carroll 6f02d4c4e6 Record parsing 2026-01-23 00:48:06 +00:00
Jarvis Carroll 56e63051bc Map parsing 2026-01-16 05:46:27 +00:00
Jarvis Carroll 3f1c9bd626 List parsing
Slowly chipping away at cases...
2026-01-15 09:38:04 +00:00
Jarvis Carroll 97e32574c4 set up parsing structure
We tokenize, and then do the simplest possible recursive descent.

We don't want to evaluate anything, so infix operators are out,
meaning no shunting yard or tree rearranging or LR(1) shenanigans
are necessary, just write the code.

If we want to 'peek', just take the next token, and pass it around
from that point on, until it can actually be consumed.
2026-01-15 01:52:30 +00:00
Jarvis Carroll 6f5525afcf Rename get_function_signature
hz_aaci:aaci_get_function_signature is a bit redundant.
2026-01-15 01:50:50 +00:00
Jarvis Carroll 4f1958b210 use lists:unzip/1
Just a little thing I noticed could be improved.
2026-01-13 01:19:29 +00:00
Jarvis Carroll 3da9bd570b split coerce/3 into two functions
Also renamed coerce_bindings to erlang_args_to_fate, to match.
2026-01-09 04:39:58 +00:00
Jarvis Carroll d2163c1ff8 split AACI out of hz.erl
So far the interface to hz.erl is mostly unchanged, apart from prepare_aaci/1

Maybe prepare_aaci should be re-exported, but using it is exactly in line with the
'inconvenient but more flexible primitives' that hz_aaci.erl is meant to represent,
so, maybe that is a fine place to have to go for it, dunno.
2026-01-07 09:40:55 +00:00
4 changed files with 1831 additions and 1134 deletions
+7 -1098
View File
File diff suppressed because it is too large Load Diff
+1184
View File
File diff suppressed because it is too large Load Diff
+2 -36
View File
@@ -190,48 +190,16 @@ l_to_i(S) ->
end.
-spec req(Type, Message) -> RequestMap
when Type :: {sign, message | binary | bitcoin}
| tx
| ack
| sign,
Message :: binary(),
RequestMap :: map().
%% @doc
%% GRIDS maps always contain the following keys:
%% ```
%% #{"grids" => 1,
%% "chain" => "gajumaru",
%% "network_id" => "groot.mainnet.gajumaru.io",
%% "type" => "message" | "binary" | "binary" | "tx" | "ack"
%% "public_id" => term(),
%% "payload" => string()};
%% '''
req(Type, Message) ->
req(Type, Message, false).
req({sign, message}, Message, ID) ->
req(sign, Message, ID) ->
#{"grids" => 1,
"chain" => "gajumaru",
"network_id" => hz:network_id(),
"type" => "message",
"public_id" => ID,
"payload" => Message};
req({sign, binary}, Binary, ID) ->
#{"grids" => 1,
"chain" => "gajumaru",
"network_id" => hz:network_id(),
"type" => "binary",
"public_id" => ID,
"payload" => base64:encode(Binary)};
req({sign, bitcoin}, Binary, ID) ->
#{"grids" => 1,
"chain" => "gajumaru",
"network_id" => hz:network_id(),
"type" => "bitcoin",
"public_id" => ID,
"payload" => base64:encode(Binary)};
req(tx, Data, ID) ->
#{"grids" => 1,
"chain" => "gajumaru",
@@ -245,6 +213,4 @@ req(ack, Message, ID) ->
"network_id" => hz:network_id(),
"type" => "ack",
"public_id" => ID,
"payload" => Message};
req(sign, Message, ID) ->
req({sign, message}, Message, ID).
"payload" => Message}.
+638
View File
@@ -0,0 +1,638 @@
-module(hz_sophia).
-vsn("0.8.2").
-author("Jarvis Carroll <spiveehere@gmail.com>").
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
-license("GPL-3.0-or-later").
-export([check_parser/1]).
-include_lib("eunit/include/eunit.hrl").
parse_literal(Type, String) ->
case parse_expression(Type, {tk, 1, 1}, String) of
{ok, {Result, NewTk, NewString}} ->
parse_literal2(Result, NewTk, NewString);
{error, Reason} ->
{error, Reason}
end.
parse_literal2(Result, Tk, String) ->
% We have parsed a valid expression. Now check that the string ends.
case next_token(Tk, String) of
{ok, {{eof, _, _, _, _}, _, _}} ->
{ok, Result};
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
{error, {unexpected_token, S, Row, Start, End}};
{error, Reason} ->
{error, Reason}
end.
%%% Tokenizer
-define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
next_token({tk, Row, Col}, []) ->
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
next_token({tk, Row, Col}, " " ++ Rest) ->
next_token({tk, Row + 1, Col}, Rest);
next_token({tk, Row, Col}, "\t" ++ Rest) ->
next_token({tk, Row + 1, Col}, Rest);
next_token({tk, _, Col}, "\r\n" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest);
next_token({tk, _, Col}, "\r" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest);
next_token({tk, _, Col}, "\n" ++ Rest) ->
next_token({tk, 1, Col + 1}, Rest);
next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) ->
alphanum_token(Tk, Tk, String, []);
next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
num_token(Tk, Tk, String, [], 0);
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []);
next_token({tk, Row, Col}, "\"" ++ Rest) ->
string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>);
next_token({tk, Row, Col}, [Char | Rest]) ->
Token = {character, [Char], Char, Row, Col, Col},
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
AlphaString = lists:reverse(Acc),
Token = {alphanum, AlphaString, AlphaString, Row, Start, End},
{ok, {Token, {tk, Row, End}, String}}.
num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], NewValue);
num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Chars], NewValue);
num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) ->
NumString = lists:reverse(Chars),
Token = {integer, NumString, Value, Row, Start, End},
{ok, {Token, {tk, Row, End}, String}}.
bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C),
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C),
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) ->
BytesString = lists:reverse(Chars),
Value = reverse_combine_nibbles(Digits, <<>>),
Token = {bytes, BytesString, Value, Row, Start, End},
{ok, {Token, {tk, Row, End}, String}}.
convert_digit(C) when C >= $0, C =< $9 ->
C - $0;
convert_digit(C) when C >= $A, C =< $Z ->
C - $A + 10;
convert_digit(C) when C >= $a, C =< $z ->
C - $a + 10.
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
NewAcc = <<D2:4, D1:4, Acc/binary>>,
reverse_combine_nibbles(Rest, NewAcc);
reverse_combine_nibbles([D1], Acc) ->
<<0:4, D1:4, Acc/binary>>;
reverse_combine_nibbles([], Acc) ->
Acc.
string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) ->
case escape_hex_code(A, B) of
{ok, ByteVal} ->
string_token(Start, {tk, Row + 4, Col}, Rest, [B, A, $x, $\ | SourceChars], <<Value/binary, ByteVal>>);
error ->
{error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}}
end;
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
case escape_char(C) of
{ok, ByteVal} ->
string_token(Start, {tk, Row + 2, Col}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>);
error ->
{error, {invalid_escape_code, [C], Row, Col}}
end;
string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) ->
SourceStr = lists:reverse([$" | SourceChars]),
Token = {string, SourceStr, Value, Row, Start, End},
{ok, {Token, {tk, Row, End}, Rest}};
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <<Value/binary, C>>).
escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) ->
% As of writing this, the Sophia compiler will convert this byte from
% extended ASCII to unicode... But it really shouldn't. The literal parser
% does what the compiler should do.
Byte = convert_digit(A) * 16 + convert_digit(B),
{ok, Byte};
escape_hex_code(_, _) ->
error.
escape_char($b) -> {ok, $\b};
escape_char($e) -> {ok, $\e};
escape_char($f) -> {ok, $\f};
escape_char($n) -> {ok, $\n};
escape_char($r) -> {ok, $\r};
escape_char($t) -> {ok, $\t};
escape_char($v) -> {ok, $\v};
escape_char($") -> {ok, $\"};
escape_char($\\) -> {ok, $\\};
escape_char(_) -> error.
%%% Sophia Literal Parser
%%% This parser is a simple recursive descent parser, written explicitly in
%%% erlang.
%%%
%%% There are no infix operators in the subset we want to parse, so recursive
%%% descent is fine with no special tricks, no shunting yard algorithm, no
%%% parser generators, etc.
%%%
%%% If we were writing this in C then we might want to work iteratively with an
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
%%% using recursion. This is a tried and true method of making fast parsers.
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
%%% rather than writing confusing iterative code in Erlang, to simulate a
%%% pushdown automaton inside another simulated stack machine... we should just
%%% write the recursive code, thus programming the BEAM to implement the
%%% pushdown automaton that we want.
parse_expression(Type, Tk, String) ->
case next_token(Tk, String) of
{ok, {Token, NewTk, NewString}} ->
parse_expression2(Type, NewTk, NewString, Token);
{error, Reason} ->
{error, Reason}
end.
parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) ->
case Type of
{_, _, integer} ->
{ok, {Value, Tk, String}};
{_, _, unknown_type} ->
{ok, {Value, Tk, String}};
{O, N, _} ->
{error, {wrong_type, O, N, integer, Row, Start, End}}
end;
parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) ->
Len = byte_size(Value),
Result = {bytes, Value},
case Type of
{_, _, {bytes, [any]}} ->
{ok, {Result, Tk, String}};
{_, _, {bytes, [Len]}} ->
{ok, {Result, Tk, String}};
{_, _, {bytes, [ExpectedLen]}} ->
{error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
{_, _, unknown_type} ->
{ok, {Result, Tk, String}};
{O, N, _} ->
{error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
end;
parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) ->
case Type of
{_, _, string} ->
{ok, {Value, Tk, String}};
{_, _, unknown_type} ->
{ok, {Value, Tk, String}};
{O, N, _} ->
{error, {wrong_type, O, N, string, Row, Start, End}}
end;
parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) ->
parse_list(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) ->
parse_variant(Type, Tk, String, Ident, Row, Start, End);
parse_expression2(_, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}.
unknown_type() ->
{unknown_type, already_normalized, unknown_type}.
expect_tokens([], Tk, String) ->
{ok, {Tk, String}};
expect_tokens([Str | Rest], Tk, String) ->
case next_token(Tk, String) of
{ok, {{_, Str, _, _, _, _}, NewTk, NewString}} ->
expect_tokens(Rest, NewTk, NewString);
{ok, {{_, Actual, _, Row, Start, End}}} ->
{error, {unexpected_token, Actual, Row, Start, End}}
end.
%%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
parse_list_loop(Inner, Tk, String, "]", Row, Start, []);
parse_list({_, _, unknown_type}, Tk, String, Row, Start) ->
parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []);
parse_list({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, list, Row, Start, Start}}.
parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}};
{ok, {Token, NewTk, NewString}} ->
parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token)
end.
parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) ->
case parse_expression2(Inner, Tk, String, Token) of
{ok, {Value, NewTk, NewString}} ->
parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]);
{error, Reason} ->
Wrapper = choose_list_error_wrapper(CloseChar),
% TODO: Are tuple indices off by one from list indices?
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
{error, Wrapped}
end.
parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}};
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} ->
parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc);
{error, Reason} ->
{error, Reason}
end.
choose_list_error_wrapper("]") -> list_element;
choose_list_error_wrapper(")") -> tuple_element.
%%% Tuple Parsing
parse_tuple({_, _, {tuple, Types}}, Tk, String, Row, Start) ->
case parse_multivalue(Types, Tk, String, Row, Start, []) of
{ok, {TermList, NewTk, NewString}} ->
Result = {tuple, list_to_tuple(TermList)},
{ok, {Result, NewTk, NewString}};
{error, Reason} ->
{error, Reason}
end;
parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) ->
% An untyped tuple is a list of untyped terms, and weirdly our list parser
% works perfectly for that, as long as we change the closing character to
% be ")" instead of "]".
case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of
{ok, {TermList, NewTk, NewString}} ->
Result = {tuple, list_to_tuple(TermList)},
{ok, {Result, NewTk, NewString}};
{error, Reason} ->
{error, Reason}
end;
parse_tuple({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, tuple, Row, Start, Start}}.
parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} ->
check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
{ok, {Token, NewTk, NewString}} ->
parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token)
end.
parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) ->
case parse_expression2(Next, Tk, String, Token) of
{ok, {Value, NewTk, NewString}} ->
parse_multivalue3(Rest, NewTk, NewString, Row, Start, [Value | Acc]);
{error, Reason} ->
Wrapper = choose_list_error_wrapper(")"),
% TODO: Are tuple indices off by one from list indices?
Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
{error, Wrapped}
end;
parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) ->
{ok, {lists:reverse(Acc), Tk, String}};
parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}.
parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) ->
case next_token(Tk, String) of
{ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} ->
check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc);
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} ->
parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc);
{error, Reason} ->
{error, Reason}
end.
check_multivalue_long_enough([], Tk, String, _, _, Acc) ->
{ok, {lists:reverse(Acc), Tk, String}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
GotCount = length(Got),
ExpectCount = length(Remaining) + GotCount,
{error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}.
%%% Variant parsing
parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) ->
parse_variant2(Variants, Tk, String, Ident, Row, Start, End);
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
% In normal code, identifiers can have many meanings, which can result in
% lots of different errors. In this Sophia 'object notation', identifiers
% can only ever be variant constructors, (sort of like the Sophia version
% of atoms,) and so immediately lead to a type error if we aren't expecting
% a variant.
{error, {wrong_type, O, N, variant, Row, Start, End}}.
parse_variant2(Variants, Tk, String, Ident, Row, Start, End) ->
case lookup_variant(Ident, Variants, 0) of
{ok, {Tag, ElemTypes}} ->
GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
Arities = lists:map(GetArity, Variants),
parse_variant3(Arities, Tag, ElemTypes, Tk, String);
error ->
{error, {invalid_constructor, Ident, Row, Start, End}}
end.
parse_variant3(Arities, Tag, [], Tk, String) ->
% Parsing of 0-arity variants is different.
Result = {variant, Arities, Tag, {}},
{ok, {Result, Tk, String}};
parse_variant3(Arities, Tag, ElemTypes, Tk, String) ->
case next_token(Tk, String) of
{ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} ->
parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start);
{ok, {{_, Actual, _, Row, Start, End}}} ->
{error, {unexpected_token, Actual, Row, Start, End}}
end.
parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) ->
case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of
{ok, {Terms, NewTk, NewString}} ->
Result = {variant, Arities, Tag, list_to_tuple(Terms)},
{ok, {Result, NewTk, NewString}};
{error, Reason} ->
{error, Reason}
end.
lookup_variant(_, [], _) ->
error;
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
{ok, {Tag, ElemTypes}};
lookup_variant(Ident, [_ | Rest], Tag) ->
lookup_variant(Ident, Rest, Tag + 1).
%%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) ->
parse_map(KeyType, ValueType, Tk, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) ->
parse_record(Fields, Tk, String, #{});
parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
case next_token(Tk, String) of
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} ->
{ok, {#{}, NewTk, NewString}};
{ok, {{character, "[", _, _, _, _}, NewTk, NewString}} ->
parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{});
{ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
{error, {unresolved_record, Row, Start, End}};
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
{error, {unexpected_token, S, Row, Start, End}}
end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, map, Row, Start, Start}}.
parse_record(Fields, Tk, String, Acc) ->
case next_token(Tk, String) of
{ok, {{alphanum, Ident, _, Row, Start, End}, NewTk, NewString}} ->
parse_record2(Fields, NewTk, NewString, Acc, Ident, Row, Start, End);
{ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} ->
parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End);
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
{error, {unexpected_token, S, Row, Start, End}};
{error, Reason} ->
{error, Reason}
end.
parse_record2(Fields, Tk, String, Acc, Ident, Row, Start, End) ->
case lists:keyfind(Ident, 1, Fields) of
{_, Type} ->
parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type);
false ->
{error, {invalid_field, Ident, Row, Start, End}}
end.
parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type) ->
case maps:is_key(Ident, Acc) of
false ->
parse_record4(Fields, Tk, String, Acc, Ident, Type);
true ->
{error, {field_already_present, Ident, Row, Start, End}}
end.
parse_record4(Fields, Tk, String, Acc, Ident, Type) ->
case expect_tokens(["="], Tk, String) of
{ok, {NewTk, NewString}} ->
parse_record5(Fields, NewTk, NewString, Acc, Ident, Type);
{error, Reason} ->
{error, Reason}
end.
parse_record5(Fields, Tk, String, Acc, Ident, Type) ->
case parse_expression(Type, Tk, String) of
{ok, {Result, NewTk, NewString}} ->
NewAcc = maps:put(Ident, Result, Acc),
parse_record6(Fields, NewTk, NewString, NewAcc);
{error, Reason} ->
wrap_error(Reason, {record_field, Ident})
end.
parse_record6(Fields, Tk, String, Acc) ->
case next_token(Tk, String) of
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} ->
parse_record(Fields, NewTk, NewString, Acc);
{ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} ->
parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End);
{ok, {{_, S, _, Row, Start, End}, _, _}} ->
{error, {unexpected_token, S, Row, Start, End}};
{error, Reason} ->
{error, Reason}
end.
parse_record_end(Fields, Tk, String, FieldValues, Row, Start, End) ->
case parse_record_final_loop(Fields, FieldValues, []) of
{ok, Result} ->
{ok, {Result, Tk, String}};
{error, {missing_field, Name}} ->
{error, {missing_field, Name, Row, Start, End}}
end.
parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
case maps:find(Name, FieldValues) of
{ok, Value} ->
parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
error ->
{error, {missing_field, Name}}
end;
parse_record_final_loop([], _, FieldsReverse) ->
Fields = lists:reverse(FieldsReverse),
Tuple = list_to_tuple(Fields),
{ok, {tuple, Tuple}}.
%%% Map Parsing
parse_map(KeyType, ValueType, Tk, String, Acc) ->
case next_token(Tk, String) of
{ok, {{character, "[", _, _, _, _}, NewTk, NewString}} ->
parse_map2(KeyType, ValueType, NewTk, NewString, Acc);
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} ->
{ok, {Acc, NewTk, NewString}};
{ok, {{_, S, _, Row, Start, End}}} ->
{error, {unexpected_token, S, Row, Start, End}}
end.
parse_map2(KeyType, ValueType, Tk, String, Acc) ->
case parse_expression(KeyType, Tk, String) of
{ok, {Result, NewTk, NewString}} ->
parse_map3(KeyType, ValueType, NewTk, NewString, Acc, Result);
{error, Reason} ->
wrap_error(Reason, {map_key, maps:size(Acc)})
end.
parse_map3(KeyType, ValueType, Tk, String, Acc, Key) ->
case expect_tokens(["]", "="], Tk, String) of
{ok, {NewTk, NewString}} ->
parse_map4(KeyType, ValueType, NewTk, NewString, Acc, Key);
{error, Reason} ->
{error, Reason}
end.
parse_map4(KeyType, ValueType, Tk, String, Acc, Key) ->
case parse_expression(ValueType, Tk, String) of
{ok, {Result, NewTk, NewString}} ->
NewAcc = maps:put(Key, Result, Acc),
parse_map5(KeyType, ValueType, NewTk, NewString, NewAcc);
{error, Reason} ->
{error, Reason}
end.
parse_map5(KeyType, ValueType, Tk, String, Acc) ->
case next_token(Tk, String) of
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} ->
parse_map(KeyType, ValueType, NewTk, NewString, Acc);
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} ->
{ok, {Acc, NewTk, NewString}};
{ok, {{_, S, _, Row, Start, End}}} ->
{error, {unexpected_token, S, Row, Start, End}}
end.
% TODO
wrap_error(Reason, _) -> Reason.
%%% Tests
check_sophia_to_fate(Type, Sophia, Fate) ->
case parse_literal(Type, Sophia) of
{ok, Fate} ->
ok;
{ok, FateActual} ->
erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
{error, Reason} ->
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
end.
compile_entrypoint_code_and_type(Source, Entrypoint) ->
{ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),
% Find the fcode for the correct entrypoint.
{fcode, Bodies, NamesMap, _} = FateCode,
Names = maps:to_list(NamesMap),
Name = unicode:characters_to_binary(Entrypoint),
{Hash, Name} = lists:keyfind(Name, 2, Names),
{_, _, Code} = maps:get(Hash, Bodies),
% Generate the AACI, and get the AACI type info for the correct entrypoint.
AACI = hz_aaci:prepare_aaci(ACI),
{ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),
{Code, Type}.
extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
FATE;
extract_return_value(Code) ->
erlang:exit({invalid_literal_fcode, Code}).
check_parser(Sophia) ->
% Compile the literal using the compiler, to check that it is valid Sophia
% syntax, and to get an AACI object to pass to the parser.
Source = "contract C = entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"),
Fate = extract_return_value(Code),
% Also check that the FATE term is valid, by running it through gmb.
gmb_fate_encoding:serialize(Fate),
% Now check that our parser produces that output.
check_sophia_to_fate(Type, Sophia, Fate),
% Also check that it can be parsed without type information.
check_sophia_to_fate(unknown_type(), Sophia, Fate).
check_parser_with_typedef(Typedef, Sophia) ->
% Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Code, Type} = compile_entrypoint_code_and_type(Source, "f"),
Fate = extract_return_value(Code),
% Check the FATE term as usual.
gmb_fate_encoding:serialize(Fate),
% Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work.
check_sophia_to_fate(Type, Sophia, Fate).
anon_types_test() ->
% Integers.
check_parser("123"),
check_parser("1_2_3"),
% Bytes.
check_parser("#DEAD000BEEF"),
check_parser("#DE_AD0_00B_EEF"),
% Strings.
check_parser("\"hello world\""),
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
check_parser("\"\\x00\\x11\\x77\""),
% List of integers.
check_parser("[1, 2, 3]"),
% List of lists.
check_parser("[[], [1], [2, 3]]"),
% Tuple.
check_parser("(1, [2, 3], (4, 5))"),
% Map.
check_parser("{[1] = 2, [3] = 4}"),
ok.
records_test() ->
TypeDef = "record pair = {x: int, y: int}",
Sophia = "{x = 1, y = 2}",
check_parser_with_typedef(TypeDef, Sophia),
% The above won't run an untyped parse on the expression, but we can. It
% will error, though.
{error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).
variant_test() ->
TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",
check_parser_with_typedef(TypeDef, "Zero"),
check_parser_with_typedef(TypeDef, "One(0)"),
check_parser_with_typedef(TypeDef, "Two(0, 1)"),
check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
{error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),
ok.