hakuzaru/src/hz_sophia.erl

-module(hz_sophia).
-vsn("0.8.2").
-author("Jarvis Carroll <spiveehere@gmail.com>").
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
-license("GPL-3.0-or-later").

-export([parse_literal/1, parse_literal/2, check_parser/1]).

-include_lib("eunit/include/eunit.hrl").


-spec parse_literal(String) -> Result
    when String :: string(),
         Result :: {ok, gmb_fate_data:fate_type()}
                 | {error, Reason :: term()}.

parse_literal(String) ->
    parse_literal(unknown_type(), String).

parse_literal(Type, String) ->
    case parse_expression(Type, {1, 1}, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_literal2(Result, NewPos, NewString);
        {error, Reason} ->
            {error, Reason}
    end.

parse_literal2(Result, Pos, String) ->
    % We have parsed a valid expression. Now check that the string ends.
    case next_token(Pos, String) of
        {ok, {{eof, _, _, _, _, _}, _, _}} ->
            {ok, Result};
        {ok, {Token, _, _}} ->
            unexpected_token(Token);
        {error, Reason} ->
            {error, Reason}
    end.

%%% Tokenizer

-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))).
-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))).
-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))).
-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))).
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).

next_token({Row, Col}, []) ->
    {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}};
next_token({Row, Col}, " " ++ Rest) ->
    next_token({Row, Col + 1}, Rest);
next_token({Row, Col}, "\t" ++ Rest) ->
    next_token({Row, Col + 1}, Rest);
next_token({Row, _}, "\r\n" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\r" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token({Row, _}, "\n" ++ Rest) ->
    next_token({Row + 1, 1}, Rest);
next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
    alphanum_token(Pos, Pos, String, []);
next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
    num_token(Pos, Pos, String, [], 0);
next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
    bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
next_token({Row, Col}, "\"" ++ Rest) ->
    string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
next_token({Row, Col}, "'" ++ Rest) ->
    character_token({Row, Col}, {Row, Col + 1}, Rest, "'");
next_token({Row, Col}, [Char | Rest]) ->
    Token = {character, [Char], Char, Row, Col, Col},
    {ok, {Token, {Row, Col + 1}, Rest}}.

alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
    alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({_, Start}, {Row, End}, String, Acc) ->
    AlphaString = lists:reverse(Acc),
    Path = string:split(AlphaString, ".", all),
    Token = {alphanum, AlphaString, Path, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.

num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
    NewValue = Value * 10 + (C - $0),
    num_token(Start, {Row, Col + 1}, Rest, [C | Chars], NewValue);
num_token(Start, {Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
    NewValue = Value * 10 + (C - $0),
    num_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], NewValue);
num_token({_, Start}, {Row, End}, String, Chars, Value) ->
    NumString = lists:reverse(Chars),
    Token = {integer, NumString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.

bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
    Digit = convert_digit(C),
    bytes_token(Start, {Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
    BytesString = lists:reverse(Chars),
    Value = reverse_combine_nibbles(Digits, <<>>),
    Token = {bytes, BytesString, Value, Row, Start, End - 1},
    {ok, {Token, {Row, End}, String}}.

convert_digit(C) when C >= $0, C =< $9 ->
    C - $0;
convert_digit(C) when C >= $A, C =< $Z ->
    C - $A + 10;
convert_digit(C) when C >= $a, C =< $z ->
    C - $a + 10.

reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
    NewAcc = <<D2:4, D1:4, Acc/binary>>,
    reverse_combine_nibbles(Rest, NewAcc);
reverse_combine_nibbles([D1], Acc) ->
    <<0:4, D1:4, Acc/binary>>;
reverse_combine_nibbles([], Acc) ->
    Acc.

string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
string_token({_, Start}, {Row, Col}, [], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\r | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token({_, Start}, {Row, Col}, [$\n | _], SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
string_token(Start, Pos, String, SourceChars, Value) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, NewSourceChars, NewPos, NewString}} ->
            % TODO: ERTS probably had to convert this FROM utf8 at some point,
            % so why bother, if we need to convert it back? I guess we could
            % accept iolists if we really wanted to waste time on this point...
            NewValue = <<Value/binary, Char/utf8>>,
            string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
        {error, Reason} ->
            {error, Reason}
    end.

character_token({_, Start}, {Row, Col}, [], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token({_, Start}, {Row, Col}, [$\r | _], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token({_, Start}, {Row, Col}, [$\n | _], SourceChars) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}};
character_token(Start, Pos, String, SourceChars) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, NewSourceChars, NewPos, NewString}} ->
            character_token2(Start, NewPos, NewString, NewSourceChars, Char);
        {error, Reason} ->
            {error, Reason}
    end.

character_token2({_, Start}, {Row, Col}, [$' | Rest], SourceChars, Value) ->
    SourceStr = lists:reverse([$' | SourceChars]),
    Token = {char_literal, SourceStr, Value, Row, Start, Col},
    {ok, {Token, {Row, Col + 1}, Rest}};
character_token2({_, Start}, {Row, Col}, _, SourceChars, _) ->
    SourceStr = lists:reverse(SourceChars),
    {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}.

parse_char(Start, {Row, Col}, "\\x{" ++ String, SourceChars) ->
    escape_long_hex_code(Start, {Row, Col + 3}, String, "{x\\" ++ SourceChars, 0);
parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
    Byte = convert_digit(A) * 16 + convert_digit(B),
    {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}};
parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            {ok, {ByteVal, [C, $\ | SourceChars], {Row, Col + 2}, Rest}};
        error ->
            {error, {invalid_escape_code, [$\\, C], Row, Start, Col + 1}}
    end;
parse_char(_, {Row, Col}, [C | Rest], SourceChars) ->
    {ok, {C, [C | SourceChars], {Row, Col + 1}, Rest}}.

escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
    {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
    NewSourceChars = [C | SourceChars],
    NewValue = 16 * Value + convert_digit(C),
    escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
    {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
    % Just return as if the escape code were closed, and let the string parser
    % produce an unclosed string error instead.
    {ok, {Value, SourceChars, Pos, []}}.

escape_char($b)  -> {ok, $\b};
escape_char($e)  -> {ok, $\e};
escape_char($f)  -> {ok, $\f};
escape_char($n)  -> {ok, $\n};
escape_char($r)  -> {ok, $\r};
escape_char($t)  -> {ok, $\t};
escape_char($v)  -> {ok, $\v};
% Technically \" and \' are only valid inside their own quote characters, not
% each other, but whatever, we will just be permissive here.
escape_char($")  -> {ok, $\"};
escape_char($')  -> {ok, $\'};
escape_char($\\) -> {ok, $\\};
escape_char(_)   -> error.

%%% Sophia Literal Parser

%%% This parser is a simple recursive descent parser, written explicitly in
%%% erlang.
%%%
%%% There are no infix operators in the subset we want to parse, so recursive
%%% descent is fine with no special tricks, no shunting yard algorithm, no
%%% parser generators, etc.
%%%
%%% If we were writing this in C then we might want to work iteratively with an
%%% array of finite state machines, i.e. with a pushdown automaton, instead of
%%% using recursion. This is a tried and true method of making fast parsers.
%%% Recall, however, that the BEAM *is* a stack machine, written in C, so
%%% rather than writing confusing iterative code in Erlang, to simulate a
%%% pushdown automaton inside another simulated stack machine... we should just
%%% write the recursive code, thus programming the BEAM to implement the
%%% pushdown automaton that we want.

parse_expression(Type, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {Token, NewPos, NewString}} ->
            parse_expression2(Type, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.

parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
    typecheck_integer(Type, Pos, String, Value, Row, Start, End);
parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) ->
    case next_token(Pos, String) of
        {ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} ->
            typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End);
        {error, Reason} ->
            {error, Reason}
    end;
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [Len]}} ->
            {ok, {Result, Pos, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, bits} ->
            Size = bit_size(Value),
            <<IntValue:Size>> = Value,
            {ok, {{bits, IntValue}, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Result, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, string} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, string, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) ->
    case Type of
        {_, _, char} ->
            {ok, {Value, Pos, String}};
        {_, _, unknown_type} ->
            {ok, {Value, Pos, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, char, Row, Start, End}}
    end;
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
    parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
    parse_tuple(Type, Pos, String);
parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
    parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) ->
    parse_alphanum(Type, Pos, String, Path, Row, Start, End);
parse_expression2(_, _, _, {eof, _, _, _, _, _}) ->
    {error, unexpected_end_of_file};
parse_expression2(_, _, _, Token) ->
    unexpected_token(Token).

unknown_type() ->
    {unknown_type, already_normalized, unknown_type}.

expect_tokens([], Pos, String) ->
    {ok, {Pos, String}};
expect_tokens([Str | Rest], Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} ->
            expect_tokens(Rest, NewPos, NewString);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, Str);
        {error, Reason} ->
            {error, Reason}
    end.

unexpected_token(Token, _Expected) ->
    % I don't know if this is a good idea, but sometimes there are only one or
    % two tokens that could have worked, which might make for simple
    % non-technical error messages. I don't know how to format that yet,
    % though.
    unexpected_token(Token).

unexpected_token({eof, _, _, _, _, _}) ->
    {error, expression_incomplete};
unexpected_token({_, S, _, Row, Start, End}) ->
    {error, {unexpected_token, S, Row, Start, End}}.

%%% Ambiguous Chain Object vs Identifier Parsing

parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, true, Row, Start, End);
parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) ->
    typecheck_bool(Type, Pos, String, false, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, -1, Row, Start, End);
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
    typecheck_bits(Type, Pos, String, 0, Row, Start, End);
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
    % From a programming perspective, we are trying to parse a constant, so
    % an alphanum token can really only be a constructor, or a chain object.
    % Constructors start with uppercase characters, so lowercase can only be a
    % chain object.
    try
        case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
            {account_pubkey, Data} ->
                typecheck_address(Type, Pos, String, Data, Row, Start, End);
            {contract_pubkey, Data} ->
                typecheck_contract(Type, Pos, String, Data, Row, Start, End);
            {signature, Data} ->
                typecheck_signature(Type, Pos, String, Data, Row, Start, End);
            {_, _} ->
                % Only a few chain objects are recognized by Sophia. The rest
                % are interpreted as identifiers, so we might as well give the
                % same sort of error that the compiler would give.
                {error, {unexpected_identifier, S, Row, Start, End}}
        end
    catch
        _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
    end;
parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
    % Inversely, chain object prefixes are always lowercase, so any other path
    % must be a variant constructor, or invalid.
    parse_variant(Type, Pos, String, Path, Row, Start, End).

typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_integer({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, integer, Row, Start, End}}.

typecheck_bool({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) ->
    {ok, {Value, Pos, String}};
typecheck_bool({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, boolean, Row, Start, End}}.

typecheck_bits({_, _, unknown_type}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, bits, Row, Start, End}}.

typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{contract, Data}, Pos, String}};
typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{address, Data}, Pos, String}};
typecheck_address({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, address, Row, Start, End}}.

typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) ->
    % The compiler would type error, but we should be lenient here.
    {ok, {{address, Data}, Pos, String}};
typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{contract, Data}, Pos, String}};
typecheck_contract({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, contract, Row, Start, End}}.

typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) ->
    % The compiler would probably type-error, but whatever.
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
    {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, signature, Row, Start, End}}.


%%% List Parsing

parse_list({_, _, {list, [Inner]}}, Pos, String, _, _) ->
    parse_list2(Inner, Pos, String);
parse_list({_, _, unknown_type}, Pos, String, _, _) ->
    parse_list2(unknown_type(), Pos, String);
parse_list({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, list, Row, Start, Start}}.

parse_list2(Inner, Pos, String) ->
    case parse_list_loop(Inner, Pos, String, "]", []) of
        {ok, {Result, _, _, NewPos, NewString}} ->
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

parse_list_loop(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}};
        {ok, {Token, NewPos, NewString}} ->
            parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.

parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) ->
    case parse_expression2(Inner, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(CloseChar),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end.

parse_list_loop3(Inner, Pos, String, CloseChar, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} ->
            {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}};
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, CloseChar);
        {error, Reason} ->
            {error, Reason}
    end.

choose_list_error_wrapper("]") -> list_element;
choose_list_error_wrapper(")") -> tuple_element.

%%% Ambiguous Parenthesis Parsing

parse_tuple({_, _, unknown_type}, Pos, String) ->
    % An untyped tuple is a list of untyped terms, and weirdly our list parser
    % works perfectly for that, as long as we change the closing character to
    % be ")" instead of "]".
    case parse_list_loop(unknown_type(), Pos, String, ")", []) of
        {ok, {[Inner], false, _, NewPos, NewString}} ->
            % In Sophia, trailing commas are invalid, and so all singleton
            % tuples are unwrapped, and translated into the inner type.
            {ok, {Inner, NewPos, NewString}};
        {ok, {TermList, _, _, NewPos, NewString}} ->
            Result = {tuple, list_to_tuple(TermList)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end;
parse_tuple(Type, Pos, String) ->
    % Typed tuple parsing is quite complex, because we also want to support
    % normal parentheses for grouping. It's not strictly necessary for
    % inputting data, since we don't have any infix operators in simple
    % data/term notation, but the alternatives are to generate singleton tuples
    % naively, (which are impossible to generate from Sophia,) or to hard error
    % on singleton tuples! Being faithful to Sophia is clearly nice!

    % Count how many ambiguous parens there are, including the one we already
    % saw.
    case count_open_parens(Pos, String, 1) of
        {ok, {Count, Token, NewPos, NewString}} ->
            % Compare that to the amount of nesting tuple connectives are in
            % the type we are expected to produce.
            {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []),
            % Now work out what to do with all this information.
            parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token);
        {error, Reason} ->
            {error, Reason}
    end.

count_open_parens(Pos, String, Count) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
            count_open_parens(NewPos, NewString, Count + 1);
        {ok, {Token, NewPos, NewString}} ->
            {ok, {Count, Token, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

extract_tuple_type_info(ParenCount, {_, _, {tuple, [Head | Rest]}}, Tails) when ParenCount > 0 ->
    % Have an open paren, and a tuple type. We need to go deeper!
    extract_tuple_type_info(ParenCount - 1, Head, [Rest | Tails]);
extract_tuple_type_info(ParenCount, HeadType, Tails) ->
    % No parens, or no more (non-empty) tuples. Stop!
    {ParenCount, HeadType, Tails}.

parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) ->
    {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
    parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) ->
    % Finished with parentheses for now, try and parse an expression out, to
    % get our head term.
    case parse_expression2(HeadType, Pos, String, Token) of
        {ok, {Result, NewPos, NewString}} ->
            % Got a head term. Now try to build all the other tuple layers.
            parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString);
        {error, Reason} ->
            % TODO: Wrap errors here too.
            {error, Reason}
    end.

parse_empty_tuple(0, _, Tails, _, _, Row, Col) ->
    % There are zero excess parens, meaning all our parens are tuples. Get the
    % top one.
    [Tail | _] = Tails,
    % We expected some nonzero number of elements before the close paren, but
    % got zero.
    ExpectCount = 1 + length(Tail),
    {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
    % If we have some ambiguous parentheses left, we now know one of them is
    % this empty tuple.
    HeadTerm = {tuple, {}},
    NewExcessCount = ExcessCount - 1,
    % Now continue the loop as if it were an integer or something, in the head
    % position.
    parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
    % We were expecting a head term of a different type!
    {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.

parse_tuple_tails(0, HeadTerm, [], Pos, String) ->
    % No open parens left, no tuples left to build, we are done!
    {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) ->
    % The ambiguous case, where we have a mix of tuple parens, and grouping
    % parens. We want to peek at the next token, to see if it closes a grouping
    % paren.
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} ->
            % It is grouping! Try closing a grouping paren.
            parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} ->
            % It is a real tuple! Try parsing a tuple.
            parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col);
        {ok, {Token, _, _}} ->
            % Anything else is just a boring parse error we can complain about.
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.

parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) ->
    % A singleton tuple was expected, but a grouping paren was given. In theory
    % we could be permissive here, but we were asked to do type checking, and
    % this is a type error. The type error itself is a bit hard to reproduce,
    % but we do know exactly what the fix is, so let's report that instead.
    {error, {expected_trailing_comma, Row, Col}};
parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) ->
    % A tuple (of more than one elements) was expected, but a grouping paren
    % was given. Again, the type error is hard to produce, but the actual
    % solution is simple; add more elements.
    ExpectCount = length(Tail) + 1,
    GotCount = 1,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}};
parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % We were expecting some grouping parens, and now we know that one of them
    % was in fact grouping. Good.
    parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String).

parse_tuple_tails_comma(_, _, [], _, _, Row, Col) ->
    % No more tuples, so commas are invalid. It's hard to describe the type
    % error that a comma would actually produce, so instead let's just give
    % the user the actual solution to their problems, which is to remove the
    % comma.
    {error, {expected_close_paren, Row, Col}};
parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) ->
    % If there are no tails then we would have exited into the "grouping parens
    % only" case, so we know this works:
    [TailTypes | ParentTails] = Tails,
    % Now we can parse this tuple as a tuple.
    case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of
        {ok, {Terms, NewPos, NewString}} ->
            NewHead = {tuple, list_to_tuple(Terms)},
            % Then continue the loop, with whatever parent tuple types this
            % tuple is meant to be a part of.
            parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString);
        {error, Reason} ->
            % TODO: wrap errors?
            {error, Reason}
    end.

%%% Unambiguous Tuple/Variant Parsing

parse_multivalue(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {Token, NewPos, NewString}} ->
            parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token);
        {error, Reason} ->
            {error, Reason}
    end.

parse_multivalue2([Next | Rest], Pos, String, Acc, Token) ->
    case parse_expression2(Next, Pos, String, Token) of
        {ok, {Value, NewPos, NewString}} ->
            parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]);
        {error, Reason} ->
            Wrapper = choose_list_error_wrapper(")"),
            % TODO: Are tuple indices off by one from list indices?
            Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}),
            {error, Wrapped}
    end;
parse_multivalue2([], Pos, String, Acc, Token) ->
    count_multivalue_excess(Pos, String, Acc, Token).

parse_multivalue3(ElemTypes, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_multivalue(ElemTypes, NewPos, NewString, Acc);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, ")");
        {error, Reason} ->
            {error, Reason}
    end.

count_multivalue_excess(Pos, String, TypedAcc, Token) ->
    ExpectedLen = length(TypedAcc),
    case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of
        {ok, {TermList, _, {Row, Col}, _, _}} ->
            ActualLen = length(TermList),
            {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}};
        {error, Reason} ->
            {error, Reason}
    end.

check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
    {ok, {lists:reverse(Acc), Pos, String}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
    GotCount = length(Got),
    ExpectCount = length(Remaining) + GotCount,
    {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}.

%%% Variant parsing

parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) ->
    parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End);
parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) ->
    case get_typename(O, N) of
        [Namespace, _] ->
            parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End);
        _ ->
            {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}}
    end;
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
    {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
    % In normal code, identifiers can have many meanings, which can result in
    % lots of different errors. In constant/immediate/normalized Sophia terms
    % we know identifiers are always variants, so we can type error if any
    % other type was expected.
    {error, {wrong_type, O, N, variant, Row, Start, End}}.

get_typename(O, already_normalized) ->
    get_typename(O);
get_typename(_, N) ->
    get_typename(N).

get_typename({Name, _}) ->
    string:split(Name, ".", all);
get_typename(Name) ->
    string:split(Name, ".", all).

parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) ->
    case lookup_variant(Constructor, Variants, 0) of
        {ok, {Tag, ElemTypes}} ->
            GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end,
            Arities = lists:map(GetArity, Variants),
            parse_variant3(Arities, Tag, ElemTypes, Pos, String);
        error ->
            {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}}
    end.

parse_variant3(Arities, Tag, [], Pos, String) ->
    % Parsing of 0-arity variants is different.
    Result = {variant, Arities, Tag, {}},
    {ok, {Result, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "(");
        {error, Reason} ->
            {error, Reason}
    end.

parse_variant4(Arities, Tag, ElemTypes, Pos, String) ->
    case parse_multivalue(ElemTypes, Pos, String, []) of
        {ok, {Terms, NewPos, NewString}} ->
            Result = {variant, Arities, Tag, list_to_tuple(Terms)},
            {ok, {Result, NewPos, NewString}};
        {error, Reason} ->
            {error, Reason}
    end.

lookup_variant(_, [], _) ->
    error;
lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) ->
    {ok, {Tag, ElemTypes}};
lookup_variant(Ident, [_ | Rest], Tag) ->
    lookup_variant(Ident, Rest, Tag + 1).

%%% Record parsing

parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
    parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
    parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
    case next_token(Pos, String) of
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {#{}, NewPos, NewString}};
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
        {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
            {error, {unresolved_record, Row, Start, End}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end;
parse_record_or_map({O, N, _}, _, _, Row, Start) ->
    {error, {wrong_type, O, N, map, Row, Start, Start}}.

parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{alphanum, Ident, _, Row, Start, End}, NewPos, NewString}} ->
            parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End);
        {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
            parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        {_, Type} ->
            parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type);
        false ->
            {error, {invalid_field, Ident, Row, Start, End}}
    end.

parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        false ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type);
        true ->
            {error, {field_already_present, Ident, Row, Start, End}}
    end.

parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {ok, {NewPos, NewString}} ->
            parse_record5(Fields, NewPos, NewString, Acc, Ident, Type);
        {error, Reason} ->
            {error, Reason}
    end.

parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewPos, NewString, NewAcc);
        {error, Reason} ->
            wrap_error(Reason, {record_field, Ident})
    end.

parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_record(Fields, NewPos, NewString, Acc);
        {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} ->
            parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End);
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {ok, Result} ->
            {ok, {Result, Pos, String}};
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}}
    end.

parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) ->
    case maps:find(Name, FieldValues) of
        {ok, Value} ->
            parse_record_final_loop(Rest, FieldValues, [Value | Acc]);
        error ->
            {error, {missing_field, Name}}
    end;
parse_record_final_loop([], _, [Field]) ->
    % Singleton records are type-checked in Sophia, but unwrapped in the
    % resulting FATE.
    {ok, Field};
parse_record_final_loop([], _, FieldsReverse) ->
    Fields = lists:reverse(FieldsReverse),
    Tuple = list_to_tuple(Fields),
    {ok, {tuple, Tuple}}.


%%% Map Parsing

parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result);
        {error, Reason} ->
            wrap_error(Reason, {map_key, maps:size(Acc)})
    end.

parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {ok, {NewPos, NewString}} ->
            parse_map4(KeyType, ValueType, NewPos, NewString, Acc, Key);
        {error, Reason} ->
            {error, Reason}
    end.

parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Key, Result, Acc),
            parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc);
        {error, Reason} ->
            {error, Reason}
    end.

parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_map(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {Token, _, _}} ->
            unexpected_token(Token, "}");
        {error, Reason} ->
            {error, Reason}
    end.

% TODO
wrap_error(Reason, _) -> Reason.

%%% Tests

check_sophia_to_fate(Type, Sophia, Fate) ->
    case parse_literal(Type, Sophia) of
        {ok, Fate} ->
            ok;
        {ok, FateActual} ->
            erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
        {error, Reason} ->
            erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
    end.

compile_entrypoint_value_and_type(Source, Entrypoint) ->
    {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]),

    % Find the fcode for the correct entrypoint.
    {fcode, Bodies, NamesMap, _} = FateCode,
    Names = maps:to_list(NamesMap),
    Name = unicode:characters_to_binary(Entrypoint),
    {Hash, Name} = lists:keyfind(Name, 2, Names),
    {_, _, Code} = maps:get(Hash, Bodies),
    FATE = extract_return_value(Code),

    % Generate the AACI, and get the AACI type info for the correct entrypoint.
    AACI = hz_aaci:prepare(ACI),
    {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"),

    {FATE, Type}.

extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) ->
    FATE;
extract_return_value(Code) ->
    erlang:exit({invalid_literal_fcode, Code}).

check_parser(Sophia) ->
    % Compile the literal using the compiler, to check that it is valid Sophia
    % syntax, and to get an AACI object to pass to the parser.
    Source = "contract C = entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),

    % Check that when we parse the term we get the same value as the Sophia
    % compiler.
    check_sophia_to_fate(unknown_type(), Sophia, Fate),

    % Then, once we know that the term is correct, make sure that it is still
    % accepted *with* type info.
    check_sophia_to_fate(Type, Sophia, Fate).

check_parser_with_typedef(Typedef, Sophia) ->
    % Compile the type definitions alongside the usual literal expression.
    Source = "contract C =\n  " ++ Typedef ++ "\n  entrypoint f() = " ++ Sophia,
    {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),

    % Do a typed parse, as usual, but there are probably record/variant
    % definitions in the AACI, so untyped parses probably don't work.
    check_sophia_to_fate(Type, Sophia, Fate).

anon_types_test() ->
    % Integers.
    check_parser("123"),
    check_parser("1_2_3"),
    check_parser("-123"),
    % Booleans.
    check_parser("true"),
    check_parser("false"),
    check_parser("[true, false]"),
    % Bytes.
    check_parser("#DEAD000BEEF"),
    check_parser("#DE_AD0_00B_EEF"),
    % Strings.
    check_parser("\"hello world\""),
    % The Sophia compiler doesn't handle this right, but we should still.
    %check_parser("\"ÿ\""),
    %check_parser("\"♣\""),
    % Characters.
    check_parser("'A'"),
    check_parser("['a', ' ', '[']"),
    %check_parser("'ÿ'"),
    %check_parser("'♣'"),
    % List of integers.
    check_parser("[1, 2, 3]"),
    % List of lists.
    check_parser("[[], [1], [2, 3]]"),
    % Tuple.
    check_parser("(1, [2, 3], (4, 5))"),
    % Map.
    check_parser("{[1] = 2, [3] = 4}"),

    ok.

string_escape_codes_test() ->
    check_parser("\"  \\b\\e\\f\\n\\r\\t\\v\\\"\\\\  \""),
    check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
    check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
    check_parser("\"'\""),

    check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
    check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"),
    check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"),
    check_parser("'\"'"),

    ok.

records_test() ->
    TypeDef = "record pair = {x: int, y: int}",
    Sophia = "{x = 1, y = 2}",
    check_parser_with_typedef(TypeDef, Sophia),
    % The above won't run an untyped parse on the expression, but we can. It
    % will error, though.
    {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia).

variant_test() ->
    TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)",

    check_parser_with_typedef(TypeDef, "Zero"),
    check_parser_with_typedef(TypeDef, "One(0)"),
    check_parser_with_typedef(TypeDef, "Two(0, 1)"),
    check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"),
    check_parser_with_typedef(TypeDef, "C.Zero"),

    {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"),

    ok.

ambiguous_variant_test() ->
    TypeDef = "datatype mytype = C | D",
    check_parser_with_typedef(TypeDef, "C"),
    check_parser_with_typedef(TypeDef, "D"),
    check_parser_with_typedef(TypeDef, "C.C"),
    check_parser_with_typedef(TypeDef, "C.D"),

    ok.

namespace_variant_test() ->
    Term = "[N.A, N.B]",
    Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
    {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(VariantType, Term, Fate),

    ok.

chain_objects_test() ->
    % Address,
    check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
    % Two different forms of signature,
    check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),

    % We have to build a totally custom contract example in order to get an
    % AACI and return value for parsing contract addresses. This is because the
    % compiler demands that contract addresses be type checked according to the
    % logic of "contract oriented programming", including covariance, etc. and
    % "contract oriented programming" is not very compatible with ML style type
    % inference.
    Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
    Source = "contract C = entrypoint f(): C = " ++ Contract,
    {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
    check_sophia_to_fate(ContractType, Contract, Fate),
    check_sophia_to_fate(unknown_type(), Contract, Fate),

    ok.

bits_test() ->
    check_parser("Bits.all"),
    check_parser("Bits.none"),
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"),
    check_sophia_to_fate(Type, "5", {bits, 5}),
    check_sophia_to_fate(Type, "-5", {bits, -5}),
    check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}),
    ok.

singleton_records_test() ->
    TypeDef = "record singleton('a) = {it: 'a}",
    check_parser_with_typedef(TypeDef, "{it = 123}"),
    check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"),
    check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"),

    ok.

singleton_variants_test() ->
    % Similar tests to the singleton records, but this time there isn't
    % actually a special case; singleton variants are in fact wrapped in the
    % FATE too.
    TypeDef = "datatype wrapped('a) = Wrap('a)",
    check_parser_with_typedef(TypeDef, "Wrap(123)"),
    check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"),
    check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"),

    ok.

excess_parens_test() ->
    % 'singleton' parens are another special case, but unlike singleton
    % records, which exist in the type system, singleton parens aren't tuples
    % at all! They are just grouping, for arithmetic. For example.
    check_parser("(123)"),
    check_parser("[1, (2), ((3))]"),
    % Where this gets tricky, though, is when grouping parens are mixed with
    % tuple parens. E.g. this list of three tuples should all parse to the same
    % result.
    check_parser("[((1, 2)), ((1), 2), (((1), 2))]"),
    % Including multiple nestings of tuples and grouping, interleaved.
    check_parser("((((1), ((2, 3)))), 4)"),
    % Also empty tuples exist!
    check_parser("()"),
    check_parser("(((((), ())), ()))"),

    ok.

lexer_offset_test() ->
    % Test that various tokens report their position correctly.
    {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"),
    {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"),
    {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"),
    {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"),
    {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""),
    {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""),

    % Check that the tokenizer knows its position correctly *after* various
    % tokens.
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"),
    ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}},
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"),
    {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"),
    {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"),

    % Check that the tokenizer accounts for various line separators correctly.
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"),
    {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"),

    ok.

parser_offset_test() ->
    {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"),

    {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"),
    {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"),
    {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"),
    {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"),

    ok.

singleton_test() ->
    % The Sophia compiler would never generate this, but it is a valid type
    % within the FATE virtual machine, and it is possible to represent within
    % the ACI itself.
    SingletonACI = #{tuple => [<<"int">>]},

    % Build an AACI around this, and run it through the AACI machinery.
    Function = #{name => <<"f">>,
                 arguments => [],
                 stateful => false,
                 payable => false,
                 returns => SingletonACI},
    ACI = [#{contract => #{functions => [Function],
                           name => <<"C">>,
                           kind => contract_main,
                           payable => false,
                           typedefs => []}}],
    {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI),

    % Now let's do some testing with this weird type, to see if we handle it
    % correctly.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
    % Some ambiguous nesting parens, for fun.
    {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
    % No trailing comma should give an error.
    {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
    % All of the above should behave the same in untyped contexts:
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
    {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
    {ok, 1} = parse_literal(unknown_type(), "(1)"),

    % Also if we wanted an integer, the singleton is NOT dropped, so is also an
    % error.
    {error, {expected_close_paren, 1, 3}} = parse_literal({integer, alread_normalized, integer}, "(1,)"),

    ok.