remove 'tk' atom from file positions

This commit is contained in:
Jarvis Carroll 2026-02-03 06:08:54 +00:00
parent a695c21fc9
commit bb4bcbb7de

View File

@ -12,16 +12,16 @@ parse_literal(String) ->
parse_literal(unknown_type(), String). parse_literal(unknown_type(), String).
parse_literal(Type, String) -> parse_literal(Type, String) ->
case parse_expression(Type, {tk, 1, 1}, String) of case parse_expression(Type, {1, 1}, String) of
{ok, {Result, NewTk, NewString}} -> {ok, {Result, NewPos, NewString}} ->
parse_literal2(Result, NewTk, NewString); parse_literal2(Result, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
parse_literal2(Result, Tk, String) -> parse_literal2(Result, Pos, String) ->
% We have parsed a valid expression. Now check that the string ends. % We have parsed a valid expression. Now check that the string ends.
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{eof, _, _, _, _}, _, _}} -> {ok, {{eof, _, _, _, _}, _, _}} ->
{ok, Result}; {ok, Result};
{ok, {{_, S, _, Row, Start, End}, _, _}} -> {ok, {{_, S, _, Row, Start, End}, _, _}} ->
@ -39,59 +39,59 @@ parse_literal2(Result, Tk, String) ->
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))).
-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))).
next_token({tk, Row, Col}, []) -> next_token({Row, Col}, []) ->
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; {ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}};
next_token({tk, Row, Col}, " " ++ Rest) -> next_token({Row, Col}, " " ++ Rest) ->
next_token({tk, Row, Col + 1}, Rest); next_token({Row, Col + 1}, Rest);
next_token({tk, Row, Col}, "\t" ++ Rest) -> next_token({Row, Col}, "\t" ++ Rest) ->
next_token({tk, Row, Col + 1}, Rest); next_token({Row, Col + 1}, Rest);
next_token({tk, Row, _}, "\r\n" ++ Rest) -> next_token({Row, _}, "\r\n" ++ Rest) ->
next_token({tk, Row + 1, 1}, Rest); next_token({Row + 1, 1}, Rest);
next_token({tk, Row, _}, "\r" ++ Rest) -> next_token({Row, _}, "\r" ++ Rest) ->
next_token({tk, Row + 1, 1}, Rest); next_token({Row + 1, 1}, Rest);
next_token({tk, Row, _}, "\n" ++ Rest) -> next_token({Row, _}, "\n" ++ Rest) ->
next_token({tk, Row + 1, 1}, Rest); next_token({Row + 1, 1}, Rest);
next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) ->
alphanum_token(Tk, Tk, String, []); alphanum_token(Pos, Pos, String, []);
next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> next_token(Pos, [C | _] = String) when ?IS_NUM(C) ->
num_token(Tk, Tk, String, [], 0); num_token(Pos, Pos, String, [], 0);
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
bytes_token({tk, Row, Col}, {tk, Row, Col + 1}, [C | Rest], "#", []); bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
next_token({tk, Row, Col}, "\"" ++ Rest) -> next_token({Row, Col}, "\"" ++ Rest) ->
string_token({tk, Row, Col}, {tk, Row, Col + 1}, Rest, "\"", <<>>); string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
next_token({tk, Row, Col}, [Char | Rest]) -> next_token({Row, Col}, [Char | Rest]) ->
Token = {character, [Char], Char, Row, Col, Col}, Token = {character, [Char], Char, Row, Col, Col},
{ok, {Token, {tk, Row, Col + 1}, Rest}}. {ok, {Token, {Row, Col + 1}, Rest}}.
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
alphanum_token(Start, {tk, Row, Col + 1}, Rest, [C | Acc]); alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]);
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> alphanum_token({_, Start}, {Row, End}, String, Acc) ->
AlphaString = lists:reverse(Acc), AlphaString = lists:reverse(Acc),
Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1}, Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {Row, End}, String}}.
num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0), NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], NewValue); num_token(Start, {Row, Col + 1}, Rest, [C | Chars], NewValue);
num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> num_token(Start, {Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) ->
NewValue = Value * 10 + (C - $0), NewValue = Value * 10 + (C - $0),
num_token(Start, {tk, Row, Col + 2}, Rest, [C, $_ | Chars], NewValue); num_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], NewValue);
num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> num_token({_, Start}, {Row, End}, String, Chars, Value) ->
NumString = lists:reverse(Chars), NumString = lists:reverse(Chars),
Token = {integer, NumString, Value, Row, Start, End - 1}, Token = {integer, NumString, Value, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {Row, End}, String}}.
bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C), Digit = convert_digit(C),
bytes_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]); bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]);
bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) ->
Digit = convert_digit(C), Digit = convert_digit(C),
bytes_token(Start, {tk, Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]); bytes_token(Start, {Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]);
bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> bytes_token({_, Start}, {Row, End}, String, Chars, Digits) ->
BytesString = lists:reverse(Chars), BytesString = lists:reverse(Chars),
Value = reverse_combine_nibbles(Digits, <<>>), Value = reverse_combine_nibbles(Digits, <<>>),
Token = {bytes, BytesString, Value, Row, Start, End - 1}, Token = {bytes, BytesString, Value, Row, Start, End - 1},
{ok, {Token, {tk, Row, End}, String}}. {ok, {Token, {Row, End}, String}}.
convert_digit(C) when C >= $0, C =< $9 -> convert_digit(C) when C >= $0, C =< $9 ->
C - $0; C - $0;
@ -108,54 +108,54 @@ reverse_combine_nibbles([D1], Acc) ->
reverse_combine_nibbles([], Acc) -> reverse_combine_nibbles([], Acc) ->
Acc. Acc.
string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) -> string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) ->
case escape_hex_code({tk, Row, Col}, {tk, Row, Col + 2}, String, "x\\" ++ SourceChars) of case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of
{ok, {Codepoint, NewSourceChars, NewTk, NewString}} -> {ok, {Codepoint, NewSourceChars, NewPos, NewString}} ->
NewValue = <<Value/binary, Codepoint/utf8>>, NewValue = <<Value/binary, Codepoint/utf8>>,
string_token(Start, NewTk, NewString, NewSourceChars, NewValue); string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end; end;
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
case escape_char(C) of case escape_char(C) of
{ok, ByteVal} -> {ok, ByteVal} ->
string_token(Start, {tk, Row, Col + 2}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>); string_token(Start, {Row, Col + 2}, Rest, [C, $\ | SourceChars], <<Value/binary, ByteVal>>);
error -> error ->
{error, {invalid_escape_code, [C], Row, Col}} {error, {invalid_escape_code, [C], Row, Col}}
end; end;
string_token({tk, _, Start}, {tk, Row, Col}, [$" | Rest], SourceChars, Value) -> string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
SourceStr = lists:reverse([$" | SourceChars]), SourceStr = lists:reverse([$" | SourceChars]),
Token = {string, SourceStr, Value, Row, Start, Col}, Token = {string, SourceStr, Value, Row, Start, Col},
{ok, {Token, {tk, Row, Col + 1}, Rest}}; {ok, {Token, {Row, Col + 1}, Rest}};
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) ->
% TODO: ERTS probably had to convert this FROM utf8 at some point, so why % TODO: ERTS probably had to convert this FROM utf8 at some point, so why
% bother, if we need to convert it back? I guess we could accept iolists if % bother, if we need to convert it back? I guess we could accept iolists if
% we really wanted to waste time on this point... % we really wanted to waste time on this point...
string_token(Start, {tk, Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>). string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) -> escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) ->
escape_long_hex_code(Start, {tk, Row, Col + 1}, String, "{" ++ SourceChars, 0); escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0);
escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
% As of writing this, the Sophia compiler will convert this byte from % As of writing this, the Sophia compiler will convert this byte from
% extended ASCII to unicode... But it really shouldn't. The literal parser % extended ASCII to unicode... But it really shouldn't. The literal parser
% does what the compiler should do. % does what the compiler should do.
Byte = convert_digit(A) * 16 + convert_digit(B), Byte = convert_digit(A) * 16 + convert_digit(B),
{ok, {Byte, [B, A | SourceChars], {tk, Row, Col + 2}, String}}; {ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}};
escape_hex_code({tk, Row1, Col1}, _, _, _) -> escape_hex_code({Row1, Col1}, _, _, _) ->
{error, {invalid_escape_code, "\\x", Row1, Col1}}. {error, {invalid_escape_code, "\\x", Row1, Col1}}.
escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value) -> escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
{ok, {Value, "}" ++ SourceChars, {tk, Row, Col + 1}, String}}; {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) -> escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
NewSourceChars = [C | SourceChars], NewSourceChars = [C | SourceChars],
NewValue = 16 * Value + convert_digit(C), NewValue = 16 * Value + convert_digit(C),
escape_long_hex_code(Start, {tk, Row, Col + 1}, String, NewSourceChars, NewValue); escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue);
escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) -> escape_long_hex_code(_, {Row, Col}, [C | _], _, _) ->
{error, {invalid_hexadecimal, [C], Row, Col}}; {error, {invalid_hexadecimal, [C], Row, Col}};
escape_long_hex_code(_, Tk, [], SourceChars, Value) -> escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
% Just return as if the escape code were closed, and let the string parser % Just return as if the escape code were closed, and let the string parser
% produce an unclosed string error instead. % produce an unclosed string error instead.
{ok, {Value, SourceChars, Tk, []}}. {ok, {Value, SourceChars, Pos, []}}.
escape_char($b) -> {ok, $\b}; escape_char($b) -> {ok, $\b};
escape_char($e) -> {ok, $\e}; escape_char($e) -> {ok, $\e};
@ -186,90 +186,90 @@ escape_char(_) -> error.
%%% write the recursive code, thus programming the BEAM to implement the %%% write the recursive code, thus programming the BEAM to implement the
%%% pushdown automaton that we want. %%% pushdown automaton that we want.
parse_expression(Type, Tk, String) -> parse_expression(Type, Pos, String) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {Token, NewTk, NewString}} -> {ok, {Token, NewPos, NewString}} ->
parse_expression2(Type, NewTk, NewString, Token); parse_expression2(Type, NewPos, NewString, Token);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) -> parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
case Type of case Type of
{_, _, integer} -> {_, _, integer} ->
{ok, {Value, Tk, String}}; {ok, {Value, Pos, String}};
{_, _, unknown_type} -> {_, _, unknown_type} ->
{ok, {Value, Tk, String}}; {ok, {Value, Pos, String}};
{O, N, _} -> {O, N, _} ->
{error, {wrong_type, O, N, integer, Row, Start, End}} {error, {wrong_type, O, N, integer, Row, Start, End}}
end; end;
parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) -> parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
Len = byte_size(Value), Len = byte_size(Value),
Result = {bytes, Value}, Result = {bytes, Value},
case Type of case Type of
{_, _, {bytes, [any]}} -> {_, _, {bytes, [any]}} ->
{ok, {Result, Tk, String}}; {ok, {Result, Pos, String}};
{_, _, {bytes, [Len]}} -> {_, _, {bytes, [Len]}} ->
{ok, {Result, Tk, String}}; {ok, {Result, Pos, String}};
{_, _, {bytes, [ExpectedLen]}} -> {_, _, {bytes, [ExpectedLen]}} ->
{error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}}; {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
{_, _, unknown_type} -> {_, _, unknown_type} ->
{ok, {Result, Tk, String}}; {ok, {Result, Pos, String}};
{O, N, _} -> {O, N, _} ->
{error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}} {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
end; end;
parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) -> parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
case Type of case Type of
{_, _, string} -> {_, _, string} ->
{ok, {Value, Tk, String}}; {ok, {Value, Pos, String}};
{_, _, unknown_type} -> {_, _, unknown_type} ->
{ok, {Value, Tk, String}}; {ok, {Value, Pos, String}};
{O, N, _} -> {O, N, _} ->
{error, {wrong_type, O, N, string, Row, Start, End}} {error, {wrong_type, O, N, string, Row, Start, End}}
end; end;
parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
parse_list(Type, Tk, String, Row, Start); parse_list(Type, Pos, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) ->
parse_tuple(Type, Tk, String, Row, Start); parse_tuple(Type, Pos, String, Row, Start);
parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) ->
parse_record_or_map(Type, Tk, String, Row, Start); parse_record_or_map(Type, Pos, String, Row, Start);
parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) -> parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) ->
parse_alphanum(Type, Tk, String, S, Row, Start, End); parse_alphanum(Type, Pos, String, S, Row, Start, End);
parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> parse_expression2(_, _, _, {_, S, _, Row, Start, End}) ->
{error, {unexpected_token, S, Row, Start, End}}. {error, {unexpected_token, S, Row, Start, End}}.
unknown_type() -> unknown_type() ->
{unknown_type, already_normalized, unknown_type}. {unknown_type, already_normalized, unknown_type}.
expect_tokens([], Tk, String) -> expect_tokens([], Pos, String) ->
{ok, {Tk, String}}; {ok, {Pos, String}};
expect_tokens([Str | Rest], Tk, String) -> expect_tokens([Str | Rest], Pos, String) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{_, Str, _, _, _, _}, NewTk, NewString}} -> {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} ->
expect_tokens(Rest, NewTk, NewString); expect_tokens(Rest, NewPos, NewString);
{ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
{error, {unexpected_token, Actual, Row, Start, End}} {error, {unexpected_token, Actual, Row, Start, End}}
end. end.
%%% Ambiguous Chain Object vs Identifier Parsing %%% Ambiguous Chain Object vs Identifier Parsing
parse_alphanum(Type, Tk, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) ->
% From a programming perspective, we are trying to parse a constant, so % From a programming perspective, we are trying to parse a constant, so
% an alphanum token can really only be a constructor, or a chain object. % an alphanum token can really only be a constructor, or a chain object.
% Chain objects start with lowercase prefixes, like ak_, so clearly this is % Chain objects start with lowercase prefixes, like ak_, so clearly this is
% a variant constructor. % a variant constructor.
parse_variant(Type, Tk, String, S, Row, Start, End); parse_variant(Type, Pos, String, S, Row, Start, End);
parse_alphanum(Type, Tk, String, S, Row, Start, End) -> parse_alphanum(Type, Pos, String, S, Row, Start, End) ->
% Inversely, variant constructors are always uppercase, so now that we have % Inversely, variant constructors are always uppercase, so now that we have
% handled that case, only chain objects are left. % handled that case, only chain objects are left.
try try
case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of
{account_pubkey, Data} -> {account_pubkey, Data} ->
typecheck_address(Type, Tk, String, Data, Row, Start, End); typecheck_address(Type, Pos, String, Data, Row, Start, End);
{contract_pubkey, Data} -> {contract_pubkey, Data} ->
typecheck_contract(Type, Tk, String, Data, Row, Start, End); typecheck_contract(Type, Pos, String, Data, Row, Start, End);
{signature, Data} -> {signature, Data} ->
typecheck_signature(Type, Tk, String, Data, Row, Start, End); typecheck_signature(Type, Pos, String, Data, Row, Start, End);
{_, _} -> {_, _} ->
% Only a few chain objects are recognized by Sophia. The rest % Only a few chain objects are recognized by Sophia. The rest
% are interpreted as identifiers, so we might as well give the % are interpreted as identifiers, so we might as well give the
@ -280,61 +280,61 @@ parse_alphanum(Type, Tk, String, S, Row, Start, End) ->
_:_ -> {error, {unexpected_identifier, S, Row, Start, End}} _:_ -> {error, {unexpected_identifier, S, Row, Start, End}}
end. end.
typecheck_address({_, _, address}, Tk, String, Data, _, _, _) -> typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
{ok, {{address, Data}, Tk, String}}; {ok, {{address, Data}, Pos, String}};
typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) -> typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
% The compiler would type error, but we should be lenient here. % The compiler would type error, but we should be lenient here.
{ok, {{contract, Data}, Tk, String}}; {ok, {{contract, Data}, Pos, String}};
typecheck_address({_, _, unknown_type}, Tk, String, Data, _, _, _) -> typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
{ok, {{address, Data}, Tk, String}}; {ok, {{address, Data}, Pos, String}};
typecheck_address({O, N, _}, _, _, _, Row, Start, End) -> typecheck_address({O, N, _}, _, _, _, Row, Start, End) ->
{error, {wrong_type, O, N, address, Row, Start, End}}. {error, {wrong_type, O, N, address, Row, Start, End}}.
typecheck_contract({_, _, contract}, Tk, String, Data, _, _, _) -> typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) ->
{ok, {{contract, Data}, Tk, String}}; {ok, {{contract, Data}, Pos, String}};
typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) -> typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) ->
% The compiler would type error, but we should be lenient here. % The compiler would type error, but we should be lenient here.
{ok, {{address, Data}, Tk, String}}; {ok, {{address, Data}, Pos, String}};
typecheck_contract({_, _, unknown_type}, Tk, String, Data, _, _, _) -> typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
{ok, {{contract, Data}, Tk, String}}; {ok, {{contract, Data}, Pos, String}};
typecheck_contract({O, N, _}, _, _, _, Row, Start, End) -> typecheck_contract({O, N, _}, _, _, _, Row, Start, End) ->
{error, {wrong_type, O, N, contract, Row, Start, End}}. {error, {wrong_type, O, N, contract, Row, Start, End}}.
typecheck_signature({_, _, signature}, Tk, String, Data, _, _, _) -> typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) ->
{ok, {{bytes, Data}, Tk, String}}; {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [64]}}, Tk, String, Data, _, _, _) -> typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) ->
% The compiler would probably type-error, but whatever. % The compiler would probably type-error, but whatever.
{ok, {{bytes, Data}, Tk, String}}; {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, {bytes, [any]}}, Tk, String, Data, _, _, _) -> typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) ->
% The compiler would probably type-error, but whatever. % The compiler would probably type-error, but whatever.
{ok, {{bytes, Data}, Tk, String}}; {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({_, _, unknown_type}, Tk, String, Data, _, _, _) -> typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) ->
{ok, {{bytes, Data}, Tk, String}}; {ok, {{bytes, Data}, Pos, String}};
typecheck_signature({O, N, _}, _, _, _, Row, Start, End) -> typecheck_signature({O, N, _}, _, _, _, Row, Start, End) ->
{error, {wrong_type, O, N, signature, Row, Start, End}}. {error, {wrong_type, O, N, signature, Row, Start, End}}.
%%% List Parsing %%% List Parsing
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) ->
parse_list_loop(Inner, Tk, String, "]", Row, Start, []); parse_list_loop(Inner, Pos, String, "]", Row, Start, []);
parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> parse_list({_, _, unknown_type}, Pos, String, Row, Start) ->
parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []); parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []);
parse_list({O, N, _}, _, _, Row, Start) -> parse_list({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, list, Row, Start, Start}}. {error, {wrong_type, O, N, list, Row, Start, Start}}.
parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {lists:reverse(Acc), NewPos, NewString}};
{ok, {Token, NewTk, NewString}} -> {ok, {Token, NewPos, NewString}} ->
parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token)
end. end.
parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) ->
case parse_expression2(Inner, Tk, String, Token) of case parse_expression2(Inner, Pos, String, Token) of
{ok, {Value, NewTk, NewString}} -> {ok, {Value, NewPos, NewString}} ->
parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]); parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]);
{error, Reason} -> {error, Reason} ->
Wrapper = choose_list_error_wrapper(CloseChar), Wrapper = choose_list_error_wrapper(CloseChar),
% TODO: Are tuple indices off by one from list indices? % TODO: Are tuple indices off by one from list indices?
@ -342,12 +342,12 @@ parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) ->
{error, Wrapped} {error, Wrapped}
end. end.
parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} ->
{ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {lists:reverse(Acc), NewPos, NewString}};
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
@ -357,22 +357,22 @@ choose_list_error_wrapper(")") -> tuple_element.
%%% Ambiguous Parenthesis Parsing %%% Ambiguous Parenthesis Parsing
parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) -> parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) ->
% An untyped tuple is a list of untyped terms, and weirdly our list parser % An untyped tuple is a list of untyped terms, and weirdly our list parser
% works perfectly for that, as long as we change the closing character to % works perfectly for that, as long as we change the closing character to
% be ")" instead of "]". % be ")" instead of "]".
case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of
{ok, {[Inner], NewTk, NewString}} -> {ok, {[Inner], NewPos, NewString}} ->
% In Sophia, singleton tuples are unwrapped, and given the inner % In Sophia, singleton tuples are unwrapped, and given the inner
% type. % type.
{ok, {Inner, NewTk, NewString}}; {ok, {Inner, NewPos, NewString}};
{ok, {TermList, NewTk, NewString}} -> {ok, {TermList, NewPos, NewString}} ->
Result = {tuple, list_to_tuple(TermList)}, Result = {tuple, list_to_tuple(TermList)},
{ok, {Result, NewTk, NewString}}; {ok, {Result, NewPos, NewString}};
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end; end;
parse_tuple({O, N, T}, Tk, String, _, _) -> parse_tuple({O, N, T}, Pos, String, _, _) ->
% Typed tuple parsing is quite complex, because we also want to support % Typed tuple parsing is quite complex, because we also want to support
% normal parentheses for grouping. It's not strictly necessary for % normal parentheses for grouping. It's not strictly necessary for
% inputting data, since we don't have any infix operators in simple % inputting data, since we don't have any infix operators in simple
@ -382,23 +382,23 @@ parse_tuple({O, N, T}, Tk, String, _, _) ->
% Count how many ambiguous parens there are, including the one we already % Count how many ambiguous parens there are, including the one we already
% saw. % saw.
case count_open_parens(Tk, String, 1) of case count_open_parens(Pos, String, 1) of
{ok, {Count, Token, NewTk, NewString}} -> {ok, {Count, Token, NewPos, NewString}} ->
% Compare that to the amount of nesting tuple connectives are in % Compare that to the amount of nesting tuple connectives are in
% the type we are expected to produce. % the type we are expected to produce.
{ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []),
% Now work out what to do with all this information. % Now work out what to do with all this information.
parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewTk, NewString, Token); parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token);
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
count_open_parens(Tk, String, Count) -> count_open_parens(Pos, String, Count) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{character, "(", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} ->
count_open_parens(NewTk, NewString, Count + 1); count_open_parens(NewPos, NewString, Count + 1);
{ok, {Token, NewTk, NewString}} -> {ok, {Token, NewPos, NewString}} ->
{ok, {Count, Token, NewTk, NewString}}; {ok, {Count, Token, NewPos, NewString}};
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
@ -412,15 +412,15 @@ extract_tuple_type_info(ParenCount, HeadType, Tails) ->
parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) ->
{error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."};
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, {character, ")", _, Row, Col, _}) -> parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) ->
parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Tk, String, Row, Col); parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col);
parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, Token) -> parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) ->
% Finished with parentheses for now, try and parse an expression out, to % Finished with parentheses for now, try and parse an expression out, to
% get our head term. % get our head term.
case parse_expression2(HeadType, Tk, String, Token) of case parse_expression2(HeadType, Pos, String, Token) of
{ok, {Result, NewTk, NewString}} -> {ok, {Result, NewPos, NewString}} ->
% Got a head term. Now try to build all the other tuple layers. % Got a head term. Now try to build all the other tuple layers.
parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewTk, NewString); parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
% TODO: Wrap errors here too. % TODO: Wrap errors here too.
{error, Reason} {error, Reason}
@ -434,44 +434,44 @@ parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) ->
% got zero. % got zero.
ExpectCount = 1 + length(Tail), ExpectCount = 1 + length(Tail),
{error, {not_enough_elements, ExpectCount, 0, Row, Col}}; {error, {not_enough_elements, ExpectCount, 0, Row, Col}};
parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Tk, String, _, _) -> parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) ->
% If we have some ambiguous parentheses left, we now know one of them is % If we have some ambiguous parentheses left, we now know one of them is
% this empty tuple. % this empty tuple.
HeadTerm = {tuple, {}}, HeadTerm = {tuple, {}},
NewExcessCount = ExcessCount - 1, NewExcessCount = ExcessCount - 1,
% Now continue the loop as if it were an integer or something, in the head % Now continue the loop as if it were an integer or something, in the head
% position. % position.
parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Tk, String); parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String);
parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) ->
% We were expecting a head term of a different type! % We were expecting a head term of a different type!
{error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}.
parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Tk, String) -> parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) ->
% Tuples left to build, but no extra open parens to deal with, so we can % Tuples left to build, but no extra open parens to deal with, so we can
% just parse multivalues naively, starting from the "we have a term, % just parse multivalues naively, starting from the "we have a term,
% waiting for a comma" stage of the loop. % waiting for a comma" stage of the loop.
case parse_multivalue3(TailTypes, Tk, String, -1, -1, [HeadTerm]) of case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of
{ok, {Terms, NewTk, NewString}} -> {ok, {Terms, NewPos, NewString}} ->
NewHead = {tuple, list_to_tuple(Terms)}, NewHead = {tuple, list_to_tuple(Terms)},
parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewTk, NewString); parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
% TODO: More error wrapping? % TODO: More error wrapping?
{error, Reason} {error, Reason}
end; end;
parse_tuple_tails(_, _, 0, HeadTerm, [], Tk, String) -> parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) ->
% No open parens left, no tuples left to build, we are done! % No open parens left, no tuples left to build, we are done!
{ok, {HeadTerm, Tk, String}}; {ok, {HeadTerm, Pos, String}};
parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Tk, String) -> parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) ->
% The ambiguous case, where we have a mix of tuple parens, and grouping % The ambiguous case, where we have a mix of tuple parens, and grouping
% parens. We want to peek at the next token, to see if it closes a grouping % parens. We want to peek at the next token, to see if it closes a grouping
% paren. % paren.
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{character, ")", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, ")", _, _, _, _}, NewPos, NewString}} ->
% It is grouping! Close one excess paren, and continue. % It is grouping! Close one excess paren, and continue.
parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewTk, NewString); parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString);
{ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
% It is a real tuple! Try the normal logic, then. % It is a real tuple! Try the normal logic, then.
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewTk, NewString); parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString);
{ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
% Anything else is just a boring parse error we can complain about. % Anything else is just a boring parse error we can complain about.
{error, {unexpected_token, Actual, Row, Start, End}}; {error, {unexpected_token, Actual, Row, Start, End}};
@ -479,11 +479,11 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Tk, String) ->
{error, Reason} {error, Reason}
end. end.
parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Tk, String) -> parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) ->
case parse_multivalue(TailTypes, Tk, String, -1, -1, [HeadTerm]) of case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of
{ok, {Terms, NewTk, NewString}} -> {ok, {Terms, NewPos, NewString}} ->
NewHead = {tuple, list_to_tuple(Terms)}, NewHead = {tuple, list_to_tuple(Terms)},
parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewTk, NewString); parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString);
{error, Reason} -> {error, Reason} ->
% TODO: wrap errors? % TODO: wrap errors?
{error, Reason} {error, Reason}
@ -504,43 +504,43 @@ parse_tuple_tails2(O, N, _, _, [], _, _) ->
%%% Unambiguous Tuple/Variant Parsing
%% Parse a ")"-terminated, comma-separated multivalue (tuple or variant body).
%% ElemTypes lists the element types still expected; Acc holds already-parsed
%% terms in reverse order. Row/Start locate the opening "(" for error reports.
parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            % Closing paren: verify no expected elements are missing.
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {Token, NewPos, NewString}} ->
            parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token);
        {error, Reason} ->
            % Previously missing: a tokenizer error fell through to a
            % case_clause crash. Propagate it like parse_multivalue3 does.
            {error, Reason}
    end.
%% Dispatch on a just-read token. While element types remain, parse the token
%% as the next element; once they are exhausted, accept only the closing ")".
parse_multivalue2([Type | MoreTypes], Pos, String, Row, Start, Acc, Token) ->
    case parse_expression2(Type, Pos, String, Token) of
        {ok, {Value, Pos1, String1}} ->
            parse_multivalue3(MoreTypes, Pos1, String1, Row, Start, [Value | Acc]);
        {error, Reason} ->
            % TODO: Are tuple indices off by one from list indices?
            Wrapper = choose_list_error_wrapper(")"),
            {error, wrap_error(Reason, {Wrapper, length(Acc)})}
    end;
parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) ->
    % Every element was parsed and the multivalue is closed; emit them in order.
    {ok, {lists:reverse(Acc), Pos, String}};
parse_multivalue2([], _, _, _, _, _, {_, Text, _, Row, Start, End}) ->
    {error, {unexpected_token, Text, Row, Start, End}}.
%% After a parsed element: "," continues the sequence, ")" closes it.
%% Row/Start locate the opening "(" (callers may pass -1/-1 when unknown).
parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} ->
            check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc);
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc);
        {ok, {{_, Actual, _, Row2, Start2, End2}, _, _}} ->
            % Bug fix: the old pattern reused the already-bound Row/Start
            % parameters, so this clause only matched a token sitting at that
            % exact position (callers pass -1/-1) and any other unexpected
            % token crashed with case_clause. Fresh variables fix the match.
            {error, {unexpected_token, Actual, Row2, Start2, End2}};
        {error, Reason} ->
            {error, Reason}
    end.
check_multivalue_long_enough([], Tk, String, _, _, Acc) -> check_multivalue_long_enough([], Pos, String, _, _, Acc) ->
{ok, {lists:reverse(Acc), Tk, String}}; {ok, {lists:reverse(Acc), Pos, String}};
check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
GotCount = length(Got), GotCount = length(Got),
ExpectCount = length(Remaining) + GotCount, ExpectCount = length(Remaining) + GotCount,
@ -548,8 +548,8 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) ->
%%% Variant parsing
parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) -> parse_variant({_, _, {variant, Variants}}, Pos, String, Ident, Row, Start, End) ->
parse_variant2(Variants, Tk, String, Ident, Row, Start, End); parse_variant2(Variants, Pos, String, Ident, Row, Start, End);
parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) ->
{error, {unresolved_variant, Row, Start, End}}; {error, {unresolved_variant, Row, Start, End}};
parse_variant({O, N, _}, _, _, _, Row, Start, End) -> parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
@ -560,33 +560,33 @@ parse_variant({O, N, _}, _, _, _, Row, Start, End) ->
% a variant. % a variant.
{error, {wrong_type, O, N, variant, Row, Start, End}}. {error, {wrong_type, O, N, variant, Row, Start, End}}.
%% Resolve a constructor identifier against the variant's declared arms, then
%% hand off to argument parsing. Unknown identifiers are reported with the
%% position of the offending token.
parse_variant2(Variants, Pos, String, Ident, Row, Start, End) ->
    case lookup_variant(Ident, Variants, 0) of
        {ok, {Tag, ElemTypes}} ->
            % The runtime value records the arity of every arm, not just ours.
            Arities = [length(OtherElemTypes) || {_, OtherElemTypes} <- Variants],
            parse_variant3(Arities, Tag, ElemTypes, Pos, String);
        error ->
            {error, {invalid_constructor, Ident, Row, Start, End}}
    end.
%% A 0-arity constructor stands alone; any other arity must be followed by
%% "(" and its argument list.
parse_variant3(Arities, Tag, [], Pos, String) ->
    % Parsing of 0-arity variants is different: no parentheses are consumed.
    Result = {variant, Arities, Tag, {}},
    {ok, {Result, Pos, String}};
parse_variant3(Arities, Tag, ElemTypes, Pos, String) ->
    case next_token(Pos, String) of
        {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} ->
            parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start);
        {ok, {{_, Actual, _, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Actual, Row, Start, End}};
        {error, Reason} ->
            % Previously missing: a tokenizer error fell through to a
            % case_clause crash instead of being returned.
            {error, Reason}
    end.
%% Parse the "(...)"-wrapped constructor arguments and assemble the variant
%% term. Row/Start locate the "(" already consumed by the caller.
parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) ->
    case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of
        {ok, {Terms, Pos1, String1}} ->
            {ok, {{variant, Arities, Tag, list_to_tuple(Terms)}, Pos1, String1}};
        {error, _} = Error ->
            Error
    end.
@ -600,16 +600,16 @@ lookup_variant(Ident, [_ | Rest], Tag) ->
%%% Record parsing
parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) ->
parse_map(KeyType, ValueType, Tk, String, #{}); parse_map(KeyType, ValueType, Pos, String, #{});
parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) ->
parse_record(Fields, Tk, String, #{}); parse_record(Fields, Pos, String, #{});
parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) ->
case next_token(Tk, String) of case next_token(Pos, String) of
{ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
{ok, {#{}, NewTk, NewString}}; {ok, {#{}, NewPos, NewString}};
{ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{}); parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{});
{ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {ok, {{alphanum, _, _, Row, Start, End}, _, _}} ->
{error, {unresolved_record, Row, Start, End}}; {error, {unresolved_record, Row, Start, End}};
{ok, {{_, S, _, Row, Start, End}, _, _}} -> {ok, {{_, S, _, Row, Start, End}, _, _}} ->
@ -618,67 +618,67 @@ parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) ->
parse_record_or_map({O, N, _}, _, _, Row, Start) -> parse_record_or_map({O, N, _}, _, _, Row, Start) ->
{error, {wrong_type, O, N, map, Row, Start, Start}}. {error, {wrong_type, O, N, map, Row, Start, Start}}.
%% Read the next record item: a field identifier begins a "name = value"
%% binding, while "}" closes the record literal.
parse_record(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{alphanum, Ident, _, Row, Start, End}, Pos1, String1}} ->
            parse_record2(Fields, Pos1, String1, Acc, Ident, Row, Start, End);
        {ok, {{character, "}", _, Row, Start, End}, Pos1, String1}} ->
            parse_record_end(Fields, Pos1, String1, Acc, Row, Start, End);
        {ok, {{_, Text, _, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}};
        {error, _} = Error ->
            Error
    end.
%% A field may only be assigned if the record type declares it; look the
%% identifier up to recover the field's expected type.
parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) ->
    case lists:keyfind(Ident, 1, Fields) of
        false ->
            {error, {invalid_field, Ident, Row, Start, End}};
        {_, Type} ->
            parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type)
    end.
%% Reject fields that were already assigned earlier in this record literal.
parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) ->
    case maps:is_key(Ident, Acc) of
        true ->
            {error, {field_already_present, Ident, Row, Start, End}};
        false ->
            parse_record4(Fields, Pos, String, Acc, Ident, Type)
    end.
%% Consume the "=" sitting between a field name and its value expression.
parse_record4(Fields, Pos, String, Acc, Ident, Type) ->
    case expect_tokens(["="], Pos, String) of
        {ok, {Pos1, String1}} ->
            parse_record5(Fields, Pos1, String1, Acc, Ident, Type);
        {error, _} = Error ->
            Error
    end.
%% Parse the field's value expression and store it in the accumulator under
%% the field name.
parse_record5(Fields, Pos, String, Acc, Ident, Type) ->
    case parse_expression(Type, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            NewAcc = maps:put(Ident, Result, Acc),
            parse_record6(Fields, NewPos, NewString, NewAcc);
        {error, Reason} ->
            % NOTE(review): wrap_error/2 appears to return a bare reason (see
            % its use in parse_multivalue2, which tags the result itself), so
            % tag it with {error, _} here. Previously the bare wrapped reason
            % leaked out untagged, breaking the {ok, _} | {error, _} contract
            % callers such as parse_literal match on. Confirm against
            % wrap_error's definition.
            {error, wrap_error(Reason, {record_field, Ident})}
    end.
%% After a field value: "," continues with another field, "}" ends the record.
parse_record6(Fields, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, Pos1, String1}} ->
            parse_record(Fields, Pos1, String1, Acc);
        {ok, {{character, "}", _, Row, Start, End}, Pos1, String1}} ->
            parse_record_end(Fields, Pos1, String1, Acc, Row, Start, End);
        {ok, {{_, Text, _, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, Text, Row, Start, End}};
        {error, _} = Error ->
            Error
    end.
%% The closing "}" has been read: run the final field check over everything
%% collected, attaching the "}" location to any missing-field report.
parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) ->
    case parse_record_final_loop(Fields, FieldValues, []) of
        {ok, Result} ->
            {ok, {Result, Pos, String}};
        {error, {missing_field, Name}} ->
            {error, {missing_field, Name, Row, Start, End}}
    end.
@ -702,47 +702,47 @@ parse_record_final_loop([], _, FieldsReverse) ->
%%% Map Parsing
%% Parse one map entry opener: "[" starts a "[Key] = Value" entry, "}" ends
%% the map literal. Acc is the map built so far.
parse_map(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} ->
            parse_map2(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {{_, S, _, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, S, Row, Start, End}};
        {error, Reason} ->
            % Previously missing: a tokenizer error fell through to a
            % case_clause crash. Propagate it like parse_record does.
            {error, Reason}
    end.
%% Parse the key expression of a "[Key] = Value" map entry.
parse_map2(KeyType, ValueType, Pos, String, Acc) ->
    case parse_expression(KeyType, Pos, String) of
        {ok, {Result, NewPos, NewString}} ->
            parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result);
        {error, Reason} ->
            % NOTE(review): wrap_error/2 appears to return a bare reason (see
            % its use in parse_multivalue2, which tags the result itself), so
            % tag it with {error, _} here. Previously the bare wrapped reason
            % leaked out untagged, breaking the {ok, _} | {error, _} contract.
            % Confirm against wrap_error's definition.
            {error, wrap_error(Reason, {map_key, maps:size(Acc)})}
    end.
%% Consume the "]" and "=" that separate a map key from its value.
parse_map3(KeyType, ValueType, Pos, String, Acc, Key) ->
    case expect_tokens(["]", "="], Pos, String) of
        {ok, {Pos1, String1}} ->
            parse_map4(KeyType, ValueType, Pos1, String1, Acc, Key);
        {error, _} = Error ->
            Error
    end.
%% Parse the value expression and store the completed key/value pair.
parse_map4(KeyType, ValueType, Pos, String, Acc, Key) ->
    case parse_expression(ValueType, Pos, String) of
        {ok, {Value, Pos1, String1}} ->
            parse_map5(KeyType, ValueType, Pos1, String1, maps:put(Key, Value, Acc));
        {error, _} = Error ->
            Error
    end.
%% After a map entry: "," continues with another entry, "}" ends the map.
parse_map5(KeyType, ValueType, Pos, String, Acc) ->
    case next_token(Pos, String) of
        {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} ->
            parse_map(KeyType, ValueType, NewPos, NewString, Acc);
        {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} ->
            {ok, {Acc, NewPos, NewString}};
        {ok, {{_, S, _, Row, Start, End}, _, _}} ->
            {error, {unexpected_token, S, Row, Start, End}};
        {error, Reason} ->
            % Previously missing: a tokenizer error fell through to a
            % case_clause crash. Propagate it like parse_record6 does.
            {error, Reason}
    end.