Compare commits
No commits in common. "78c9c67f38cba528249c16dc06b6fca9f9393f76" and "a1fc5f19fa2c40fdec16d3e925e15e395c1674d6" have entirely different histories.
78c9c67f38
...
a1fc5f19fa
@ -65,8 +65,6 @@ next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||
bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []);
|
||||
next_token({Row, Col}, "\"" ++ Rest) ->
|
||||
string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>);
|
||||
next_token({Row, Col}, "'" ++ Rest) ->
|
||||
character_token({Row, Col}, {Row, Col + 1}, Rest, "'");
|
||||
next_token({Row, Col}, [Char | Rest]) ->
|
||||
Token = {character, [Char], Char, Row, Col, Col},
|
||||
{ok, {Token, {Row, Col + 1}, Rest}}.
|
||||
@ -117,70 +115,41 @@ reverse_combine_nibbles([D1], Acc) ->
|
||||
reverse_combine_nibbles([], Acc) ->
|
||||
Acc.
|
||||
|
||||
string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) ->
|
||||
case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of
|
||||
{ok, {Codepoint, NewSourceChars, NewPos, NewString}} ->
|
||||
NewValue = <<Value/binary, Codepoint/utf8>>,
|
||||
string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end;
|
||||
% Backslash escape inside a string literal. The character following the
% backslash is translated with escape_char/1 and the resulting byte is appended
% to Value. SourceChars accumulates the consumed source text in reverse order
% (it is reversed when the token is finished), so the escaped character is
% pushed in front of the backslash.
%
% Returns whatever the recursive call on the remaining input returns, or
% {error, {invalid_escape_code, [C], Row, Col}} when C is not a known escape.
string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            % Record a real backslash ($\\) in the source text. The previous
            % spelling, a dollar sign followed by backslash-space, is the
            % character literal for an escaped space, so the reconstructed
            % source string carried " " where the "\" belonged.
            NewSourceChars = [C, $\\ | SourceChars],
            NewValue = <<Value/binary, ByteVal>>,
            string_token(Start, {Row, Col + 2}, Rest, NewSourceChars, NewValue);
        error ->
            {error, {invalid_escape_code, [C], Row, Col}}
    end;
|
||||
string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) ->
|
||||
SourceStr = lists:reverse([$" | SourceChars]),
|
||||
Token = {string, SourceStr, Value, Row, Start, Col},
|
||||
{ok, {Token, {Row, Col + 1}, Rest}};
|
||||
string_token({_, Start}, {Row, Col}, [], SourceChars, _) ->
|
||||
SourceStr = lists:reverse(SourceChars),
|
||||
{error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
|
||||
string_token({_, Start}, {Row, Col}, [$\r | _], SourceChars, _) ->
|
||||
SourceStr = lists:reverse(SourceChars),
|
||||
{error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
|
||||
string_token({_, Start}, {Row, Col}, [$\n | _], SourceChars, _) ->
|
||||
SourceStr = lists:reverse(SourceChars),
|
||||
{error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}};
|
||||
string_token(Start, Pos, String, SourceChars, Value) ->
|
||||
case parse_char(Start, Pos, String, SourceChars) of
|
||||
{ok, {Char, NewSourceChars, NewPos, NewString}} ->
|
||||
% TODO: ERTS probably had to convert this FROM utf8 at some point,
|
||||
% so why bother, if we need to convert it back? I guess we could
|
||||
% accept iolists if we really wanted to waste time on this point...
|
||||
NewValue = <<Value/binary, Char/utf8>>,
|
||||
string_token(Start, NewPos, NewString, NewSourceChars, NewValue);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) ->
|
||||
% TODO: ERTS probably had to convert this FROM utf8 at some point, so why
|
||||
% bother, if we need to convert it back? I guess we could accept iolists if
|
||||
% we really wanted to waste time on this point...
|
||||
string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
|
||||
|
||||
% Lex the single character of a character literal, after the opening quote
% has been consumed (see the "'" clause of next_token/2).
%
%   Start       - {Row, Col} position where the literal began.
%   {Row, Col}  - current position in the input.
%   String      - remaining input.
%   SourceChars - source text consumed so far, in reverse order.
%
% End of input or a line break before any character is reported as an
% unclosed_character_literal error. Otherwise one (possibly escaped) character
% is read with parse_char/4 and character_token2/5 handles what follows it.
character_token({_, Start}, {Row, Col}, [], SourceChars) ->
    {error, {unclosed_character_literal, lists:reverse(SourceChars), Start, Row, Col - 1}};
character_token({_, Start}, {Row, Col}, [LineBreak | _], SourceChars)
  when LineBreak =:= $\r; LineBreak =:= $\n ->
    {error, {unclosed_character_literal, lists:reverse(SourceChars), Start, Row, Col - 1}};
character_token(Start, Pos, String, SourceChars) ->
    case parse_char(Start, Pos, String, SourceChars) of
        {ok, {Char, SourceChars2, Pos2, String2}} ->
            character_token2(Start, Pos2, String2, SourceChars2, Char);
        {error, _} = Error ->
            % Propagate parse_char/4 errors unchanged.
            Error
    end.
|
||||
|
||||
% Finish a character literal once its single character has been read.
%
% If the next input character is the closing quote, a char_literal token is
% produced whose source text is SourceChars (held in reverse) plus that quote,
% and the position advances by one column. Any other remaining input,
% including end of input, is reported as an unclosed_character_literal error
% carrying the source text consumed so far.
character_token2({_, Start}, {Row, Col}, String, SourceChars, Value) ->
    case String of
        [$' | Rest] ->
            Source = lists:reverse([$' | SourceChars]),
            {ok, {{char_literal, Source, Value, Row, Start, Col}, {Row, Col + 1}, Rest}};
        _ ->
            Source = lists:reverse(SourceChars),
            {error, {unclosed_character_literal, Source, Start, Row, Col - 1}}
    end.
|
||||
|
||||
parse_char(Start, {Row, Col}, "\\x{" ++ String, SourceChars) ->
|
||||
escape_long_hex_code(Start, {Row, Col + 3}, String, "{x\\" ++ SourceChars, 0);
|
||||
parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
|
||||
escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) ->
|
||||
escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0);
|
||||
escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
|
||||
% As of writing this, the Sophia compiler will convert this byte from
|
||||
% extended ASCII to unicode... But it really shouldn't. The literal parser
|
||||
% does what the compiler should do.
|
||||
Byte = convert_digit(A) * 16 + convert_digit(B),
|
||||
{ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}};
|
||||
% Single-character backslash escape (e.g. \n, \t, \\). The head only matches
% when the literal started on the same row as the current position, because
% Row is bound in both position tuples.
%
% On success returns {ok, {ByteVal, NewSourceChars, NewPos, Rest}}, where
% NewSourceChars has the two consumed source characters pushed on in reverse
% order. An unknown escape yields an invalid_escape_code error spanning from
% the start of the literal to the escaped character.
parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            % Push a real backslash ($\\). The previous spelling, a dollar
            % sign followed by backslash-space, is an escaped space, which
            % corrupted the recorded source text of the literal.
            {ok, {ByteVal, [C, $\\ | SourceChars], {Row, Col + 2}, Rest}};
        error ->
            {error, {invalid_escape_code, [$\\, C], Row, Start, Col + 1}}
    end;
|
||||
parse_char(_, {Row, Col}, [C | Rest], SourceChars) ->
|
||||
{ok, {C, [C | SourceChars], {Row, Col + 1}, Rest}}.
|
||||
{ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}};
|
||||
escape_hex_code({Row1, Col1}, _, _, _) ->
|
||||
{error, {invalid_escape_code, "\\x", Row1, Col1}}.
|
||||
|
||||
escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) ->
|
||||
{ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}};
|
||||
@ -202,10 +171,7 @@ escape_char($n) -> {ok, $\n};
|
||||
escape_char($r) -> {ok, $\r};
|
||||
escape_char($t) -> {ok, $\t};
|
||||
escape_char($v) -> {ok, $\v};
|
||||
% Technically \" and \' are only valid inside their own quote characters, not
|
||||
% each other, but whatever, we will just be permissive here.
|
||||
escape_char($") -> {ok, $\"};
|
||||
escape_char($') -> {ok, $\'};
|
||||
escape_char($\\) -> {ok, $\\};
|
||||
escape_char(_) -> error.
|
||||
|
||||
@ -236,13 +202,13 @@ parse_expression(Type, Pos, String) ->
|
||||
end.
|
||||
|
||||
parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) ->
|
||||
typecheck_integer(Type, Pos, String, Value, Row, Start, End);
|
||||
parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) ->
|
||||
case next_token(Pos, String) of
|
||||
{ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} ->
|
||||
typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End);
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
case Type of
|
||||
{_, _, integer} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
|
||||
Len = byte_size(Value),
|
||||
@ -254,10 +220,6 @@ parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{_, _, {bytes, [ExpectedLen]}} ->
|
||||
{error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
|
||||
{_, _, bits} ->
|
||||
Size = bit_size(Value),
|
||||
<<IntValue:Size>> = Value,
|
||||
{ok, {{bits, IntValue}, Pos, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Result, Pos, String}};
|
||||
{O, N, _} ->
|
||||
@ -272,15 +234,6 @@ parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) ->
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, string, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) ->
|
||||
case Type of
|
||||
{_, _, char} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Value, Pos, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, char, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) ->
|
||||
parse_list(Type, Pos, String, Row, Start);
|
||||
parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) ->
|
||||
@ -323,14 +276,6 @@ unexpected_token({_, S, _, Row, Start, End}) ->
|
||||
|
||||
%%% Ambiguous Chain Object vs Identifier Parsing
|
||||
|
||||
parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) ->
|
||||
typecheck_bool(Type, Pos, String, true, Row, Start, End);
|
||||
parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) ->
|
||||
typecheck_bool(Type, Pos, String, false, Row, Start, End);
|
||||
parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) ->
|
||||
typecheck_bits(Type, Pos, String, -1, Row, Start, End);
|
||||
parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) ->
|
||||
typecheck_bits(Type, Pos, String, 0, Row, Start, End);
|
||||
parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) ->
|
||||
% From a programming perspective, we are trying to parse a constant, so
|
||||
% an alphanum token can really only be a constructor, or a chain object.
|
||||
@ -358,29 +303,6 @@ parse_alphanum(Type, Pos, String, Path, Row, Start, End) ->
|
||||
% must be a variant constructor, or invalid.
|
||||
parse_variant(Type, Pos, String, Path, Row, Start, End).
|
||||
|
||||
% Check an integer literal against the expected type triple.
%
% The third element of the triple selects the outcome:
%   integer | unknown_type -> the value is returned as-is;
%   bits                   -> the value is wrapped as {bits, Value};
%   anything else          -> a wrong_type error built from the first two
%                             elements of the triple and the Row/Start/End
%                             span of the literal.
% Pos and String are passed through untouched on success.
typecheck_integer({_, _, Kind}, Pos, String, Value, _, _, _)
  when Kind =:= integer; Kind =:= unknown_type ->
    {ok, {Value, Pos, String}};
typecheck_integer({_, _, bits}, Pos, String, Value, _, _, _) ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_integer({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, integer, Row, Start, End}}.
|
||||
|
||||
% Check a boolean literal against the expected type triple.
%
% When the third element of the triple is boolean or unknown_type the value is
% returned unchanged together with Pos and String. Any other expected type
% gives a wrong_type error built from the first two elements of the triple and
% the Row/Start/End span of the literal.
typecheck_bool({_, _, Kind}, Pos, String, Value, _, _, _)
  when Kind =:= unknown_type; Kind =:= boolean ->
    {ok, {Value, Pos, String}};
typecheck_bool({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, boolean, Row, Start, End}}.
|
||||
|
||||
% Check a bits constant against the expected type triple.
%
% When the third element of the triple is bits or unknown_type the value is
% wrapped as {bits, Value} and returned together with Pos and String. Any
% other expected type gives a wrong_type error built from the first two
% elements of the triple and the Row/Start/End span of the constant.
typecheck_bits({_, _, Kind}, Pos, String, Value, _, _, _)
  when Kind =:= unknown_type; Kind =:= bits ->
    {ok, {{bits, Value}, Pos, String}};
typecheck_bits({O, N, _}, _, _, _, Row, Start, End) ->
    {error, {wrong_type, O, N, bits, Row, Start, End}}.
|
||||
|
||||
typecheck_address({_, _, address}, Pos, String, Data, _, _, _) ->
|
||||
{ok, {{address, Data}, Pos, String}};
|
||||
typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) ->
|
||||
@ -963,24 +885,11 @@ anon_types_test() ->
|
||||
% Integers.
|
||||
check_parser("123"),
|
||||
check_parser("1_2_3"),
|
||||
check_parser("-123"),
|
||||
% Booleans.
|
||||
check_parser("true"),
|
||||
check_parser("false"),
|
||||
check_parser("[true, false]"),
|
||||
% Bytes.
|
||||
check_parser("#DEAD000BEEF"),
|
||||
check_parser("#DE_AD0_00B_EEF"),
|
||||
% Strings.
|
||||
check_parser("\"hello world\""),
|
||||
% The Sophia compiler doesn't handle this right, but we should still.
|
||||
%check_parser("\"ÿ\""),
|
||||
%check_parser("\"♣\""),
|
||||
% Characters.
|
||||
check_parser("'A'"),
|
||||
check_parser("['a', ' ', '[']"),
|
||||
%check_parser("'ÿ'"),
|
||||
%check_parser("'♣'"),
|
||||
% List of integers.
|
||||
check_parser("[1, 2, 3]"),
|
||||
% List of lists.
|
||||
@ -996,13 +905,6 @@ string_escape_codes_test() ->
|
||||
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
|
||||
check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
|
||||
check_parser("\"'\""),
|
||||
|
||||
check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
|
||||
check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"),
|
||||
check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"),
|
||||
check_parser("'\"'"),
|
||||
|
||||
ok.
|
||||
|
||||
records_test() ->
|
||||
@ -1063,15 +965,6 @@ chain_objects_test() ->
|
||||
|
||||
ok.
|
||||
|
||||
% Exercises bits values: the Bits.all / Bits.none constants, then integer and
% bytes literals given where a bits type is expected.
bits_test() ->
    lists:foreach(fun check_parser/1, ["Bits.all", "Bits.none"]),
    % Only the type half of the compiled entrypoint result is used below; it
    % is presumably the bits type, since f() returns Bits.all.
    Source = "contract C = entrypoint f() = Bits.all",
    {_, Type} = compile_entrypoint_value_and_type(Source, "f"),
    Cases = [
        {"5", {bits, 5}},
        {"-5", {bits, -5}},
        {"#123", {bits, 256 + 32 + 3}}
    ],
    lists:foreach(
        fun({Literal, Expected}) -> check_sophia_to_fate(Type, Literal, Expected) end,
        Cases
    ),
    ok.
|
||||
|
||||
singleton_records_test() ->
|
||||
TypeDef = "record singleton('a) = {it: 'a}",
|
||||
check_parser_with_typedef(TypeDef, "{it = 123}"),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user