Make Hakuzaru Great Again #22
+31
-12
@@ -103,12 +103,13 @@ reverse_combine_nibbles([D1], Acc) ->
|
|||||||
reverse_combine_nibbles([], Acc) ->
|
reverse_combine_nibbles([], Acc) ->
|
||||||
Acc.
|
Acc.
|
||||||
|
|
||||||
string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) ->
|
string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) ->
|
||||||
case escape_hex_code(A, B) of
|
case escape_hex_code({tk, Row, Col}, {tk, Row + 2, Col}, String, "x\\" ++ SourceChars) of
|
||||||
{ok, ByteVal} ->
|
{ok, {Codepoint, NewSourceChars, NewTk, NewString}} ->
|
||||||
string_token(Start, {tk, Row + 4, Col}, Rest, [B, A, $x, $\ | SourceChars], <<Value/binary, ByteVal>>);
|
NewValue = <<Value/binary, Codepoint/utf8>>,
|
||||||
error ->
|
string_token(Start, NewTk, NewString, NewSourceChars, NewValue);
|
||||||
{error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}}
|
{error, Reason} ->
|
||||||
|
{error, Reason}
|
||||||
end;
|
end;
|
||||||
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
|
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
|
||||||
case escape_char(C) of
|
case escape_char(C) of
|
||||||
@@ -122,16 +123,34 @@ string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) ->
|
|||||||
Token = {string, SourceStr, Value, Row, Start, End},
|
Token = {string, SourceStr, Value, Row, Start, End},
|
||||||
{ok, {Token, {tk, Row, End}, Rest}};
|
{ok, {Token, {tk, Row, End}, Rest}};
|
||||||
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
|
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
|
||||||
string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <<Value/binary, C>>).
|
% TODO: ERTS probably had to convert this FROM utf8 at some point, so why
|
||||||
|
% bother, if we need to convert it back? I guess we could accept iolists if
|
||||||
|
% we really wanted to waste time on this point...
|
||||||
|
string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <<Value/binary, C/utf8>>).
|
||||||
|
|
||||||
escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) ->
|
escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) ->
|
||||||
|
escape_long_hex_code(Start, {tk, Row + 1, Col}, String, "{" ++ SourceChars, 0);
|
||||||
|
escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) ->
|
||||||
% As of writing this, the Sophia compiler will convert this byte from
|
% As of writing this, the Sophia compiler will convert this byte from
|
||||||
% extended ASCII to unicode... But it really shouldn't. The literal parser
|
% extended ASCII to unicode... But it really shouldn't. The literal parser
|
||||||
% does what the compiler should do.
|
% does what the compiler should do.
|
||||||
Byte = convert_digit(A) * 16 + convert_digit(B),
|
Byte = convert_digit(A) * 16 + convert_digit(B),
|
||||||
{ok, Byte};
|
{ok, {Byte, [B, A | SourceChars], {tk, Row + 2, Col}, String}};
|
||||||
escape_hex_code(_, _) ->
|
escape_hex_code({tk, Row1, Col1}, _, _, _) ->
|
||||||
error.
|
{error, {invalid_escape_code, "\\x", Row1, Col1}}.
|
||||||
|
|
||||||
|
escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value) ->
|
||||||
|
{ok, {Value, "}" ++ SourceChars, {tk, Row + 1, Col}, String}};
|
||||||
|
escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) ->
|
||||||
|
NewSourceChars = [C | SourceChars],
|
||||||
|
NewValue = 16 * Value + convert_digit(C),
|
||||||
|
escape_long_hex_code(Start, {tk, Row + 1, Col}, String, NewSourceChars, NewValue);
|
||||||
|
escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) ->
|
||||||
|
{error, {invalid_hexadecimal, [C], Row, Col}};
|
||||||
|
escape_long_hex_code(_, Tk, [], SourceChars, Value) ->
|
||||||
|
% Just return as if the escape code were closed, and let the string parser
|
||||||
|
% produce an unclosed string error instead.
|
||||||
|
{ok, {Value, SourceChars, Tk, []}}.
|
||||||
|
|
||||||
escape_char($b) -> {ok, $\b};
|
escape_char($b) -> {ok, $\b};
|
||||||
escape_char($e) -> {ok, $\e};
|
escape_char($e) -> {ok, $\e};
|
||||||
@@ -747,7 +766,7 @@ anon_types_test() ->
|
|||||||
string_escape_codes_test() ->
|
string_escape_codes_test() ->
|
||||||
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||||
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
|
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
|
||||||
check_parser("\"\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\""),
|
check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
records_test() ->
|
records_test() ->
|
||||||
|
|||||||
Reference in New Issue
Block a user