Parse strings
This commit is contained in:
parent
966b4b2748
commit
49cd8b6687
@ -52,6 +52,8 @@ next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
|
||||
num_token(Tk, Tk, String, [], 0);
|
||||
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||
bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []);
|
||||
next_token({tk, Row, Col}, "\"" ++ Rest) ->
|
||||
string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>);
|
||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||
Token = {character, [Char], Char, Row, Col, Col},
|
||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||
@ -101,6 +103,46 @@ reverse_combine_nibbles([D1], Acc) ->
|
||||
reverse_combine_nibbles([], Acc) ->
|
||||
Acc.
|
||||
|
||||
%% Tokenise the remainder of a string literal, once the opening quote has
%% already been consumed by the caller.
%%
%% Arguments:
%%   Start          - tokenizer position at which the literal began.
%%   {tk, Row, Col} - current tokenizer position. The second field is advanced
%%                    by one for every source character consumed here.
%%   Input          - remaining source characters (third argument).
%%   SourceChars    - source text consumed so far, in reverse order.
%%   Value          - decoded bytes accumulated so far, as a binary.
%%
%% Returns {ok, {Token, NewTk, Rest}} when the closing quote is found, where
%% Token is {string, SourceStr, Value, Row, Start, End}, or {error, Reason}
%% for an invalid escape sequence or an unterminated literal.
%%
%% NOTE(review): the closing quote is appended to SourceStr but the position
%% returned is not advanced past it - confirm that this is intended.
string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) ->
    case escape_hex_code(A, B) of
        {ok, ByteVal} ->
            % Record the backslash itself ($\\, not $\ which is a space) so
            % that the reconstructed source text matches the input exactly.
            NewSourceChars = [B, A, $x, $\\ | SourceChars],
            NewValue = <<Value/binary, ByteVal>>,
            string_token(Start, {tk, Row + 4, Col}, Rest, NewSourceChars, NewValue);
        error ->
            {error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}}
    end;
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
    case escape_char(C) of
        {ok, ByteVal} ->
            NewSourceChars = [C, $\\ | SourceChars],
            NewValue = <<Value/binary, ByteVal>>,
            string_token(Start, {tk, Row + 2, Col}, Rest, NewSourceChars, NewValue);
        error ->
            {error, {invalid_escape_code, [C], Row, Col}}
    end;
string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) ->
    % Closing quote: the literal is complete.
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, End},
    {ok, {Token, {tk, Row, End}, Rest}};
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
    % NOTE(review): C is appended as a single byte, so a character above 255
    % would be truncated - confirm the input is always a list of bytes.
    string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <<Value/binary, C>>);
string_token(_Start, {tk, Row, Col}, [], SourceChars, _Value) ->
    % The input ended before a closing quote was seen. Report it as a parse
    % error instead of crashing with function_clause on malformed input.
    {error, {unterminated_string, lists:reverse(SourceChars), Row, Col}}.
|
||||
|
||||
%% Decode the two digits of a "\xAB" escape into a single byte value.
%% Returns {ok, Byte} when both A and B satisfy ?IS_HEX, and error otherwise.
escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) ->
    % At the time of writing, the Sophia compiler converts this byte from
    % extended ASCII to unicode, although it really should not. The literal
    % parser does what the compiler ought to do and keeps the raw byte.
    High = convert_digit(A),
    Low = convert_digit(B),
    {ok, High * 16 + Low};
escape_hex_code(_, _) ->
    error.
|
||||
|
||||
%% Map the character that follows a backslash in a string literal to the
%% character it denotes. Returns {ok, Char} for a recognised single-character
%% escape, and error for anything else.
escape_char(C) ->
    case C of
        $b -> {ok, $\b};
        $e -> {ok, $\e};
        $f -> {ok, $\f};
        $n -> {ok, $\n};
        $r -> {ok, $\r};
        $t -> {ok, $\t};
        $v -> {ok, $\v};
        $" -> {ok, $\"};
        $\\ -> {ok, $\\};
        _ -> error
    end.
|
||||
|
||||
%%% Sophia Literal Parser
|
||||
|
||||
@ -121,8 +163,12 @@ reverse_combine_nibbles([], Acc) ->
|
||||
%%% pushdown automaton that we want.
|
||||
|
||||
%% Read the next token from String and hand it to parse_expression2/4
%% together with the expected Type and the updated tokenizer state.
%%
%% A tokenizer failure is returned to the caller unchanged as {error, Reason}
%% rather than crashing on a failed match, since next_token/2 can now fail
%% (for example on an invalid escape code inside a string literal).
parse_expression(Type, Tk, String) ->
    case next_token(Tk, String) of
        {ok, {Token, NewTk, NewString}} ->
            parse_expression2(Type, NewTk, NewString, Token);
        {error, _Reason} = Error ->
            Error
    end.
|
||||
|
||||
parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) ->
|
||||
case Type of
|
||||
@ -146,7 +192,16 @@ parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) ->
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Result, Tk, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
||||
{error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) ->
|
||||
case Type of
|
||||
{_, _, string} ->
|
||||
{ok, {Value, Tk, String}};
|
||||
{_, _, unknown_type} ->
|
||||
{ok, {Value, Tk, String}};
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, string, Row, Start, End}}
|
||||
end;
|
||||
parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) ->
|
||||
parse_list(Type, Tk, String, Row, Start);
|
||||
@ -545,6 +600,10 @@ anon_types_test() ->
|
||||
% Bytes.
|
||||
check_parser("#DEAD000BEEF"),
|
||||
check_parser("#DE_AD0_00B_EEF"),
|
||||
% Strings.
|
||||
check_parser("\"hello world\""),
|
||||
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||
check_parser("\"\\x00\\x11\\x77\""),
|
||||
% List of integers.
|
||||
check_parser("[1, 2, 3]"),
|
||||
% List of lists.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user