Parse strings
This commit is contained in:
parent
966b4b2748
commit
49cd8b6687
@ -52,6 +52,8 @@ next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
|
|||||||
num_token(Tk, Tk, String, [], 0);
|
num_token(Tk, Tk, String, [], 0);
|
||||||
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||||
bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []);
|
bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []);
|
||||||
|
next_token({tk, Row, Col}, "\"" ++ Rest) ->
|
||||||
|
string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>);
|
||||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||||
Token = {character, [Char], Char, Row, Col, Col},
|
Token = {character, [Char], Char, Row, Col, Col},
|
||||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||||
@ -101,6 +103,46 @@ reverse_combine_nibbles([D1], Acc) ->
|
|||||||
reverse_combine_nibbles([], Acc) ->
|
reverse_combine_nibbles([], Acc) ->
|
||||||
Acc.
|
Acc.
|
||||||
|
|
||||||
|
% Tokenize the body of a double-quoted string literal, after the opening quote
% has already been consumed by the caller.
%
% Arguments:
%   Start       - position tuple of the opening quote; only its third element
%                 is used, as the Start field of the finished token.
%   {tk, R, C}  - current position. The first element is advanced by the
%                 number of source characters consumed at each step.
%   Input       - the remaining characters to scan.
%   SourceChars - the literal's source text so far, in reverse order.
%   Value       - the decoded bytes so far, as a binary.
%
% Returns {ok, {Token, NewTk, Rest}} where Token is
% {string, SourceText, Value, Row, Start, End}, or {error, Reason} if an
% escape sequence is invalid or the input ends before the closing quote.
string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) ->
    % Two-digit hex escape: \xHH
    case escape_hex_code(A, B) of
        {ok, ByteVal} ->
            % $\\ is the backslash character. ($\ followed by a space is an
            % escaped space, which would corrupt the recorded source text.)
            NewSource = [B, A, $x, $\\ | SourceChars],
            string_token(Start, {tk, Row + 4, Col}, Rest, NewSource,
                         <<Value/binary, ByteVal>>);
        error ->
            {error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}}
    end;
string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) ->
    % Single-character escape such as \n or \". A malformed \x escape also
    % ends up here and is rejected by escape_char/1.
    case escape_char(C) of
        {ok, ByteVal} ->
            NewSource = [C, $\\ | SourceChars],
            string_token(Start, {tk, Row + 2, Col}, Rest, NewSource,
                         <<Value/binary, ByteVal>>);
        error ->
            % Only the character after the backslash is reported here.
            {error, {invalid_escape_code, [C], Row, Col}}
    end;
string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) ->
    % Closing quote: the accumulated source text is reversed back into
    % reading order, and the opening quote is already at its head.
    % NOTE(review): the returned position is not advanced past the closing
    % quote, unlike the other clauses - confirm this is what callers expect.
    SourceStr = lists:reverse([$" | SourceChars]),
    Token = {string, SourceStr, Value, Row, Start, End},
    {ok, {Token, {tk, Row, End}, Rest}};
string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) ->
    % Ordinary character: copied into the value as a single byte.
    % NOTE(review): assumes C fits in one byte; a larger integer would be
    % truncated by the binary construction - confirm the input encoding.
    string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars],
                 <<Value/binary, C>>);
string_token(_Start, {tk, Row, Col}, [], SourceChars, _Value) ->
    % Input ran out before a closing quote was found. Report it as an error
    % instead of crashing with function_clause on user-supplied text.
    {error, {unterminated_string, lists:reverse(SourceChars), Row, Col}}.
|
||||||
|
|
||||||
|
% Decode the two hex digits of a \xHH escape into a single byte value.
% Returns {ok, Byte} when both characters are hex digits, otherwise error.
%
% The byte is used exactly as written. At the time this was written the
% Sophia compiler instead re-interprets such a byte as extended ASCII and
% converts it to unicode, which it arguably should not do; this literal
% parser deliberately implements the behaviour the compiler ought to have.
escape_hex_code(High, Low) when ?IS_HEX(High), ?IS_HEX(Low) ->
    {ok, convert_digit(High) * 16 + convert_digit(Low)};
escape_hex_code(_High, _Low) ->
    error.
|
||||||
|
|
||||||
|
% Map the character that follows a backslash in a string literal to the byte
% it stands for. Returns {ok, Byte} for a recognised escape, otherwise error.
escape_char(Char) ->
    case Char of
        $b  -> {ok, $\b};
        $e  -> {ok, $\e};
        $f  -> {ok, $\f};
        $n  -> {ok, $\n};
        $r  -> {ok, $\r};
        $t  -> {ok, $\t};
        $v  -> {ok, $\v};
        % An escaped quote or backslash stands for itself.
        $"  -> {ok, $\"};
        $\\ -> {ok, $\\};
        _   -> error
    end.
|
||||||
|
|
||||||
%%% Sophia Literal Parser
|
%%% Sophia Literal Parser
|
||||||
|
|
||||||
@ -121,8 +163,12 @@ reverse_combine_nibbles([], Acc) ->
|
|||||||
%%% pushdown automaton that we want.
|
%%% pushdown automaton that we want.
|
||||||
|
|
||||||
parse_expression(Type, Tk, String) ->
|
parse_expression(Type, Tk, String) ->
|
||||||
{ok, {Token, NewTk, NewString}} = next_token(Tk, String),
|
case next_token(Tk, String) of
|
||||||
parse_expression2(Type, NewTk, NewString, Token).
|
{ok, {Token, NewTk, NewString}} ->
|
||||||
|
parse_expression2(Type, NewTk, NewString, Token);
|
||||||
|
{error, Reason} ->
|
||||||
|
{error, Reason}
|
||||||
|
end.
|
||||||
|
|
||||||
parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) ->
|
parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) ->
|
||||||
case Type of
|
case Type of
|
||||||
@ -146,7 +192,16 @@ parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) ->
|
|||||||
{_, _, unknown_type} ->
|
{_, _, unknown_type} ->
|
||||||
{ok, {Result, Tk, String}};
|
{ok, {Result, Tk, String}};
|
||||||
{O, N, _} ->
|
{O, N, _} ->
|
||||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
{error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}}
|
||||||
|
end;
|
||||||
|
parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) ->
|
||||||
|
case Type of
|
||||||
|
{_, _, string} ->
|
||||||
|
{ok, {Value, Tk, String}};
|
||||||
|
{_, _, unknown_type} ->
|
||||||
|
{ok, {Value, Tk, String}};
|
||||||
|
{O, N, _} ->
|
||||||
|
{error, {wrong_type, O, N, string, Row, Start, End}}
|
||||||
end;
|
end;
|
||||||
parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) ->
|
parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) ->
|
||||||
parse_list(Type, Tk, String, Row, Start);
|
parse_list(Type, Tk, String, Row, Start);
|
||||||
@ -545,6 +600,10 @@ anon_types_test() ->
|
|||||||
% Bytes.
|
% Bytes.
|
||||||
check_parser("#DEAD000BEEF"),
|
check_parser("#DEAD000BEEF"),
|
||||||
check_parser("#DE_AD0_00B_EEF"),
|
check_parser("#DE_AD0_00B_EEF"),
|
||||||
|
% Strings.
|
||||||
|
check_parser("\"hello world\""),
|
||||||
|
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
|
||||||
|
check_parser("\"\\x00\\x11\\x77\""),
|
||||||
% List of integers.
|
% List of integers.
|
||||||
check_parser("[1, 2, 3]"),
|
check_parser("[1, 2, 3]"),
|
||||||
% List of lists.
|
% List of lists.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user