Handle underscores in integers/bytes
This forces us to test for alpha/num/hex enough times that it's now worth making macros for these things.
This commit is contained in:
parent
f1696e2b9e
commit
fe182a5233
@ -29,6 +29,11 @@ parse_literal2(Result, Tk, String) ->
|
||||
|
||||
%%% Tokenizer
|
||||
|
||||
% Character-class predicates used by the tokenizer. Each expands to plain
% comparisons, so it can be used in a guard. The argument is expanded more
% than once, so pass a bound variable or a literal.

% Identifier character: ASCII letter or underscore.
-define(IS_ALPHA(C), ((((C) >= $A) andalso ((C) =< $Z))
                      orelse (((C) >= $a) andalso ((C) =< $z))
                      orelse ((C) =:= $_))).
% ASCII decimal digit.
-define(IS_NUM(C), (((C) >= $0) andalso ((C) =< $9))).
% Identifier character or decimal digit.
-define(IS_ALPHANUM(C), (?IS_ALPHA(C) orelse ?IS_NUM(C))).
% ASCII hexadecimal digit, upper or lower case.
-define(IS_HEX(C), (?IS_NUM(C)
                    orelse (((C) >= $A) andalso ((C) =< $F))
                    orelse (((C) >= $a) andalso ((C) =< $f)))).
|
||||
|
||||
next_token({tk, Row, Col}, []) ->
|
||||
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
|
||||
next_token({tk, Row, Col}, " " ++ Rest) ->
|
||||
@ -41,46 +46,36 @@ next_token({tk, _, Col}, "\r" ++ Rest) ->
|
||||
next_token({tk, 1, Col + 1}, Rest);
|
||||
next_token({tk, _, Col}, "\n" ++ Rest) ->
|
||||
next_token({tk, 1, Col + 1}, Rest);
|
||||
next_token(Tk, [N | _] = String) when N >= $A, N =< $Z ->
|
||||
next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, [N | _] = String) when N >= $a, N =< $z ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, "_" ++ _ = String) ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
||||
next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
|
||||
num_token(Tk, Tk, String, []);
|
||||
next_token({tk, Row, Col}, "#" ++ Rest) ->
|
||||
bytes_token({tk, Row, Col}, {tk, Row + 1, col}, Rest, "#");
|
||||
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||
bytes_token({tk, Row, Col}, {tk, Row + 2, Col}, Rest, [C, $#]);
|
||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||
Token = {character, [Char], Row, Col, Col},
|
||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||
|
||||
% Accumulate one identifier-like token made of letters, digits and
% underscores.
%
% The first argument is the tokenizer position at which the token began, the
% second is the current position, the third is the unread input, and Acc
% holds the characters consumed so far, newest first.
%
% Returns {ok, {Token, NewPosition, Remaining}}, where Token is
% {alphanum, Text, Row, Start, End}.
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
    % Advance the position by one for every consumed character, the same way
    % num_token/4 and bytes_token/4 do, so that tokens following an
    % identifier are not reported at the identifier's starting position.
    alphanum_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]);
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
    % First non-identifier character (or end of input): emit the token and
    % leave the remaining input untouched.
    AlphaString = lists:reverse(Acc),
    Token = {alphanum, AlphaString, Row, Start, End},
    {ok, {Token, {tk, Row, End}, String}}.
|
||||
|
||||
% Scan a decimal integer literal. Digits may be separated by single
% underscores; the underscores are kept in the token text.
%
% Returns {ok, {Token, NewPosition, Remaining}}, where Token is
% {integer, Text, Row, Start, End}.
num_token(Begin, {tk, Row, Col}, [Digit | Tail], Seen) when ?IS_NUM(Digit) ->
    num_token(Begin, {tk, Row + 1, Col}, Tail, [Digit | Seen]);
num_token(Begin, {tk, Row, Col}, [$_, Digit | Tail], Seen) when ?IS_NUM(Digit) ->
    % An underscore is only consumed together with the digit that follows
    % it, so a trailing or doubled underscore ends the token instead.
    num_token(Begin, {tk, Row + 2, Col}, Tail, [Digit, $_ | Seen]);
num_token({tk, _, First}, {tk, Row, Last}, Remaining, Seen) ->
    Text = lists:reverse(Seen),
    {ok, {{integer, Text, Row, First, Last}, {tk, Row, Last}, Remaining}}.
|
||||
|
||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 ->
|
||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $A, N =< $F ->
|
||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $a, N =< $f ->
|
||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
||||
bytes_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_HEX(C) ->
|
||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]);
|
||||
bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_HEX(C) ->
|
||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Acc]);
|
||||
bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
||||
BytesString = lists:reverse(Acc),
|
||||
Token = {bytes, BytesString, Row, Start, End},
|
||||
@ -110,7 +105,7 @@ parse_expression(Type, Tk, String) ->
|
||||
parse_expression2(Type, NewTk, NewString, Token).
|
||||
|
||||
parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
||||
Value = list_to_integer(S),
|
||||
Value = convert_int(S),
|
||||
case Type of
|
||||
{_, _, integer} ->
|
||||
{ok, {Value, Tk, String}};
|
||||
@ -159,15 +154,38 @@ expect_tokens([Str | Rest], Tk, String) ->
|
||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||
end.
|
||||
|
||||
% Convert the text of an integer token into its value.
%
% Chars is a string of ASCII decimal digits; underscores between them are
% ignored. The empty string yields 0. Any other character fails with
% function_clause rather than being folded into a meaningless number.
convert_int(Chars) ->
    convert_int(Chars, 0).

% Worker: Result is the value of the digits consumed so far.
convert_int("_" ++ Chars, Result) ->
    convert_int(Chars, Result);
convert_int([N | Chars], Result) when N >= $0, N =< $9 ->
    convert_int(Chars, Result * 10 + (N - $0));
convert_int([], Result) ->
    Result.
|
||||
|
||||
% Convert the hex-digit text of a bytes literal into a binary. Underscores in
% Chars are ignored.
convert_bytes(Chars) ->
    % We do this as two reversing foldl type loops. One removes underscores and
    % converts the ASCII into integers, and the other peels off pairs of
    % numbers to form bytes.
    Digits = reverse_convert_digits(Chars, []),
    reverse_combine_nibbles(Digits, <<>>).
|
||||
|
||||
% Map a string of hex digits to their numeric values, skipping underscores.
% Each value is pushed onto Acc, so the result is in reverse input order.
reverse_convert_digits("_" ++ Rest, Acc) ->
    reverse_convert_digits(Rest, Acc);
reverse_convert_digits([C | Rest], Acc) ->
    reverse_convert_digits(Rest, [convert_digit(C) | Acc]);
reverse_convert_digits([], Acc) ->
    Acc.

% Value (0..15) of a single ASCII hexadecimal digit, upper or lower case.
% Anything else fails with function_clause instead of yielding a value that
% does not fit in a nibble.
convert_digit(C) when C >= $0, C =< $9 ->
    C - $0;
convert_digit(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_digit(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
||||
|
||||
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
|
||||
@ -544,23 +562,23 @@ check_parser_with_typedef(Typedef, Sophia) ->
|
||||
% definitions in the AACI, so untyped parses probably don't work.
|
||||
check_sophia_to_fate(Type, Sophia, Fate).
|
||||
|
||||
% Runs check_parser/1 over one literal of each kind below. The former
% per-kind tests used the same inputs, so they are all covered here.
anon_types_test() ->
    % Integers.
    check_parser("123"),
    check_parser("1_2_3"),
    % Bytes.
    check_parser("#DEAD000BEEF"),
    check_parser("#DE_AD0_00B_EEF"),
    % List of integers.
    check_parser("[1, 2, 3]"),
    % List of lists.
    check_parser("[[], [1], [2, 3]]"),
    % Tuple.
    check_parser("(1, [2, 3], (4, 5))"),
    % Map.
    check_parser("{[1] = 2, [3] = 4}"),

    ok.
|
||||
|
||||
records_test() ->
|
||||
TypeDef = "record pair = {x: int, y: int}",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user