Handle underscores in integers/bytes
Supporting underscores means the tokenizer now tests for alpha/num/hex characters in enough places that it is worth defining macros for those checks.
This commit is contained in:
parent
f1696e2b9e
commit
fe182a5233
@ -29,6 +29,11 @@ parse_literal2(Result, Tk, String) ->
|
|||||||
|
|
||||||
%%% Tokenizer
|
%%% Tokenizer
|
||||||
|
|
||||||
|
%% Character-class tests used by the tokenizer. They are macros rather than
%% functions so that they can appear inside guards.

%% True for an ASCII letter or an underscore.
-define(IS_ALPHA(C), (
    ((C) == $_)
    orelse ((C) >= $a andalso (C) =< $z)
    orelse ((C) >= $A andalso (C) =< $Z)
)).

%% True for a decimal digit.
-define(IS_NUM(C), ((C) >= $0 andalso (C) =< $9)).

%% True for anything accepted by IS_ALPHA or IS_NUM.
-define(IS_ALPHANUM(C), (?IS_NUM(C) orelse ?IS_ALPHA(C))).

%% True for a hexadecimal digit in either case. Underscore is not included.
-define(IS_HEX(C), (
    ?IS_NUM(C)
    orelse ((C) >= $a andalso (C) =< $f)
    orelse ((C) >= $A andalso (C) =< $F)
)).
|
||||||
|
|
||||||
next_token({tk, Row, Col}, []) ->
|
next_token({tk, Row, Col}, []) ->
|
||||||
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
|
{ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}};
|
||||||
next_token({tk, Row, Col}, " " ++ Rest) ->
|
next_token({tk, Row, Col}, " " ++ Rest) ->
|
||||||
@ -41,46 +46,36 @@ next_token({tk, _, Col}, "\r" ++ Rest) ->
|
|||||||
next_token({tk, 1, Col + 1}, Rest);
|
next_token({tk, 1, Col + 1}, Rest);
|
||||||
next_token({tk, _, Col}, "\n" ++ Rest) ->
|
next_token({tk, _, Col}, "\n" ++ Rest) ->
|
||||||
next_token({tk, 1, Col + 1}, Rest);
|
next_token({tk, 1, Col + 1}, Rest);
|
||||||
next_token(Tk, [N | _] = String) when N >= $A, N =< $Z ->
|
next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) ->
|
||||||
alphanum_token(Tk, Tk, String, []);
|
alphanum_token(Tk, Tk, String, []);
|
||||||
next_token(Tk, [N | _] = String) when N >= $a, N =< $z ->
|
next_token(Tk, [C | _] = String) when ?IS_NUM(C) ->
|
||||||
alphanum_token(Tk, Tk, String, []);
|
|
||||||
next_token(Tk, "_" ++ _ = String) ->
|
|
||||||
alphanum_token(Tk, Tk, String, []);
|
|
||||||
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
|
||||||
num_token(Tk, Tk, String, []);
|
num_token(Tk, Tk, String, []);
|
||||||
next_token({tk, Row, Col}, "#" ++ Rest) ->
|
next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) ->
|
||||||
bytes_token({tk, Row, Col}, {tk, Row + 1, col}, Rest, "#");
|
bytes_token({tk, Row, Col}, {tk, Row + 2, Col}, Rest, [C, $#]);
|
||||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||||
Token = {character, [Char], Row, Col, Col},
|
Token = {character, [Char], Row, Col, Col},
|
||||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||||
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z ->
|
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) ->
|
||||||
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
||||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z ->
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $0, C =< $9 ->
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, [$_ | Rest], Acc) ->
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, Rest, [$_ | Acc]);
|
|
||||||
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
||||||
AlphaString = lists:reverse(Acc),
|
AlphaString = lists:reverse(Acc),
|
||||||
Token = {alphanum, AlphaString, Row, Start, End},
|
Token = {alphanum, AlphaString, Row, Start, End},
|
||||||
{ok, {Token, {tk, Row, End}, String}}.
|
{ok, {Token, {tk, Row, End}, String}}.
|
||||||
|
|
||||||
num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 ->
|
num_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_NUM(C) ->
|
||||||
num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
num_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]);
|
||||||
|
num_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_NUM(C) ->
|
||||||
|
num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Acc]);
|
||||||
num_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
num_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
||||||
NumString = lists:reverse(Acc),
|
NumString = lists:reverse(Acc),
|
||||||
Token = {integer, NumString, Row, Start, End},
|
Token = {integer, NumString, Row, Start, End},
|
||||||
{ok, {Token, {tk, Row, End}, String}}.
|
{ok, {Token, {tk, Row, End}, String}}.
|
||||||
|
|
||||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 ->
|
bytes_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_HEX(C) ->
|
||||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]);
|
||||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $A, N =< $F ->
|
bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_HEX(C) ->
|
||||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Acc]);
|
||||||
bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $a, N =< $f ->
|
|
||||||
bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
|
||||||
bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
||||||
BytesString = lists:reverse(Acc),
|
BytesString = lists:reverse(Acc),
|
||||||
Token = {bytes, BytesString, Row, Start, End},
|
Token = {bytes, BytesString, Row, Start, End},
|
||||||
@ -110,7 +105,7 @@ parse_expression(Type, Tk, String) ->
|
|||||||
parse_expression2(Type, NewTk, NewString, Token).
|
parse_expression2(Type, NewTk, NewString, Token).
|
||||||
|
|
||||||
parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
||||||
Value = list_to_integer(S),
|
Value = convert_int(S),
|
||||||
case Type of
|
case Type of
|
||||||
{_, _, integer} ->
|
{_, _, integer} ->
|
||||||
{ok, {Value, Tk, String}};
|
{ok, {Value, Tk, String}};
|
||||||
@ -159,15 +154,38 @@ expect_tokens([Str | Rest], Tk, String) ->
|
|||||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% Convert the text of an integer token into its integer value.
%%
%% Chars is a string of decimal digits that may contain underscore
%% separators; the underscores are skipped and contribute nothing to the
%% value. An empty string yields 0.
%%
%% Any character other than 0-9 or underscore crashes with function_clause
%% instead of being folded into a meaningless result.
convert_int(Chars) ->
    convert_int(Chars, 0).

%% Accumulating worker for convert_int/1: Result holds the value of the
%% digits consumed so far.
convert_int([], Result) ->
    Result;
convert_int([$_ | Chars], Result) ->
    convert_int(Chars, Result);
convert_int([N | Chars], Result) when N >= $0, N =< $9 ->
    convert_int(Chars, Result * 10 + (N - $0)).
|
||||||
|
|
||||||
convert_bytes(Chars) ->
|
convert_bytes(Chars) ->
|
||||||
Digits = lists:foldl(fun(C, Acc) -> [convert_nibble(C) | Acc] end, [], Chars),
|
% We do this as two reversing foldl type loops. One removes underscores and
|
||||||
|
% converts the ASCII into integers, and the other peels off pairs of
|
||||||
|
% numbers to form bytes.
|
||||||
|
Digits = reverse_convert_digits(Chars, []),
|
||||||
reverse_combine_nibbles(Digits, <<>>).
|
reverse_combine_nibbles(Digits, <<>>).
|
||||||
|
|
||||||
convert_nibble(C) when C >= $0, C =< $9 ->
|
%% Walk Chars from left to right, dropping every underscore and pushing
%% convert_digit/1 of each remaining character onto the front of Acc.
%% Because values are consed on as they are met, the returned list is in the
%% reverse of the input order.
reverse_convert_digits(Chars, Acc) ->
    PushDigit =
        fun($_, Digits) -> Digits;
           (C, Digits) -> [convert_digit(C) | Digits]
        end,
    lists:foldl(PushDigit, Acc, Chars).
|
||||||
|
|
||||||
|
convert_digit(C) when C >= $0, C =< $9 ->
|
||||||
C - $0;
|
C - $0;
|
||||||
convert_nibble(C) when C >= $A, C =< $Z ->
|
convert_digit(C) when C >= $A, C =< $Z ->
|
||||||
C - $A + 10;
|
C - $A + 10;
|
||||||
convert_nibble(C) when C >= $a, C =< $z ->
|
convert_digit(C) when C >= $a, C =< $z ->
|
||||||
C - $a + 10.
|
C - $a + 10.
|
||||||
|
|
||||||
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
|
reverse_combine_nibbles([D1, D2 | Rest], Acc) ->
|
||||||
@ -544,23 +562,23 @@ check_parser_with_typedef(Typedef, Sophia) ->
|
|||||||
% definitions in the AACI, so untyped parses probably don't work.
|
% definitions in the AACI, so untyped parses probably don't work.
|
||||||
check_sophia_to_fate(Type, Sophia, Fate).
|
check_sophia_to_fate(Type, Sophia, Fate).
|
||||||
|
|
||||||
int_test() ->
|
anon_types_test() ->
|
||||||
check_parser("123").
|
% Integers.
|
||||||
|
check_parser("123"),
|
||||||
|
check_parser("1_2_3"),
|
||||||
|
% Bytes.
|
||||||
|
check_parser("#DEAD000BEEF"),
|
||||||
|
check_parser("#DE_AD0_00B_EEF"),
|
||||||
|
% List of integers.
|
||||||
|
check_parser("[1, 2, 3]"),
|
||||||
|
% List of lists.
|
||||||
|
check_parser("[[], [1], [2, 3]]"),
|
||||||
|
% Tuple.
|
||||||
|
check_parser("(1, [2, 3], (4, 5))"),
|
||||||
|
% Map.
|
||||||
|
check_parser("{[1] = 2, [3] = 4}"),
|
||||||
|
|
||||||
list_test() ->
|
ok.
|
||||||
check_parser("[1, 2, 3]").
|
|
||||||
|
|
||||||
list_of_lists_test() ->
|
|
||||||
check_parser("[[], [1], [2, 3]]").
|
|
||||||
|
|
||||||
tuple_test() ->
|
|
||||||
check_parser("(1, [2, 3], (4, 5))").
|
|
||||||
|
|
||||||
maps_test() ->
|
|
||||||
check_parser("{[1] = 2, [3] = 4}").
|
|
||||||
|
|
||||||
bytes_test() ->
|
|
||||||
check_parser("#DEAD000BEEF").
|
|
||||||
|
|
||||||
records_test() ->
|
records_test() ->
|
||||||
TypeDef = "record pair = {x: int, y: int}",
|
TypeDef = "record pair = {x: int, y: int}",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user