Bytes lexing
I don't handle underscores in bytes correctly... Nor in integers, for that matter.
This commit is contained in:
parent
2bf384ca82
commit
f1696e2b9e
@ -4,6 +4,8 @@
|
|||||||
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
||||||
-license("GPL-3.0-or-later").
|
-license("GPL-3.0-or-later").
|
||||||
|
|
||||||
|
-export([check_parser/1]).
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
parse_literal(Type, String) ->
|
parse_literal(Type, String) ->
|
||||||
@ -33,25 +35,26 @@ next_token({tk, Row, Col}, " " ++ Rest) ->
|
|||||||
next_token({tk, Row + 1, Col}, Rest);
|
next_token({tk, Row + 1, Col}, Rest);
|
||||||
next_token({tk, Row, Col}, "\t" ++ Rest) ->
|
next_token({tk, Row, Col}, "\t" ++ Rest) ->
|
||||||
next_token({tk, Row + 1, Col}, Rest);
|
next_token({tk, Row + 1, Col}, Rest);
|
||||||
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
next_token({tk, _, Col}, "\r\n" ++ Rest) ->
|
||||||
num_token(Tk, Tk, String, []);
|
next_token({tk, 1, Col + 1}, Rest);
|
||||||
|
next_token({tk, _, Col}, "\r" ++ Rest) ->
|
||||||
|
next_token({tk, 1, Col + 1}, Rest);
|
||||||
|
next_token({tk, _, Col}, "\n" ++ Rest) ->
|
||||||
|
next_token({tk, 1, Col + 1}, Rest);
|
||||||
next_token(Tk, [N | _] = String) when N >= $A, N =< $Z ->
|
next_token(Tk, [N | _] = String) when N >= $A, N =< $Z ->
|
||||||
alphanum_token(Tk, Tk, String, []);
|
alphanum_token(Tk, Tk, String, []);
|
||||||
next_token(Tk, [N | _] = String) when N >= $a, N =< $z ->
|
next_token(Tk, [N | _] = String) when N >= $a, N =< $z ->
|
||||||
alphanum_token(Tk, Tk, String, []);
|
alphanum_token(Tk, Tk, String, []);
|
||||||
next_token(Tk, [$_ | _] = String) ->
|
next_token(Tk, "_" ++ _ = String) ->
|
||||||
alphanum_token(Tk, Tk, String, []);
|
alphanum_token(Tk, Tk, String, []);
|
||||||
|
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
||||||
|
num_token(Tk, Tk, String, []);
|
||||||
|
%% A '#' introduces a bytes literal. The '#' is consumed here, so the scan
%% position handed to bytes_token/4 is one step past the start position, and
%% the accumulator is seeded with "#" so the token text keeps its prefix.
next_token({tk, Row, Col}, "#" ++ Rest) ->
    % The scan position must carry the variable Col. The atom 'col' would end
    % up inside the token and in the position returned to the caller.
    bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "#");
|
||||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||||
Token = {character, [Char], Row, Col, Col},
|
Token = {character, [Char], Row, Col, Col},
|
||||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||||
|
|
||||||
num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 ->
|
|
||||||
num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]);
|
|
||||||
num_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
|
||||||
NumString = lists:reverse(Acc),
|
|
||||||
Token = {integer, NumString, Row, Start, End},
|
|
||||||
{ok, {Token, {tk, Row, End}, String}}.
|
|
||||||
|
|
||||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z ->
|
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z ->
|
||||||
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
||||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z ->
|
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z ->
|
||||||
@ -65,6 +68,24 @@ alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
|||||||
Token = {alphanum, AlphaString, Row, Start, End},
|
Token = {alphanum, AlphaString, Row, Start, End},
|
||||||
{ok, {Token, {tk, Row, End}, String}}.
|
{ok, {Token, {tk, Row, End}, String}}.
|
||||||
|
|
||||||
|
%% Scan a run of decimal digits into an integer token.
%% The first argument is the position where the token began; the second is the
%% current scan position, whose second field is bumped once per digit consumed.
%% The last argument accumulates the digits seen so far, most recent first.
%% Returns {ok, {Token, Position, RemainingInput}}.
num_token(Start, {tk, Pos, Other}, [Digit | Tail], Seen) when Digit >= $0, Digit =< $9 ->
    num_token(Start, {tk, Pos + 1, Other}, Tail, [Digit | Seen]);
num_token({tk, _, From}, {tk, Pos, To} = Here, Remaining, Seen) ->
    % First non-digit (or end of input): emit the token and hand back the
    % unconsumed input together with the current position.
    {ok, {{integer, lists:reverse(Seen), Pos, From, To}, Here, Remaining}}.
|
||||||
|
|
||||||
|
%% Scan the hexadecimal digits of a bytes literal into a bytes token.
%% The first argument is the position where the token began; the second is the
%% current scan position, whose second field is bumped once per digit consumed.
%% The last argument accumulates the characters seen so far, most recent first.
%% Returns {ok, {Token, Position, RemainingInput}}.
bytes_token(Start, {tk, Pos, Other}, [Hex | Tail], Seen)
        when Hex >= $0, Hex =< $9;
             Hex >= $A, Hex =< $F;
             Hex >= $a, Hex =< $f ->
    bytes_token(Start, {tk, Pos + 1, Other}, Tail, [Hex | Seen]);
bytes_token({tk, _, From}, {tk, Pos, To} = Here, Remaining, Seen) ->
    % First non-hex character (or end of input): emit the token and hand back
    % the unconsumed input together with the current position.
    {ok, {{bytes, lists:reverse(Seen), Pos, From, To}, Here, Remaining}}.
|
||||||
|
|
||||||
|
|
||||||
%%% Sophia Literal Parser
|
%%% Sophia Literal Parser
|
||||||
|
|
||||||
@ -98,6 +119,22 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
|||||||
{O, N, _} ->
|
{O, N, _} ->
|
||||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
||||||
end;
|
end;
|
||||||
|
%% Bytes literal. The hex digits after the '#' are converted to a binary and
%% checked against the expected type: bytes of any length, bytes of exactly
%% the literal's length, or unknown_type are accepted. A bytes type with a
%% different length yields bytes_wrong_size; any other type yields wrong_type.
parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) ->
    Value = convert_bytes(S),
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Tk, String}};
        % Len is already bound, so this only matches the literal's own size.
        {_, _, {bytes, [Len]}} ->
            {ok, {Result, Tk, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, unknown_type} ->
            {ok, {Result, Tk, String}};
        {O, N, _} ->
            % Report the kind of literal that was actually found: bytes.
            {error, {wrong_type, O, N, bytes, Row, Start, End}}
    end;
|
||||||
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
|
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
|
||||||
parse_list(Type, Tk, String, Row, Start);
|
parse_list(Type, Tk, String, Row, Start);
|
||||||
parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
|
parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
|
||||||
@ -122,6 +159,25 @@ expect_tokens([Str | Rest], Tk, String) ->
|
|||||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% Convert a string of hex digit characters into a binary.
%% Each character is mapped to its nibble value; the nibbles are then handed,
%% last digit first, to reverse_combine_nibbles/2, which packs them into bytes.
convert_bytes(Chars) ->
    Nibbles = [convert_nibble(C) || C <- Chars],
    reverse_combine_nibbles(lists:reverse(Nibbles), <<>>).
|
||||||
|
|
||||||
|
%% Map one hexadecimal digit character to its value in 0..15.
%% Only 0-9, A-F and a-f are accepted. Any other character fails with
%% function_clause instead of producing a value above 15 that would be
%% silently truncated when packed into a 4-bit segment.
convert_nibble(C) when C >= $0, C =< $9 ->
    C - $0;
convert_nibble(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_nibble(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
||||||
|
|
||||||
|
%% Pack a list of nibble values, given last digit first, into a binary.
%% Nibbles are taken two at a time and prepended to the accumulator as one
%% byte, so the finished binary is in the original digit order. A single
%% leftover nibble becomes the low half of a leading byte whose high half is 0.
reverse_combine_nibbles([], Packed) ->
    Packed;
reverse_combine_nibbles([Lone], Packed) ->
    <<0:4, Lone:4, Packed/binary>>;
reverse_combine_nibbles([Low, High | More], Packed) ->
    reverse_combine_nibbles(More, <<High:4, Low:4, Packed/binary>>).
|
||||||
|
|
||||||
%%% List Parsing
|
%%% List Parsing
|
||||||
|
|
||||||
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
|
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
|
||||||
@ -430,12 +486,13 @@ wrap_error(Reason, _) -> Reason.
|
|||||||
%%% Tests
|
%%% Tests
|
||||||
|
|
||||||
check_sophia_to_fate(Type, Sophia, Fate) ->
|
check_sophia_to_fate(Type, Sophia, Fate) ->
|
||||||
{ok, FateActual} = parse_literal(Type, Sophia),
|
case parse_literal(Type, Sophia) of
|
||||||
case FateActual of
|
{ok, Fate} ->
|
||||||
Fate ->
|
|
||||||
ok;
|
ok;
|
||||||
_ ->
|
{ok, FateActual} ->
|
||||||
erlang:error({to_fate_failed, Fate, FateActual})
|
erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
|
||||||
|
{error, Reason} ->
|
||||||
|
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
|
||||||
end.
|
end.
|
||||||
|
|
||||||
compile_entrypoint_code_and_type(Source, Entrypoint) ->
|
compile_entrypoint_code_and_type(Source, Entrypoint) ->
|
||||||
@ -502,6 +559,9 @@ tuple_test() ->
|
|||||||
maps_test() ->
|
maps_test() ->
|
||||||
check_parser("{[1] = 2, [3] = 4}").
|
check_parser("{[1] = 2, [3] = 4}").
|
||||||
|
|
||||||
|
bytes_test() ->
|
||||||
|
check_parser("#DEAD000BEEF").
|
||||||
|
|
||||||
records_test() ->
|
records_test() ->
|
||||||
TypeDef = "record pair = {x: int, y: int}",
|
TypeDef = "record pair = {x: int, y: int}",
|
||||||
Sophia = "{x = 1, y = 2}",
|
Sophia = "{x = 1, y = 2}",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user