Bytes lexing
I don't handle underscores in bytes correctly... Nor in integers, for that matter.
This commit is contained in:
parent
2bf384ca82
commit
f1696e2b9e
@ -4,6 +4,8 @@
|
||||
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
|
||||
-license("GPL-3.0-or-later").
|
||||
|
||||
-export([check_parser/1]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
parse_literal(Type, String) ->
|
||||
@ -33,25 +35,26 @@ next_token({tk, Row, Col}, " " ++ Rest) ->
|
||||
next_token({tk, Row + 1, Col}, Rest);
|
||||
next_token({tk, Row, Col}, "\t" ++ Rest) ->
|
||||
next_token({tk, Row + 1, Col}, Rest);
|
||||
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
||||
num_token(Tk, Tk, String, []);
|
||||
next_token({tk, _, Col}, "\r\n" ++ Rest) ->
|
||||
next_token({tk, 1, Col + 1}, Rest);
|
||||
next_token({tk, _, Col}, "\r" ++ Rest) ->
|
||||
next_token({tk, 1, Col + 1}, Rest);
|
||||
next_token({tk, _, Col}, "\n" ++ Rest) ->
|
||||
next_token({tk, 1, Col + 1}, Rest);
|
||||
next_token(Tk, [N | _] = String) when N >= $A, N =< $Z ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, [N | _] = String) when N >= $a, N =< $z ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, [$_ | _] = String) ->
|
||||
next_token(Tk, "_" ++ _ = String) ->
|
||||
alphanum_token(Tk, Tk, String, []);
|
||||
next_token(Tk, [N | _] = String) when N >= $0, N =< $9 ->
|
||||
num_token(Tk, Tk, String, []);
|
||||
%% A "#" introduces a bytes literal. The rest of the input is handed to
%% bytes_token/4 with the accumulator seeded with the "#" just consumed.
%% First position: where the token starts. Second position: the cursor one step
%% past the "#". The cursor must carry the variable Col (not the atom `col`),
%% because bytes_token/4 copies that field into the token and into the cursor
%% it returns.
next_token({tk, Row, Col}, "#" ++ Rest) ->
    bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "#");
|
||||
next_token({tk, Row, Col}, [Char | Rest]) ->
|
||||
Token = {character, [Char], Row, Col, Col},
|
||||
{ok, {Token, {tk, Row + 1, Col}, Rest}}.
|
||||
|
||||
%% Lexes a run of decimal digits into an integer token.
%%   Origin  - position tuple {tk, _, _} recorded when the token began
%%   cursor  - current position; its second field is bumped once per digit
%%   input   - remaining characters
%%   Seen    - digits consumed so far, most recent first
%% Returns {ok, {Token, Cursor, Remaining}} where Token is
%% {integer, Digits, P, S, E}: P is the cursor's second field, S the origin's
%% third field and E the cursor's third field.
%% NOTE(review): the field called Row advances per character here; confirm the
%% intended row/column meaning against the rest of the lexer.
num_token(Origin, {tk, Pos, Line}, [Digit | Tail], Seen) when $0 =< Digit, Digit =< $9 ->
    Advanced = {tk, Pos + 1, Line},
    num_token(Origin, Advanced, Tail, [Digit | Seen]);
num_token({tk, _, First}, {tk, Pos, Last} = Cursor, Remaining, Seen) ->
    Digits = lists:reverse(Seen),
    {ok, {{integer, Digits, Pos, First, Last}, Cursor, Remaining}}.
|
||||
|
||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z ->
|
||||
alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]);
|
||||
alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z ->
|
||||
@ -65,6 +68,24 @@ alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) ->
|
||||
Token = {alphanum, AlphaString, Row, Start, End},
|
||||
{ok, {Token, {tk, Row, End}, String}}.
|
||||
|
||||
%% Lexes a run of decimal digits into an integer token.
%%   Origin  - position tuple {tk, _, _} recorded when the token began
%%   cursor  - current position; its second field is bumped once per digit
%%   input   - remaining characters
%%   Seen    - digits consumed so far, most recent first
%% Returns {ok, {Token, Cursor, Remaining}} where Token is
%% {integer, Digits, P, S, E}: P is the cursor's second field, S the origin's
%% third field and E the cursor's third field.
%% NOTE(review): the field called Row advances per character here; confirm the
%% intended row/column meaning against the rest of the lexer.
num_token(Origin, {tk, Pos, Line}, [Digit | Tail], Seen) when $0 =< Digit, Digit =< $9 ->
    Advanced = {tk, Pos + 1, Line},
    num_token(Origin, Advanced, Tail, [Digit | Seen]);
num_token({tk, _, First}, {tk, Pos, Last} = Cursor, Remaining, Seen) ->
    Digits = lists:reverse(Seen),
    {ok, {{integer, Digits, Pos, First, Last}, Cursor, Remaining}}.
|
||||
|
||||
%% Lexes the hexadecimal digits of a bytes literal.
%%   Origin  - position tuple recorded when the token began
%%   cursor  - current position; its second field is bumped once per hex digit
%%   input   - remaining characters
%%   Seen    - characters consumed so far, most recent first (the caller seeds
%%             it with whatever prefix it has already consumed)
%% Accepts 0-9, A-F and a-f; the first other character (or end of input) ends
%% the token. Returns {ok, {Token, Cursor, Remaining}} where Token is
%% {bytes, Text, P, S, E}: P is the cursor's second field, S the origin's
%% third field and E the cursor's third field.
bytes_token(Origin, {tk, Pos, Line}, [Hex | Tail], Seen)
  when $0 =< Hex, Hex =< $9;
       $A =< Hex, Hex =< $F;
       $a =< Hex, Hex =< $f ->
    bytes_token(Origin, {tk, Pos + 1, Line}, Tail, [Hex | Seen]);
bytes_token({tk, _, First}, {tk, Pos, Last} = Cursor, Remaining, Seen) ->
    Text = lists:reverse(Seen),
    {ok, {{bytes, Text, Pos, First, Last}, Cursor, Remaining}}.
|
||||
|
||||
|
||||
%%% Sophia Literal Parser
|
||||
|
||||
@ -98,6 +119,22 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) ->
|
||||
{O, N, _} ->
|
||||
{error, {wrong_type, O, N, integer, Row, Start, End}}
|
||||
end;
|
||||
%% Bytes literal: the token text is "#" followed by hex digits. The digits are
%% converted to a binary and checked against the third element of Type:
%%   {bytes, [any]}  - any length is accepted
%%   {bytes, [Len]}  - accepted only when Len equals the literal's byte size,
%%                     otherwise {error, {bytes_wrong_size, Expected, Actual, ...}}
%%   unknown_type    - accepted as-is
%%   anything else   - {error, {wrong_type, O, N, bytes, Row, Start, End}}
%% The wrong_type error names `bytes` as the kind of literal found, in the same
%% slot where the integer clause reports `integer`.
parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) ->
    Value = convert_bytes(S),
    Len = byte_size(Value),
    Result = {bytes, Value},
    case Type of
        {_, _, {bytes, [any]}} ->
            {ok, {Result, Tk, String}};
        {_, _, {bytes, [Len]}} ->
            % Len is already bound, so this clause matches only the exact size.
            {ok, {Result, Tk, String}};
        {_, _, {bytes, [ExpectedLen]}} ->
            {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}};
        {_, _, unknown_type} ->
            {ok, {Result, Tk, String}};
        {O, N, _} ->
            {error, {wrong_type, O, N, bytes, Row, Start, End}}
    end;
|
||||
parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) ->
|
||||
parse_list(Type, Tk, String, Row, Start);
|
||||
parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) ->
|
||||
@ -122,6 +159,25 @@ expect_tokens([Str | Rest], Tk, String) ->
|
||||
{error, {unexpected_token, Actual, Row, Start, End}}
|
||||
end.
|
||||
|
||||
%% Converts a string of hex digit characters into a binary.
%% Each character is mapped to its nibble value with convert_nibble/1; the
%% nibble list is then reversed (last digit first), which is the order
%% reverse_combine_nibbles/2 consumes, starting from an empty binary.
convert_bytes(Chars) ->
    Nibbles = [convert_nibble(Char) || Char <- Chars],
    LastDigitFirst = lists:reverse(Nibbles),
    reverse_combine_nibbles(LastDigitFirst, <<>>).
|
||||
|
||||
%% Maps one hexadecimal digit character to its value in 0..15.
%% Only 0-9, A-F and a-f are accepted. Any other character fails with a
%% function_clause error instead of producing a value above 15, which the
%% 4-bit segments built from these values would silently truncate.
convert_nibble(C) when C >= $0, C =< $9 ->
    C - $0;
convert_nibble(C) when C >= $A, C =< $F ->
    C - $A + 10;
convert_nibble(C) when C >= $a, C =< $f ->
    C - $a + 10.
|
||||
|
||||
%% Packs a list of nibble values, given last digit first, into a binary.
%% Nibbles are taken two at a time: the first of each pair becomes the low
%% half of a byte and the second the high half, and the byte is prepended to
%% the bytes built so far. A single leftover nibble becomes a byte whose high
%% half is zero. An empty list returns the accumulated binary unchanged.
reverse_combine_nibbles([], Bytes) ->
    Bytes;
reverse_combine_nibbles([Lone], Bytes) ->
    <<0:4, Lone:4, Bytes/binary>>;
reverse_combine_nibbles([Low, High | Remaining], Bytes) ->
    reverse_combine_nibbles(Remaining, <<High:4, Low:4, Bytes/binary>>).
|
||||
|
||||
%%% List Parsing
|
||||
|
||||
parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) ->
|
||||
@ -430,12 +486,13 @@ wrap_error(Reason, _) -> Reason.
|
||||
%%% Tests
|
||||
|
||||
%% Test helper: parses the Sophia literal with parse_literal/2 and checks that
%% the result is exactly {ok, Fate}.
%% Returns ok on a match. Otherwise raises
%% {to_fate_failed, Sophia, Fate, Outcome}, where Outcome is the {ok, _} or
%% {error, _} tuple parse_literal/2 produced, so a parse error is reported in
%% the failure term rather than surfacing as a badmatch.
check_sophia_to_fate(Type, Sophia, Fate) ->
    case parse_literal(Type, Sophia) of
        {ok, Fate} ->
            % Fate is already bound, so this matches only the expected value.
            ok;
        {ok, FateActual} ->
            erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
        {error, Reason} ->
            erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
    end.
|
||||
|
||||
compile_entrypoint_code_and_type(Source, Entrypoint) ->
|
||||
@ -502,6 +559,9 @@ tuple_test() ->
|
||||
%% Runs check_parser/1 on a map literal with two integer-keyed entries.
maps_test() ->
    MapLiteral = "{[1] = 2, [3] = 4}",
    check_parser(MapLiteral).
|
||||
|
||||
%% Runs check_parser/1 on a bytes literal with an odd number of hex digits
%% (eleven digits after the "#").
bytes_test() ->
    BytesLiteral = "#DEAD000BEEF",
    check_parser(BytesLiteral).
|
||||
|
||||
records_test() ->
|
||||
TypeDef = "record pair = {x: int, y: int}",
|
||||
Sophia = "{x = 1, y = 2}",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user