diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index c73ef06..db47d22 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -21,7 +21,7 @@ parse_literal2(Result, Tk, String) -> case next_token(Tk, String) of {ok, {{eof, _, _, _, _}, _, _}} -> {ok, Result}; - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -49,38 +49,58 @@ next_token({tk, _, Col}, "\n" ++ Rest) -> next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> alphanum_token(Tk, Tk, String, []); next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> - num_token(Tk, Tk, String, []); + num_token(Tk, Tk, String, [], 0); next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> - bytes_token({tk, Row, Col}, {tk, Row + 2, Col}, Rest, [C, $#]); + bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []); next_token({tk, Row, Col}, [Char | Rest]) -> - Token = {character, [Char], Row, Col, Col}, + Token = {character, [Char], Char, Row, Col, Col}, {ok, {Token, {tk, Row + 1, Col}, Rest}}. alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), - Token = {alphanum, AlphaString, Row, Start, End}, + Token = {alphanum, AlphaString, AlphaString, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. 
-num_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_NUM(C) -> - num_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); -num_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_NUM(C) -> - num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Acc]); -num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> - NumString = lists:reverse(Acc), - Token = {integer, NumString, Row, Start, End}, +num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> + NewValue = Value * 10 + (C - $0), + num_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], NewValue); +num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> + NewValue = Value * 10 + (C - $0), + num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Chars], NewValue); +num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> + NumString = lists:reverse(Chars), + Token = {integer, NumString, Value, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. -bytes_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_HEX(C) -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); -bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_HEX(C) -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Acc]); -bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> - BytesString = lists:reverse(Acc), - Token = {bytes, BytesString, Row, Start, End}, +bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> + Digit = convert_digit(C), + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], [Digit | Digits]); +bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> + Digit = convert_digit(C), + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Chars], [Digit | Digits]); +bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> + BytesString = lists:reverse(Chars), + Value = reverse_combine_nibbles(Digits, <<>>), + Token = {bytes, BytesString, Value, Row, Start, End}, {ok, 
{Token, {tk, Row, End}, String}}. +convert_digit(C) when C >= $0, C =< $9 -> + C - $0; +convert_digit(C) when C >= $A, C =< $Z -> + C - $A + 10; +convert_digit(C) when C >= $a, C =< $z -> + C - $a + 10. + +reverse_combine_nibbles([D1, D2 | Rest], Acc) -> + NewAcc = <<D2:4, D1:4, Acc/binary>>, + reverse_combine_nibbles(Rest, NewAcc); +reverse_combine_nibbles([D1], Acc) -> + <<0:4, D1:4, Acc/binary>>; +reverse_combine_nibbles([], Acc) -> + Acc. + %%% Sophia Literal Parser @@ -104,8 +124,7 @@ parse_expression(Type, Tk, String) -> {ok, {Token, NewTk, NewString}} = next_token(Tk, String), parse_expression2(Type, NewTk, NewString, Token). -parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> - Value = convert_int(S), +parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) -> case Type of {_, _, integer} -> {ok, {Value, Tk, String}}; @@ -114,8 +133,7 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) -> - Value = convert_bytes(S), +parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) -> Len = byte_size(Value), Result = {bytes, Value}, case Type of @@ -130,15 +148,15 @@ parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) -> parse_tuple(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, 
Start); -parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) -> +parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> parse_variant(Type, Tk, String, Ident, Row, Start, End); -parse_expression2(_, _, _, {_, S, Row, Start, End}) -> +parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. unknown_type() -> @@ -148,54 +166,12 @@ expect_tokens([], Tk, String) -> {ok, {Tk, String}}; expect_tokens([Str | Rest], Tk, String) -> case next_token(Tk, String) of - {ok, {{_, Str, _, _, _}, NewTk, NewString}} -> + {ok, {{_, Str, _, _, _, _}, NewTk, NewString}} -> expect_tokens(Rest, NewTk, NewString); - {ok, {{_, Actual, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. -convert_int(Chars) -> - convert_int(Chars, 0). - -convert_int("_" ++ Chars, Result) -> - convert_int(Chars, Result); -convert_int([N | Chars], Result) -> - Digit = N - $0, - NewResult = Result * 10 + Digit, - convert_int(Chars, NewResult); -convert_int([], Result) -> - Result. - -convert_bytes(Chars) -> - % We do this as two reversing foldl type loops. One removes underscores and - % converts the ASCII into integers, and the other peels off pairs of - % numbers to form bytes. - Digits = reverse_convert_digits(Chars, []), - reverse_combine_nibbles(Digits, <<>>). - -reverse_convert_digits("_" ++ Rest, Acc) -> - reverse_convert_digits(Rest, Acc); -reverse_convert_digits([C | Rest], Acc) -> - Digit = convert_digit(C), - reverse_convert_digits(Rest, [Digit | Acc]); -reverse_convert_digits([], Acc) -> - Acc. - -convert_digit(C) when C >= $0, C =< $9 -> - C - $0; -convert_digit(C) when C >= $A, C =< $Z -> - C - $A + 10; -convert_digit(C) when C >= $a, C =< $z -> - C - $a + 10. 
- -reverse_combine_nibbles([D1, D2 | Rest], Acc) -> - NewAcc = <<D2:4, D1:4, Acc/binary>>, - reverse_combine_nibbles(Rest, NewAcc); -reverse_combine_nibbles([D1], Acc) -> - <<0:4, D1:4, Acc/binary>>; -reverse_combine_nibbles([], Acc) -> - Acc. - %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> @@ -207,7 +183,7 @@ parse_list({O, N, _}, _, _, Row, Start) -> parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {Token, NewTk, NewString}} -> parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) @@ -226,9 +202,9 @@ parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); {error, Reason} -> {error, Reason} @@ -281,14 +257,14 @@ parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> end; parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) -> {ok, {lists:reverse(Acc), Tk, String}}; -parse_multivalue2([], _, _, _, _, _, {_, S, Row, Start, End}) -> +parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. 
parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + {ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} -> check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); {error, Reason} -> {error, Reason} @@ -331,9 +307,9 @@ parse_variant3(Arities, Tag, [], Tk, String) -> {ok, {Result, Tk, String}}; parse_variant3(Arities, Tag, ElemTypes, Tk, String) -> case next_token(Tk, String) of - {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} -> + {ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} -> parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start); - {ok, {{_, Actual, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. 
@@ -361,13 +337,13 @@ parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> parse_record(Fields, Tk, String, #{}); parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> case next_token(Tk, String) of - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {#{}, NewTk, NewString}}; - {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{}); - {ok, {{alphanum, _, Row, Start, End}, _, _}} -> + {ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {error, {unresolved_record, Row, Start, End}}; - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end; parse_record_or_map({O, N, _}, _, _, Row, Start) -> @@ -375,11 +351,11 @@ parse_record_or_map({O, N, _}, _, _, Row, Start) -> parse_record(Fields, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{alphanum, Ident, Row, Start, End}, NewTk, NewString}} -> + {ok, {{alphanum, Ident, _, Row, Start, End}, NewTk, NewString}} -> parse_record2(Fields, NewTk, NewString, Acc, Ident, Row, Start, End); - {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + {ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -420,11 +396,11 @@ parse_record5(Fields, Tk, String, Acc, Ident, Type) -> parse_record6(Fields, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_record(Fields, NewTk, NewString, Acc); - {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + 
{ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -455,11 +431,11 @@ parse_record_final_loop([], _, FieldsReverse) -> parse_map(KeyType, ValueType, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> parse_map2(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}}} -> {error, {unexpected_token, S, Row, Start, End}} end. @@ -490,11 +466,11 @@ parse_map4(KeyType, ValueType, Tk, String, Acc, Key) -> parse_map5(KeyType, ValueType, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_map(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}}} -> {error, {unexpected_token, S, Row, Start, End}} end.