diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index c9f106c..0e491a5 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -100,6 +100,8 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> end; parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); +parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> + parse_tuple(Type, Tk, String, Row, Start); parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, Start); parse_expression2(_, _, _, {_, S, Row, Start, End}) -> @@ -121,39 +123,108 @@ expect_tokens([Str | Rest], Tk, String) -> %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> - parse_list_loop(Inner, Tk, String, Row, Start, []); + parse_list_loop(Inner, Tk, String, "]", Row, Start, []); parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> - parse_list_loop(unknown_type(), Tk, String, Row, Start, []); + parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []); parse_list({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, list, Row, Start, Start}}. -parse_list_loop(Inner, Tk, String, Row, Start, Acc) -> +parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {Token, NewTk, NewString}} -> - parse_list_loop2(Inner, NewTk, NewString, Row, Start, Acc, Token) + parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) end. -parse_list_loop2(Inner, Tk, String, Row, Start, Acc, Token) -> +parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> case parse_expression2(Inner, Tk, String, Token) of {ok, {Value, NewTk, NewString}} -> - parse_list_loop3(Inner, NewTk, NewString, Row, Start, [Value | Acc]); + parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]); {error, Reason} -> - Wrapped = wrap_error(Reason, {list_element, length(Acc)}), + Wrapper = choose_list_error_wrapper(CloseChar), + % TODO: Are tuple indices off by one from list indices? + Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end. -parse_list_loop3(Inner, Tk, String, Row, Start, Acc) -> +parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> - parse_list_loop(Inner, NewTk, NewString, Row, Start, Acc); + parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); {error, Reason} -> {error, Reason} end. +choose_list_error_wrapper("]") -> list_element; +choose_list_error_wrapper(")") -> tuple_element. + +%%% Tuple Parsing + +parse_tuple({_, _, {tuple, Types}}, Tk, String, Row, Start) -> + case parse_multivalue(Types, Tk, String, Row, Start, []) of + {ok, {TermList, NewTk, NewString}} -> + Result = {tuple, list_to_tuple(TermList)}, + {ok, {Result, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end; +parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) -> + % An untyped tuple is a list of untyped terms, and weirdly our list parser + % works perfectly for that, as long as we change the closing character to + % be ")" instead of "]". + case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of + {ok, {TermList, NewTk, NewString}} -> + Result = {tuple, list_to_tuple(TermList)}, + {ok, {Result, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end; +parse_tuple({O, N, _}, _, _, Row, Start) -> + {error, {wrong_type, O, N, tuple, Row, Start, Start}}. + +parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); + {ok, {Token, NewTk, NewString}} -> + parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token) + end. + +parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> + case parse_expression2(Next, Tk, String, Token) of + {ok, {Value, NewTk, NewString}} -> + parse_multivalue3(Rest, NewTk, NewString, Row, Start, [Value | Acc]); + {error, Reason} -> + Wrapper = choose_list_error_wrapper(")"), + % TODO: Are tuple indices off by one from list indices? + Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), + {error, Wrapped} + end; +parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) -> + {ok, {lists:reverse(Acc), Tk, String}}; +parse_multivalue2([], _, _, _, _, _, {_, S, Row, Start, End}) -> + {error, {unexpected_token, S, Row, Start, End}}. + +parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); + {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); + {error, Reason} -> + {error, Reason} + end. + +check_multivalue_long_enough([], Tk, String, _, _, Acc) -> + {ok, {lists:reverse(Acc), Tk, String}}; +check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> + GotCount = length(Got), + ExpectCount = length(Remaining) + GotCount, + {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}. + %%% Record parsing parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> @@ -350,6 +421,9 @@ list_test() -> list_of_lists_test() -> check_parser("[[], [1], [2, 3]]", [[], [1], [2, 3]]). +tuple_test() -> + check_parser("(1, [2, 3], (4, 5))", {tuple, {1, [2, 3], {tuple, {4, 5}}}}). + maps_test() -> check_parser("{[1] = 2, [3] = 4}", #{1 => 2, 3 => 4}).