From d014ae09826c16c06b50b3234e32645397cb175b Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Wed, 4 Feb 2026 07:00:39 +0000 Subject: [PATCH] Handle token/parse errors more carefully --- src/hz_sophia.erl | 82 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 8d578c2..3ae94d5 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -24,8 +24,8 @@ parse_literal2(Result, Pos, String) -> case next_token(Pos, String) of {ok, {{eof, _, _, _, _}, _, _}} -> {ok, Result}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token); {error, Reason} -> {error, Reason} end. @@ -235,8 +235,10 @@ parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) -> parse_alphanum(Type, Pos, String, S, Row, Start, End); -parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> - {error, {unexpected_token, S, Row, Start, End}}. +parse_expression2(_, _, _, {eof, _, _, _, _, _}) -> + {error, unexpected_end_of_file}; +parse_expression2(_, _, _, Token) -> + unexpected_token(Token). unknown_type() -> {unknown_type, already_normalized, unknown_type}. @@ -247,10 +249,24 @@ expect_tokens([Str | Rest], Pos, String) -> case next_token(Pos, String) of {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} -> expect_tokens(Rest, NewPos, NewString); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, Str); + {error, Reason} -> + {error, Reason} end. +unexpected_token(Token, _Expected) -> + % I don't know if this is a good idea, but sometimes there are only one or + % two tokens that could have worked, which might make for simple + % non-technical error messages. I don't know how to format that yet, + % though. + unexpected_token(Token). + +unexpected_token({eof, _, _, _, _, _}) -> + {error, expression_incomplete}; +unexpected_token({_, S, _, Row, Start, End}) -> + {error, {unexpected_token, S, Row, Start, End}}. + %%% Ambiguous Chain Object vs Identifier Parsing parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> @@ -328,7 +344,9 @@ parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> {ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {Token, NewPos, NewString}} -> - parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token) + parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token); + {error, Reason} -> + {error, Reason} end. parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> @@ -348,6 +366,8 @@ parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> {ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); + {ok, {Token, _, _}} -> + unexpected_token(Token, CloseChar); {error, Reason} -> {error, Reason} end. @@ -472,9 +492,9 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> % It is a real tuple! Try the normal logic, then. parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> + {ok, {Token, _, _}} -> % Anything else is just a boring parse error we can complain about. - {error, {unexpected_token, Actual, Row, Start, End}}; + unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. @@ -509,7 +529,9 @@ parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {Token, NewPos, NewString}} -> - parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token) + parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token); + {error, Reason} -> + {error, Reason} end. parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> @@ -524,8 +546,8 @@ parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> end; parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> {ok, {lists:reverse(Acc), Pos, String}}; -parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> - {error, {unexpected_token, S, Row, Start, End}}. +parse_multivalue2([], _, _, _, _, _, Token) -> + unexpected_token(Token, ")"). parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> case next_token(Pos, String) of @@ -533,8 +555,8 @@ parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. @@ -578,8 +600,10 @@ parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> case next_token(Pos, String) of {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "("); + {error, Reason} -> + {error, Reason} end. parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> @@ -612,8 +636,10 @@ parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) -> parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{}); {ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {error, {unresolved_record, Row, Start, End}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end; parse_record_or_map({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, map, Row, Start, Start}}. @@ -624,8 +650,8 @@ parse_record(Fields, Pos, String, Acc) -> parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End); {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); {error, Reason} -> {error, Reason} end. @@ -669,8 +695,8 @@ parse_record6(Fields, Pos, String, Acc) -> parse_record(Fields, NewPos, NewString, Acc); {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); {error, Reason} -> {error, Reason} end. @@ -708,8 +734,10 @@ parse_map(KeyType, ValueType, Pos, String, Acc) -> parse_map2(KeyType, ValueType, NewPos, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> {ok, {Acc, NewPos, NewString}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end. parse_map2(KeyType, ValueType, Pos, String, Acc) -> @@ -743,8 +771,10 @@ parse_map5(KeyType, ValueType, Pos, String, Acc) -> parse_map(KeyType, ValueType, NewPos, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> {ok, {Acc, NewPos, NewString}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end. % TODO