diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index be294c2..4c1c8cb 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -4,7 +4,8 @@ -copyright("Jarvis Carroll "). -license("GPL-3.0-or-later"). --export([parse_literal/1, parse_literal/2, check_parser/1]). +-export([parse_literal/1, parse_literal/2]). +-export([fate_to_list/1, fate_to_list/2, fate_to_iolist/1, fate_to_iolist/2]). -include_lib("eunit/include/eunit.hrl"). @@ -173,7 +174,7 @@ parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A) Byte = convert_digit(A) * 16 + convert_digit(B), {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}}; parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) -> - case escape_char(C) of + case unescape_char(C) of {ok, ByteVal} -> {ok, {ByteVal, [C, $\ | SourceChars], {Row, Col + 2}, Rest}}; error -> @@ -195,19 +196,31 @@ escape_long_hex_code(_, Pos, [], SourceChars, Value) -> % produce an unclosed string error instead. {ok, {Value, SourceChars, Pos, []}}. -escape_char($b) -> {ok, $\b}; -escape_char($e) -> {ok, $\e}; -escape_char($f) -> {ok, $\f}; -escape_char($n) -> {ok, $\n}; -escape_char($r) -> {ok, $\r}; -escape_char($t) -> {ok, $\t}; -escape_char($v) -> {ok, $\v}; +unescape_char($b) -> {ok, $\b}; +unescape_char($e) -> {ok, $\e}; +unescape_char($f) -> {ok, $\f}; +unescape_char($n) -> {ok, $\n}; +unescape_char($r) -> {ok, $\r}; +unescape_char($t) -> {ok, $\t}; +unescape_char($v) -> {ok, $\v}; % Technically \" and \' are only valid inside their own quote characters, not % each other, but whatever, we will just be permissive here. -escape_char($") -> {ok, $\"}; -escape_char($') -> {ok, $\'}; -escape_char($\\) -> {ok, $\\}; -escape_char(_) -> error. +unescape_char($") -> {ok, $\"}; +unescape_char($') -> {ok, $\'}; +unescape_char($\\) -> {ok, $\\}; +unescape_char(_) -> error. + +% Not needed until later, but we'll put it here for symmetry. +escape_char($\b) -> "\\b"; +escape_char($\e) -> "\\e"; +escape_char($\f) -> "\\f"; +escape_char($\n) -> "\\n"; +escape_char($\r) -> "\\r"; +escape_char($\t) -> "\\t"; +escape_char($\v) -> "\\v"; +escape_char($\") -> "\\\""; +escape_char($\\) -> "\\\\"; +escape_char(I) -> I. %%% Sophia Literal Parser @@ -902,16 +915,240 @@ parse_map5(KeyType, ValueType, Pos, String, Acc) -> % TODO wrap_error(Reason, _) -> Reason. +%%% Pretty Printing + +fate_to_list(Term) -> + fate_to_list(unknown_type(), Term). + +fate_to_list(Type, Term) -> + IOList = fate_to_iolist(Type, Term), + unicode:characters_to_list(IOList). + +fate_to_iolist(Term) -> + fate_to_iolist(unknown_type(), Term). + +% Special case for singleton records, since they are erased during compilation. +fate_to_iolist({_, _, {record, [{FieldName, FieldType}]}}, Term) -> + singleton_record_to_iolist(FieldName, FieldType, Term); +% Aggregate types, where we should check if there is useful type information to +% act on. Case logic is made explicit so that the default cases stand out. +fate_to_iolist(Type, {tuple, Tuple}) -> + case Type of + {_, _, {record, FieldTypes}} -> + record_to_iolist(FieldTypes, Tuple); + {_, _, {tuple, ElemTypes}} -> + tuple_to_iolist(ElemTypes, Tuple); + _ -> + tuple_to_iolist([], Tuple) + end; +fate_to_iolist(Type, {variant, _, Tag, Tuple}) -> + case Type of + {O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) -> + variant_to_iolist(O, N, VariantTypes, Tag, Tuple); + {O, N, _} -> + % TODO: Make up a special syntax for anonymous variant terms. + erlang:exit({untyped_variant, O, N}); + _ -> + erlang:exit({untyped_variant, unknown_type, already_normalized}) + end; +fate_to_iolist(Type, List) when is_list(List) -> + case Type of + {_, _, {list, [InnerType]}} -> + list_to_iolist(InnerType, List); + _ -> + list_to_iolist(unknown_type(), List) + end; +fate_to_iolist(Type, Map) when is_map(Map) -> + case Type of + {_, _, {map, [K, V]}} -> + map_to_iolist(K, V, Map); + _ -> + map_to_iolist(unknown_type(), unknown_type(), Map) + end; +% Other FATE types, where no recursion is needed, but type information could +% influence the format that is used. +fate_to_iolist(_, true) -> + "true"; +fate_to_iolist(_, false) -> + "false"; +fate_to_iolist(_, {bits, 0}) -> + "Bits.none"; +fate_to_iolist(_, {bits, -1}) -> + "Bits.all"; +fate_to_iolist(_, {bits, I}) when I > 0 -> + ["#", integer_to_list(I, 16)]; +fate_to_iolist(_, {bits, I}) when I < 0 -> + integer_to_list(I, 10); +fate_to_iolist({_, _, char}, $') -> + % Special case since it needs to be escaped in char literals. + "'\\''"; +fate_to_iolist({_, _, char}, $") -> + % Special case since it does NOT need to be escaped in char literals. + "'\"'"; +fate_to_iolist({_, _, char}, I) when is_integer(I) -> + [$', escape_char(I), $']; +fate_to_iolist(_, I) when is_integer(I) -> + integer_to_list(I); +fate_to_iolist(_, {address, Addr}) -> + gmser_api_encoder:encode(account_pubkey, Addr); +fate_to_iolist(_, {contract, Addr}) -> + gmser_api_encoder:encode(contract_pubkey, Addr); +fate_to_iolist(_, {bytes, Bytes}) -> + Size = bit_size(Bytes), + <> = Bytes, + ["#", integer_to_list(IntValue, 16)]; +fate_to_iolist(_, Bytes) when is_binary(Bytes) -> + escape_string(Bytes). + +escape_string(Binary) -> + escape_string(Binary, []). + +escape_string(<>, Acc) -> + NewAcc = [Acc, escape_char(C)], + escape_string(Rest, NewAcc); +escape_string(<<>>, Acc) -> + [$", Acc, $"]. + +tuple_to_iolist([ElemType], {Elem}) -> + Inner = fate_to_iolist(ElemType, Elem), + ["(", Inner, ",)"]; +tuple_to_iolist(_, {Elem}) -> + Inner = fate_to_iolist(unknown_type(), Elem), + ["(", Inner, ",)"]; +tuple_to_iolist(ElemTypes, Tuple) -> + Elems = tuple_to_list(Tuple), + Multivalue = multivalue_to_iolist(ElemTypes, Elems), + ["(", Multivalue, ")"]. + +list_to_iolist(InnerType, Elems) -> + InnerChars = list_elems_to_iolist(InnerType, Elems), + ["[", InnerChars, "]"]. + +variant_to_iolist(O, N, Variants, Tag, Tuple) -> + Prefix = choose_variant_prefix(O, N), + {Name, ElemTypes} = lists:nth(Tag + 1, Variants), + case tuple_size(Tuple) of + 0 -> + [Prefix, Name]; + _ -> + Elems = tuple_to_list(Tuple), + Multivalue = multivalue_to_iolist(ElemTypes, Elems), + [Prefix, Name, "(", Multivalue, ")"] + end. + +choose_variant_prefix(O, N) -> + case get_typename(O, N) of + [Namespace, _] -> + [Namespace, "."]; + _ -> + [] + end. + +multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) -> + FirstTermChars = fate_to_iolist(FirstType, FirstTerm), + multivalue_to_iolist(ElemTypes, Elems, FirstTermChars); +multivalue_to_iolist(_, Elems) -> + list_elems_to_iolist(unknown_type(), Elems). + +multivalue_to_iolist([NextType | RestTypes], [NextTerm | RestTerms], Acc) -> + NextTermChars = fate_to_iolist(NextType, NextTerm), + multivalue_to_iolist(RestTypes, RestTerms, [Acc, ", ", NextTermChars]); +multivalue_to_iolist(_, Elems, Acc) -> + list_elems_to_iolist(unknown_type(), Elems, Acc). + +list_elems_to_iolist(Type, [FirstTerm | Rest]) -> + FirstTermChars = fate_to_iolist(Type, FirstTerm), + list_elems_to_iolist(Type, Rest, FirstTermChars); +list_elems_to_iolist(_, []) -> + "". + +list_elems_to_iolist(Type, [Next | Rest], Acc) -> + NextChars = fate_to_iolist(Type, Next), + list_elems_to_iolist(Type, Rest, [Acc, ", ", NextChars]); +list_elems_to_iolist(_, [], Acc) -> + Acc. + +singleton_record_to_iolist(FieldName, FieldType, Term) -> + FieldChars = fate_to_iolist(FieldType, Term), + ["{", FieldName, " = ", FieldChars, "}"]. + +record_to_iolist(FieldTypes, Tuple) -> + case length(FieldTypes) == tuple_size(Tuple) of + true -> + Chars = record_fields_to_iolist(FieldTypes, tuple_to_list(Tuple)), + ["{", Chars, "}"]; + false -> + tuple_to_iolist([], Tuple) + end. + +record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms]) -> + TermChars = fate_to_iolist(Type, Term), + record_fields_to_iolist(FieldTypes, Terms, [Name, " = ", TermChars]); +record_fields_to_iolist(_, []) -> + "". + +record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms], Acc) -> + TermChars = fate_to_iolist(Type, Term), + NewAcc = [Acc, ", ", Name, " = ", TermChars], + record_fields_to_iolist(FieldTypes, Terms, NewAcc); +record_fields_to_iolist(_, [], Acc) -> + Acc. + +map_to_iolist(K, V, Map) -> + Iter = maps:iterator(Map), + case maps:next(Iter) of + {KeyTerm, ValTerm, Rest} -> + KChars = fate_to_iolist(K, KeyTerm), + VChars = fate_to_iolist(V, ValTerm), + RestChars = map_to_iolist_inner(K, V, Rest, ["[", KChars, "] = ", VChars]), + ["{", RestChars, "}"]; + none -> + "{}" + end. + +map_to_iolist_inner(K, V, Iter, Acc) -> + case maps:next(Iter) of + {KeyTerm, ValTerm, Rest} -> + KChars = fate_to_iolist(K, KeyTerm), + VChars = fate_to_iolist(V, ValTerm), + map_to_iolist_inner(K, V, Rest, [Acc, ", [", KChars, "] = ", VChars]); + none -> + Acc + end. + %%% Tests check_sophia_to_fate(Type, Sophia, Fate) -> case parse_literal(Type, Sophia) of {ok, Fate} -> ok; - {ok, FateActual} -> - erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}}); - {error, Reason} -> - erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) + Result -> + erlang:error({to_fate_failed, Sophia, Fate, Result}) + end. + +check_fate_to_sophia(Type, Fate, Sophia) -> + case fate_to_list(Type, Fate) of + Sophia -> + ok; + Result -> + erlang:error({to_sophia_failed, Fate, Sophia, Result}) + end. + +roundtrip_parser(Type, Sophia, Fate) -> + check_sophia_to_fate(Type, Sophia, Fate), + check_fate_to_sophia(Type, Fate, Sophia), + + ok. + +% These test function names are getting ridiculous... I might want to optarg +% them or something, but, whatever, it's test code. +roundtrip_parser_lenient(Type, Sophia, Fate) -> + check_sophia_to_fate(Type, Sophia, Fate), + case fate_to_list(Type, Fate) of + Sophia -> + ok; + SophiaActual -> + check_sophia_to_fate(Type, SophiaActual, Fate) end. compile_entrypoint_value_and_type(Source, Entrypoint) -> @@ -943,65 +1180,76 @@ check_parser(Sophia) -> {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Check that when we parse the term we get the same value as the Sophia - % compiler. + % compiler. Also check that the pretty printer gives the same string back. check_sophia_to_fate(unknown_type(), Sophia, Fate), % Then, once we know that the term is correct, make sure that it is still - % accepted *with* type info. + % accepted *with* type info. Don't bother roundtripping this, since the + % pretty printer doesn't enforce types anyway. check_sophia_to_fate(Type, Sophia, Fate). +check_parser_roundtrip(Sophia) -> + Source = "contract C = entrypoint f() = " ++ Sophia, + {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), + roundtrip_parser(Type, Sophia, Fate), + % Without type information we might get a more generic result in Sophia + % syntax. Let's do a lenient test. + roundtrip_parser_lenient(unknown_type(), Sophia, Fate). + check_parser_with_typedef(Typedef, Sophia) -> % Compile the type definitions alongside the usual literal expression. Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Do a typed parse, as usual, but there are probably record/variant - % definitions in the AACI, so untyped parses probably don't work. - check_sophia_to_fate(Type, Sophia, Fate). + % definitions in the AACI, so untyped parses probably don't work, and + % variants often have optional namespaces, so the sophia result might not + % match exactly, but should still be equivalent. + roundtrip_parser_lenient(Type, Sophia, Fate). anon_types_test() -> % Integers. - check_parser("123"), + check_parser_roundtrip("123"), check_parser("1_2_3"), - check_parser("-123"), + check_parser_roundtrip("-123"), % Booleans. - check_parser("true"), - check_parser("false"), - check_parser("[true, false]"), + check_parser_roundtrip("true"), + check_parser_roundtrip("false"), + check_parser_roundtrip("[true, false]"), % Bytes. - check_parser("#DEAD000BEEF"), + check_parser_roundtrip("#DEAD000BEEF"), check_parser("#DE_AD0_00B_EEF"), % Strings. - check_parser("\"hello world\""), + check_parser_roundtrip("\"hello world\""), % The Sophia compiler doesn't handle this right, but we should still. - %check_parser("\"ÿ\""), - %check_parser("\"♣\""), + %check_parser_roundtrip("\"ÿ\""), + %check_parser_roundtrip("\"♣\""), % Characters. - check_parser("'A'"), - check_parser("['a', ' ', '[']"), - %check_parser("'ÿ'"), - %check_parser("'♣'"), + check_parser_roundtrip("'A'"), + check_parser_roundtrip("['a', ' ', '[']"), + %check_parser_roundtrip("'ÿ'"), + %check_parser_roundtrip("'♣'"), % List of integers. - check_parser("[1, 2, 3]"), + check_parser_roundtrip("[1, 2, 3]"), % List of lists. - check_parser("[[], [1], [2, 3]]"), + check_parser_roundtrip("[[], [1], [2, 3]]"), % Tuple. - check_parser("(1, [2, 3], (4, 5))"), + check_parser_roundtrip("(1, [2, 3], (4, 5))"), % Map. - check_parser("{[1] = 2, [3] = 4}"), + check_parser_roundtrip("{[1] = 2, [3] = 4}"), ok. string_escape_codes_test() -> - check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), + check_parser_roundtrip("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""), - check_parser("\"'\""), + check_parser_roundtrip("\"'\""), - check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"), + check_parser_roundtrip("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"), check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"), check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"), - check_parser("'\"'"), + check_parser_roundtrip("'\"'"), ok. @@ -1039,15 +1287,17 @@ namespace_variant_test() -> Term = "[N.A, N.B]", Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"), - check_sophia_to_fate(VariantType, Term, Fate), + roundtrip_parser(VariantType, Term, Fate), ok. chain_objects_test() -> % Address, - check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), + check_parser_roundtrip("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), % Two different forms of signature, - check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"), + check_parser("sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n"), + check_parser("#00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF"), + check_parser_roundtrip("#112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF"), % We have to build a totally custom contract example in order to get an % AACI and return value for parsing contract addresses. This is because the @@ -1058,18 +1308,18 @@ chain_objects_test() -> Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", Source = "contract C = entrypoint f(): C = " ++ Contract, {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"), - check_sophia_to_fate(ContractType, Contract, Fate), - check_sophia_to_fate(unknown_type(), Contract, Fate), + roundtrip_parser(ContractType, Contract, Fate), + roundtrip_parser(unknown_type(), Contract, Fate), ok. bits_test() -> - check_parser("Bits.all"), - check_parser("Bits.none"), + check_parser_roundtrip("Bits.all"), + check_parser_roundtrip("Bits.none"), {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"), - check_sophia_to_fate(Type, "5", {bits, 5}), - check_sophia_to_fate(Type, "-5", {bits, -5}), - check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}), + roundtrip_parser_lenient(Type, "5", {bits, 5}), + roundtrip_parser(Type, "-5", {bits, -5}), + roundtrip_parser(Type, "#123", {bits, 256 + 32 + 3}), ok. singleton_records_test() -> @@ -1104,7 +1354,8 @@ excess_parens_test() -> % Including multiple nestings of tuples and grouping, interleaved. check_parser("((((1), ((2, 3)))), 4)"), % Also empty tuples exist! - check_parser("()"), + check_parser_roundtrip("()"), + check_parser_roundtrip("(((), ()), ((), ()))"), check_parser("(((((), ())), ()))"), ok. @@ -1166,12 +1417,14 @@ singleton_test() -> % Now let's do some testing with this weird type, to see if we handle it % correctly. {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"), + "(1,)" = fate_to_list(SingletonType, {tuple, {1}}), % Some ambiguous nesting parens, for fun. {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"), % No trailing comma should give an error. {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"), % All of the above should behave the same in untyped contexts: {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"), + "(1,)" = fate_to_list(unknown_type(), {tuple, {1}}), {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"), {ok, 1} = parse_literal(unknown_type(), "(1)"),