Pretty print Sophia expressions.

I think all of the tests roundtrip now, so if my parser was thorough, the pretty printer should be as thorough.
This commit is contained in:
Jarvis Carroll 2026-02-17 07:26:42 +00:00
parent 78c9c67f38
commit a0fbeebcdb

View File

@ -4,7 +4,8 @@
-copyright("Jarvis Carroll <spiveehere@gmail.com>").
-license("GPL-3.0-or-later").
-export([parse_literal/1, parse_literal/2, check_parser/1]).
-export([parse_literal/1, parse_literal/2]).
-export([fate_to_list/1, fate_to_list/2, fate_to_iolist/1, fate_to_iolist/2]).
-include_lib("eunit/include/eunit.hrl").
@ -173,7 +174,7 @@ parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A)
Byte = convert_digit(A) * 16 + convert_digit(B),
{ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}};
parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) ->
case escape_char(C) of
case unescape_char(C) of
{ok, ByteVal} ->
{ok, {ByteVal, [C, $\ | SourceChars], {Row, Col + 2}, Rest}};
error ->
@ -195,19 +196,31 @@ escape_long_hex_code(_, Pos, [], SourceChars, Value) ->
% produce an unclosed string error instead.
{ok, {Value, SourceChars, Pos, []}}.
escape_char($b) -> {ok, $\b};
escape_char($e) -> {ok, $\e};
escape_char($f) -> {ok, $\f};
escape_char($n) -> {ok, $\n};
escape_char($r) -> {ok, $\r};
escape_char($t) -> {ok, $\t};
escape_char($v) -> {ok, $\v};
unescape_char($b) -> {ok, $\b};
unescape_char($e) -> {ok, $\e};
unescape_char($f) -> {ok, $\f};
unescape_char($n) -> {ok, $\n};
unescape_char($r) -> {ok, $\r};
unescape_char($t) -> {ok, $\t};
unescape_char($v) -> {ok, $\v};
% Technically \" and \' are only valid inside their own quote characters, not
% each other, but whatever, we will just be permissive here.
escape_char($") -> {ok, $\"};
escape_char($') -> {ok, $\'};
escape_char($\\) -> {ok, $\\};
escape_char(_) -> error.
unescape_char($") -> {ok, $\"};
unescape_char($') -> {ok, $\'};
unescape_char($\\) -> {ok, $\\};
unescape_char(_) -> error.
% Not needed until later, but we'll put it here for symmetry.
escape_char($\b) -> "\\b";
escape_char($\e) -> "\\e";
escape_char($\f) -> "\\f";
escape_char($\n) -> "\\n";
escape_char($\r) -> "\\r";
escape_char($\t) -> "\\t";
escape_char($\v) -> "\\v";
escape_char($\") -> "\\\"";
escape_char($\\) -> "\\\\";
escape_char(I) -> I.
%%% Sophia Literal Parser
@ -902,16 +915,240 @@ parse_map5(KeyType, ValueType, Pos, String, Acc) ->
% TODO
wrap_error(Reason, _) -> Reason.
%%% Pretty Printing
fate_to_list(Term) ->
fate_to_list(unknown_type(), Term).
fate_to_list(Type, Term) ->
IOList = fate_to_iolist(Type, Term),
unicode:characters_to_list(IOList).
fate_to_iolist(Term) ->
fate_to_iolist(unknown_type(), Term).
% Special case for singleton records, since they are erased during compilation.
fate_to_iolist({_, _, {record, [{FieldName, FieldType}]}}, Term) ->
singleton_record_to_iolist(FieldName, FieldType, Term);
% Aggregate types, where we should check if there is useful type information to
% act on. Case logic is made explicit so that the default cases stand out.
fate_to_iolist(Type, {tuple, Tuple}) ->
case Type of
{_, _, {record, FieldTypes}} ->
record_to_iolist(FieldTypes, Tuple);
{_, _, {tuple, ElemTypes}} ->
tuple_to_iolist(ElemTypes, Tuple);
_ ->
tuple_to_iolist([], Tuple)
end;
fate_to_iolist(Type, {variant, _, Tag, Tuple}) ->
case Type of
{O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) ->
variant_to_iolist(O, N, VariantTypes, Tag, Tuple);
{O, N, _} ->
% TODO: Make up a special syntax for anonymous variant terms.
erlang:exit({untyped_variant, O, N});
_ ->
erlang:exit({untyped_variant, unknown_type, already_normalized})
end;
fate_to_iolist(Type, List) when is_list(List) ->
case Type of
{_, _, {list, [InnerType]}} ->
list_to_iolist(InnerType, List);
_ ->
list_to_iolist(unknown_type(), List)
end;
fate_to_iolist(Type, Map) when is_map(Map) ->
case Type of
{_, _, {map, [K, V]}} ->
map_to_iolist(K, V, Map);
_ ->
map_to_iolist(unknown_type(), unknown_type(), Map)
end;
% Other FATE types, where no recursion is needed, but type information could
% influence the format that is used.
fate_to_iolist(_, true) ->
"true";
fate_to_iolist(_, false) ->
"false";
fate_to_iolist(_, {bits, 0}) ->
"Bits.none";
fate_to_iolist(_, {bits, -1}) ->
"Bits.all";
fate_to_iolist(_, {bits, I}) when I > 0 ->
["#", integer_to_list(I, 16)];
fate_to_iolist(_, {bits, I}) when I < 0 ->
integer_to_list(I, 10);
fate_to_iolist({_, _, char}, $') ->
% Special case since it needs to be escaped in char literals.
"'\\''";
fate_to_iolist({_, _, char}, $") ->
% Special case since it does NOT need to be escaped in char literals.
"'\"'";
fate_to_iolist({_, _, char}, I) when is_integer(I) ->
[$', escape_char(I), $'];
fate_to_iolist(_, I) when is_integer(I) ->
integer_to_list(I);
fate_to_iolist(_, {address, Addr}) ->
gmser_api_encoder:encode(account_pubkey, Addr);
fate_to_iolist(_, {contract, Addr}) ->
gmser_api_encoder:encode(contract_pubkey, Addr);
fate_to_iolist(_, {bytes, Bytes}) ->
Size = bit_size(Bytes),
<<IntValue:Size>> = Bytes,
["#", integer_to_list(IntValue, 16)];
fate_to_iolist(_, Bytes) when is_binary(Bytes) ->
escape_string(Bytes).
escape_string(Binary) ->
escape_string(Binary, []).
escape_string(<<C/utf8, Rest/binary>>, Acc) ->
NewAcc = [Acc, escape_char(C)],
escape_string(Rest, NewAcc);
escape_string(<<>>, Acc) ->
[$", Acc, $"].
tuple_to_iolist([ElemType], {Elem}) ->
Inner = fate_to_iolist(ElemType, Elem),
["(", Inner, ",)"];
tuple_to_iolist(_, {Elem}) ->
Inner = fate_to_iolist(unknown_type(), Elem),
["(", Inner, ",)"];
tuple_to_iolist(ElemTypes, Tuple) ->
Elems = tuple_to_list(Tuple),
Multivalue = multivalue_to_iolist(ElemTypes, Elems),
["(", Multivalue, ")"].
list_to_iolist(InnerType, Elems) ->
InnerChars = list_elems_to_iolist(InnerType, Elems),
["[", InnerChars, "]"].
variant_to_iolist(O, N, Variants, Tag, Tuple) ->
Prefix = choose_variant_prefix(O, N),
{Name, ElemTypes} = lists:nth(Tag + 1, Variants),
case tuple_size(Tuple) of
0 ->
[Prefix, Name];
_ ->
Elems = tuple_to_list(Tuple),
Multivalue = multivalue_to_iolist(ElemTypes, Elems),
[Prefix, Name, "(", Multivalue, ")"]
end.
choose_variant_prefix(O, N) ->
case get_typename(O, N) of
[Namespace, _] ->
[Namespace, "."];
_ ->
[]
end.
multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) ->
FirstTermChars = fate_to_iolist(FirstType, FirstTerm),
multivalue_to_iolist(ElemTypes, Elems, FirstTermChars);
multivalue_to_iolist(_, Elems) ->
list_elems_to_iolist(unknown_type(), Elems).
multivalue_to_iolist([NextType | RestTypes], [NextTerm | RestTerms], Acc) ->
NextTermChars = fate_to_iolist(NextType, NextTerm),
multivalue_to_iolist(RestTypes, RestTerms, [Acc, ", ", NextTermChars]);
multivalue_to_iolist(_, Elems, Acc) ->
list_elems_to_iolist(unknown_type(), Elems, Acc).
list_elems_to_iolist(Type, [FirstTerm | Rest]) ->
FirstTermChars = fate_to_iolist(Type, FirstTerm),
list_elems_to_iolist(Type, Rest, FirstTermChars);
list_elems_to_iolist(_, []) ->
"".
list_elems_to_iolist(Type, [Next | Rest], Acc) ->
NextChars = fate_to_iolist(Type, Next),
list_elems_to_iolist(Type, Rest, [Acc, ", ", NextChars]);
list_elems_to_iolist(_, [], Acc) ->
Acc.
singleton_record_to_iolist(FieldName, FieldType, Term) ->
FieldChars = fate_to_iolist(FieldType, Term),
["{", FieldName, " = ", FieldChars, "}"].
record_to_iolist(FieldTypes, Tuple) ->
case length(FieldTypes) == tuple_size(Tuple) of
true ->
Chars = record_fields_to_iolist(FieldTypes, tuple_to_list(Tuple)),
["{", Chars, "}"];
false ->
tuple_to_iolist([], Tuple)
end.
record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms]) ->
TermChars = fate_to_iolist(Type, Term),
record_fields_to_iolist(FieldTypes, Terms, [Name, " = ", TermChars]);
record_fields_to_iolist(_, []) ->
"".
record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms], Acc) ->
TermChars = fate_to_iolist(Type, Term),
NewAcc = [Acc, ", ", Name, " = ", TermChars],
record_fields_to_iolist(FieldTypes, Terms, NewAcc);
record_fields_to_iolist(_, [], Acc) ->
Acc.
map_to_iolist(K, V, Map) ->
Iter = maps:iterator(Map),
case maps:next(Iter) of
{KeyTerm, ValTerm, Rest} ->
KChars = fate_to_iolist(K, KeyTerm),
VChars = fate_to_iolist(V, ValTerm),
RestChars = map_to_iolist_inner(K, V, Rest, ["[", KChars, "] = ", VChars]),
["{", RestChars, "}"];
none ->
"{}"
end.
map_to_iolist_inner(K, V, Iter, Acc) ->
case maps:next(Iter) of
{KeyTerm, ValTerm, Rest} ->
KChars = fate_to_iolist(K, KeyTerm),
VChars = fate_to_iolist(V, ValTerm),
map_to_iolist_inner(K, V, Rest, [Acc, ", [", KChars, "] = ", VChars]);
none ->
Acc
end.
%%% Tests
check_sophia_to_fate(Type, Sophia, Fate) ->
case parse_literal(Type, Sophia) of
{ok, Fate} ->
ok;
{ok, FateActual} ->
erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}});
{error, Reason} ->
erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}})
Result ->
erlang:error({to_fate_failed, Sophia, Fate, Result})
end.
check_fate_to_sophia(Type, Fate, Sophia) ->
case fate_to_list(Type, Fate) of
Sophia ->
ok;
Result ->
erlang:error({to_sophia_failed, Fate, Sophia, Result})
end.
roundtrip_parser(Type, Sophia, Fate) ->
check_sophia_to_fate(Type, Sophia, Fate),
check_fate_to_sophia(Type, Fate, Sophia),
ok.
% These test function names are getting ridiculous... I might want to optarg
% them or something, but, whatever, it's test code.
roundtrip_parser_lenient(Type, Sophia, Fate) ->
check_sophia_to_fate(Type, Sophia, Fate),
case fate_to_list(Type, Fate) of
Sophia ->
ok;
SophiaActual ->
check_sophia_to_fate(Type, SophiaActual, Fate)
end.
compile_entrypoint_value_and_type(Source, Entrypoint) ->
@ -943,65 +1180,76 @@ check_parser(Sophia) ->
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Check that when we parse the term we get the same value as the Sophia
% compiler.
% compiler. Also check that the pretty printer gives the same string back.
check_sophia_to_fate(unknown_type(), Sophia, Fate),
% Then, once we know that the term is correct, make sure that it is still
% accepted *with* type info.
% accepted *with* type info. Don't bother roundtripping this, since the
% pretty printer doesn't enforce types anyway.
check_sophia_to_fate(Type, Sophia, Fate).
check_parser_roundtrip(Sophia) ->
Source = "contract C = entrypoint f() = " ++ Sophia,
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
roundtrip_parser(Type, Sophia, Fate),
% Without type information we might get a more generic result in Sophia
% syntax. Let's do a lenient test.
roundtrip_parser_lenient(unknown_type(), Sophia, Fate).
check_parser_with_typedef(Typedef, Sophia) ->
% Compile the type definitions alongside the usual literal expression.
Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia,
{Fate, Type} = compile_entrypoint_value_and_type(Source, "f"),
% Do a typed parse, as usual, but there are probably record/variant
% definitions in the AACI, so untyped parses probably don't work.
check_sophia_to_fate(Type, Sophia, Fate).
% definitions in the AACI, so untyped parses probably don't work, and
% variants often have optional namespaces, so the sophia result might not
% match exactly, but should still be equivalent.
roundtrip_parser_lenient(Type, Sophia, Fate).
anon_types_test() ->
% Integers.
check_parser("123"),
check_parser_roundtrip("123"),
check_parser("1_2_3"),
check_parser("-123"),
check_parser_roundtrip("-123"),
% Booleans.
check_parser("true"),
check_parser("false"),
check_parser("[true, false]"),
check_parser_roundtrip("true"),
check_parser_roundtrip("false"),
check_parser_roundtrip("[true, false]"),
% Bytes.
check_parser("#DEAD000BEEF"),
check_parser_roundtrip("#DEAD000BEEF"),
check_parser("#DE_AD0_00B_EEF"),
% Strings.
check_parser("\"hello world\""),
check_parser_roundtrip("\"hello world\""),
% The Sophia compiler doesn't handle this right, but we should still.
%check_parser("\"ÿ\""),
%check_parser("\"\""),
%check_parser_roundtrip("\"ÿ\""),
%check_parser_roundtrip("\"\""),
% Characters.
check_parser("'A'"),
check_parser("['a', ' ', '[']"),
%check_parser("'ÿ'"),
%check_parser("'♣'"),
check_parser_roundtrip("'A'"),
check_parser_roundtrip("['a', ' ', '[']"),
%check_parser_roundtrip("'ÿ'"),
%check_parser_roundtrip("'♣'"),
% List of integers.
check_parser("[1, 2, 3]"),
check_parser_roundtrip("[1, 2, 3]"),
% List of lists.
check_parser("[[], [1], [2, 3]]"),
check_parser_roundtrip("[[], [1], [2, 3]]"),
% Tuple.
check_parser("(1, [2, 3], (4, 5))"),
check_parser_roundtrip("(1, [2, 3], (4, 5))"),
% Map.
check_parser("{[1] = 2, [3] = 4}"),
check_parser_roundtrip("{[1] = 2, [3] = 4}"),
ok.
string_escape_codes_test() ->
check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
check_parser_roundtrip("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""),
check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""),
check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""),
check_parser("\"'\""),
check_parser_roundtrip("\"'\""),
check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
check_parser_roundtrip("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"),
check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"),
check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"),
check_parser("'\"'"),
check_parser_roundtrip("'\"'"),
ok.
@ -1039,15 +1287,17 @@ namespace_variant_test() ->
Term = "[N.A, N.B]",
Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term,
{Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"),
check_sophia_to_fate(VariantType, Term, Fate),
roundtrip_parser(VariantType, Term, Fate),
ok.
chain_objects_test() ->
% Address,
check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
check_parser_roundtrip("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"),
% Two different forms of signature,
check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"),
check_parser("sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n"),
check_parser("#00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF"),
check_parser_roundtrip("#112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF"),
% We have to build a totally custom contract example in order to get an
% AACI and return value for parsing contract addresses. This is because the
@ -1058,18 +1308,18 @@ chain_objects_test() ->
Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
Source = "contract C = entrypoint f(): C = " ++ Contract,
{Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"),
check_sophia_to_fate(ContractType, Contract, Fate),
check_sophia_to_fate(unknown_type(), Contract, Fate),
roundtrip_parser(ContractType, Contract, Fate),
roundtrip_parser(unknown_type(), Contract, Fate),
ok.
bits_test() ->
check_parser("Bits.all"),
check_parser("Bits.none"),
check_parser_roundtrip("Bits.all"),
check_parser_roundtrip("Bits.none"),
{_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"),
check_sophia_to_fate(Type, "5", {bits, 5}),
check_sophia_to_fate(Type, "-5", {bits, -5}),
check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}),
roundtrip_parser_lenient(Type, "5", {bits, 5}),
roundtrip_parser(Type, "-5", {bits, -5}),
roundtrip_parser(Type, "#123", {bits, 256 + 32 + 3}),
ok.
singleton_records_test() ->
@ -1104,7 +1354,8 @@ excess_parens_test() ->
% Including multiple nestings of tuples and grouping, interleaved.
check_parser("((((1), ((2, 3)))), 4)"),
% Also empty tuples exist!
check_parser("()"),
check_parser_roundtrip("()"),
check_parser_roundtrip("(((), ()), ((), ()))"),
check_parser("(((((), ())), ()))"),
ok.
@ -1166,12 +1417,14 @@ singleton_test() ->
% Now let's do some testing with this weird type, to see if we handle it
% correctly.
{ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"),
"(1,)" = fate_to_list(SingletonType, {tuple, {1}}),
% Some ambiguous nesting parens, for fun.
{ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"),
% No trailing comma should give an error.
{error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"),
% All of the above should behave the same in untyped contexts:
{ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"),
"(1,)" = fate_to_list(unknown_type(), {tuple, {1}}),
{ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"),
{ok, 1} = parse_literal(unknown_type(), "(1)"),