Factor handling of different ACI typedef cases

A lot of this complexity was a consequence of trying to avoid redundant extraction of the namespace's/contract's name, so on the other hand letting it be redundant made all of the complexity kind of evaporate. Add to that that we're now building a little deep list and then flattening it, and this logic was able to get really neat in a way that I couldn't work out a year ago.
Rename 'flatten' and so on to 'annotate'
2025-01-31 13:54:49 +11:00 · 2025-01-31 13:54:49 +11:00 · 2025-01-31 13:54:42 +11:00
1 changed files with 153 additions and 119 deletions
--- a/src/hz.erl
+++ b/src/hz.erl
@ -1392,99 +1392,100 @@ prepare_contract(File) ->
    end.

 prepare_aaci(ACI) ->
-    Types = lists:foldl(fun prepare_namespace_types/2, #{}, ACI),
+    % We want to take the types represented by the ACI, things like N1.T(N2.T),
+    % and dereference them down to concrete types like
+    % {tuple, [integer, string]}. Our type dereferencing algorithms
+    % shouldn't act directly on the JSON-based structures that the compiler
+    % gives us, though, so before we do the analysis, we should strip
+    % the ACI down to a list of 'opaque' type definitions and function specs.
+    {Name, OpaqueSpecs, TypeDefs} = convert_aci_types(ACI),

+    % Now that we have the opaque types, we can dereference the function specs
+    % down to the concrete types they actually represent. We annotate each
+    % subexpression of this concrete type with other info too, in case it helps
+    % make error messages easier to understand.
+    Specs = annotate_function_specs(OpaqueSpecs, TypeDefs, #{}),
+
+    {aaci, Name, Specs, TypeDefs}.
+
+convert_aci_types(ACI) ->
+    % Find the main contract, so we can get the specifications of its
+    % entrypoints.
    [{NameBin, SpecDefs}] =
        [{N, F}
         || #{contract := #{kind      := contract_main,
                            functions := F,
                            name      := N}} <- ACI],
    Name = binary_to_list(NameBin),
-    Specs = simplify_specs(SpecDefs, #{}, Types),
-    {aaci, Name, Specs, Types}.
+    % Turn these specifications into opaque types that we can reason about.
+    Specs = lists:map(fun convert_function_spec/1, SpecDefs),

-prepare_namespace_types(#{namespace := NS}, Types) ->
-    prepare_namespace_types2(NS, false, Types);
-prepare_namespace_types(#{contract := NS}, Types) ->
-    prepare_namespace_types2(NS, true, Types).
+    % These specifications can reference other type definitions from the main
+    % contract and any other namespaces, so extract these types and convert
+    % them too.
+    TypeDefTree = lists:map(fun convert_namespace_typedefs/1, ACI),
+    % The tree structure of the ACI naturally leads to a tree of opaque types,
+    % but we want a map, so flatten it out before we continue.
+    TypeDefMap = collect_opaque_types(TypeDefTree, #{}),

-prepare_namespace_types2(NS, IsContract, Types) ->
-    TypeDefs = maps:get(typedefs, NS),
-    NameBin = maps:get(name, NS),
+    % This is all the information we actually need from the ACI, the rest is
+    % just pre-compute and acceleration.
+    {Name, Specs, TypeDefMap}.
+
+convert_function_spec(#{name := NameBin, arguments := Args, returns := Result}) ->
    Name = binary_to_list(NameBin),
-    Types2 = case IsContract of
-                 true ->
-                     maps:put(Name, {[], contract}, Types);
-                 false ->
-                     Types
-             end,
-    Types3 = case maps:find(state, NS) of
-                 {ok, StateDefACI} ->
-                     StateDefOpaque = opaque_type([], StateDefACI),
-                     maps:put(Name ++ ".state", {[], StateDefOpaque}, Types2);
-                 error ->
-                     Types2
-             end,
-    simplify_typedefs(TypeDefs, Types3, Name ++ ".").
+    ArgTypes = lists:map(fun convert_arg/1, Args),
+    ResultType = opaque_type([], Result),
+    {Name, ArgTypes, ResultType}.

-simplify_typedefs([], Types, _NamePrefix) ->
-    Types;
-simplify_typedefs([Next | Rest], Types, NamePrefix) ->
-    #{name := NameBin, vars := ParamDefs, typedef := T} = Next,
-    Name = NamePrefix ++ binary_to_list(NameBin),
-    Params = [binary_to_list(Param) || #{name := Param} <- ParamDefs],
-    Type = opaque_type(Params, T),
-    NewTypes = maps:put(Name, {Params, Type}, Types),
-    simplify_typedefs(Rest, NewTypes, NamePrefix).
-
-simplify_specs([], Specs, _Types) ->
-    Specs;
-simplify_specs([Next | Rest], Specs, Types) ->
-    #{name := NameBin, arguments := ArgDefs, returns := ResultDef} = Next,
+convert_arg(#{name := NameBin, type := TypeDef}) ->
    Name = binary_to_list(NameBin),
-    ArgTypes = [simplify_args(Arg, Types) || Arg <- ArgDefs],
-    {ok, ResultType} = type(ResultDef, Types),
-    NewSpecs = maps:put(Name, {ArgTypes, ResultType}, Specs),
-    simplify_specs(Rest, NewSpecs, Types).
-
-simplify_args(#{name := NameBin, type := TypeDef}, Types) ->
-    Name = binary_to_list(NameBin),
-    % FIXME We should make this error more informative, and continue
-    % propogating it up, so that the user can provide their own ACI and find
-    % out whether it worked or not. At that point ACI -> AACI could almost be a
-    % module or package of its own.
-    {ok, Type} = type(TypeDef, Types),
+    Type = opaque_type([], TypeDef), % opaque_type/2 returns the bare type, not {ok, T}
    {Name, Type}.

-% Type preparation has two goals. First, we need a data structure that can be
-% traversed quickly, to take sophia-esque erlang expressions and turn them into
-% fate-esque erlang expressions that aebytecode can serialize. Second, we need
-% partially substituted names, so that error messages can be generated for why
-% "foobar" is not valid as the third field of a `bazquux`, because the third
-% field is supposed to be `option(integer)`, not `string`.
-%
-% To achieve this we need three representations of each type expression, which
-% together form an 'annotated type'. First, we need the fully opaque name,
-% "bazquux", then we need the normalized name, which is an opaque name with the
-% bare-minimum substitution needed to make the outer-most type-constructor an
-% identifiable built-in, ADT, or record type, and then we need the flattened
-% type, which is the raw {variant, [{Name, Fields}, ...]} or
-% {record, [{Name, Type}]} expression that can be used in actual Sophia->FATE
-% coercion. The type sub-expressions in these flattened types will each be
-% fully annotated as well, i.e. they will each contain *all three* of the above
-% representations, so that coercion of subexpressions remains fast AND
-% informative.
-%
-% In a lot of cases the opaque type given will already be normalized, in which
-% case either the normalized field or the non-normalized field of an annotated
-% type can simple be the atom `already_normalized`, which means error messages
-% can simply render the normalized type expression and know that the error will
-% make sense.
+convert_namespace_typedefs(#{namespace := NS}) ->
+    Name = namespace_name(NS),
+    convert_typedefs(NS, Name);
+convert_namespace_typedefs(#{contract := NS}) ->
+    Name = namespace_name(NS),
+    ImplicitTypes = convert_implicit_types(NS, Name),
+    ExplicitTypes = convert_typedefs(NS, Name),
+    [ImplicitTypes, ExplicitTypes].

-type(T, Types) ->
-    O = opaque_type([], T),
-    flatten_opaque_type(O, Types).
+namespace_name(#{name := NameBin}) ->
+    binary_to_list(NameBin).

+convert_implicit_types(#{state := StateDefACI}, Name) ->
+    StateDefOpaque = opaque_type([], StateDefACI),
+    [{Name, [], contract},
+     {Name ++ ".state", [], StateDefOpaque}];
+convert_implicit_types(_, Name) ->
+    [{Name, [], contract}].
+
+convert_typedefs(#{typedefs := TypeDefs}, Name) ->
+    convert_typedefs_loop(TypeDefs, Name ++ ".", []).
+
+% Take a namespace that has already had a period appended, and use that as a
+% prefix to convert and annotate a list of types.
+convert_typedefs_loop([], _NamePrefix, Converted) ->
+    Converted;
+convert_typedefs_loop([Next | Rest], NamePrefix, Converted) ->
+    #{name := NameBin, vars := ParamDefs, typedef := DefACI} = Next,
+    Name = NamePrefix ++ binary_to_list(NameBin),
+    Params = [binary_to_list(Param) || #{name := Param} <- ParamDefs],
+    Def = opaque_type(Params, DefACI),
+    convert_typedefs_loop(Rest, NamePrefix, [Converted, {Name, Params, Def}]).
+
+collect_opaque_types([], Types) ->
+    Types;
+collect_opaque_types([L | R], Types) ->
+    NewTypes = collect_opaque_types(L, Types),
+    collect_opaque_types(R, NewTypes);
+collect_opaque_types({Name, Params, Def}, Types) ->
+    maps:put(Name, {Params, Def}, Types).
+
+% Convert an ACI type definition/spec into the 'opaque type' representation that
+% our dereferencing algorithms can reason about.
 opaque_type(Params, NameBin) when is_binary(NameBin) ->
    Name = opaque_type_name(NameBin),
    case not is_atom(Name) and lists:member(Name, Params) of
@ -1508,7 +1509,7 @@ opaque_type(Params, Pair) when is_map(Pair) ->
    [{Name, TypeArgs}] = maps:to_list(Pair),
    {opaque_type_name(Name), [opaque_type(Params, Arg) || Arg <- TypeArgs]}.

-% atoms for builtins, lists for user defined types
+% atoms for builtins, strings (lists) for user-defined types
 opaque_type_name(<<"int">>)      -> integer;
 opaque_type_name(<<"address">>)  -> address;
 opaque_type_name(<<"contract">>) -> contract;
@ -1519,16 +1520,49 @@ opaque_type_name(<<"map">>)      -> map;
 opaque_type_name(<<"string">>)   -> string;
 opaque_type_name(Name)           -> binary_to_list(Name).

-flatten_opaque_type(T, Types) ->
+% Type preparation has two goals. First, we need a data structure that can be
+% traversed quickly, to take sophia-esque erlang expressions and turn them into
+% fate-esque erlang expressions that aebytecode can serialize. Second, we need
+% partially substituted names, so that error messages can be generated for why
+% "foobar" is not valid as the third field of a `bazquux`, because the third
+% field is supposed to be `option(integer)`, not `string`.
+%
+% To achieve this we need three representations of each type expression, which
+% together form an 'annotated type'. First, we need the fully opaque name,
+% "bazquux", then we need the normalized name, which is an opaque name with the
+% bare-minimum substitution needed to make the outer-most type-constructor an
+% identifiable built-in, ADT, or record type, and then we need the dereferenced
+% type, which is the raw {variant, [{Name, Fields}, ...]} or
+% {record, [{Name, Type}]} expression that can be used in actual Sophia->FATE
+% coercion. The type sub-expressions in these dereferenced types will each be
+% fully annotated as well, i.e. they will each contain *all three* of the above
+% representations, so that coercion of subexpressions remains fast and
+% informative.
+%
+% In a lot of cases the opaque type given will already be normalized, in which
+% case either the normalized field or the non-normalized field of an annotated
+% type can simply be the atom `already_normalized`, which means error messages
+% can simply render the normalized type expression and know that the error will
+% make sense.
+
+annotate_function_specs([], _Types, Specs) ->
+    Specs;
+annotate_function_specs([{Name, ArgsOpaque, ResultOpaque} | Rest], Types, Specs) ->
+    {ok, Args} = annotate_bindings(ArgsOpaque, Types, []), % args are {Name, Type} pairs
+    {ok, Result} = annotate_type(ResultOpaque, Types),
+    NewSpecs = maps:put(Name, {Args, Result}, Specs),
+    annotate_function_specs(Rest, Types, NewSpecs).
+
+annotate_type(T, Types) ->
    case normalize_opaque_type(T, Types) of
        {ok, AlreadyNormalized, NOpaque, NExpanded} ->
-            flatten_opaque_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types);
+            annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types);
        Error ->
            Error
    end.

-flatten_opaque_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types) ->
-    case flatten_normalized_type(NExpanded, Types) of
+annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types) ->
+    case annotate_type_subexpressions(NExpanded, Types) of
        {ok, Flat} ->
            case AlreadyNormalized of
                true -> {ok, {T, already_normalized, Flat}};
@ -1538,48 +1572,48 @@ flatten_opaque_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types) ->
            Error
    end.

-flatten_opaque_types([T | Rest], Types, Acc) ->
-    case flatten_opaque_type(T, Types) of
-        {ok, Type} -> flatten_opaque_types(Rest, Types, [Type | Acc]);
+annotate_types([T | Rest], Types, Acc) ->
+    case annotate_type(T, Types) of
+        {ok, Type} -> annotate_types(Rest, Types, [Type | Acc]);
        Error      -> Error
    end;
-flatten_opaque_types([], _Types, Acc) ->
+annotate_types([], _Types, Acc) ->
    {ok, lists:reverse(Acc)}.

-flatten_opaque_bindings([{Name, T} | Rest], Types, Acc) ->
-    case flatten_opaque_type(T, Types) of
-        {ok, Type} -> flatten_opaque_bindings(Rest, Types, [{Name, Type} | Acc]);
-        Error      -> Error
-    end;
-flatten_opaque_bindings([], _Types, Acc) ->
-    {ok, lists:reverse(Acc)}.
-
-flatten_opaque_variants([{Name, Elems} | Rest], Types, Acc) ->
-    case flatten_opaque_types(Elems, Types, []) of
-        {ok, ElemsFlat} -> flatten_opaque_variants(Rest, Types, [{Name, ElemsFlat} | Acc]);
-        Error           -> Error
-    end;
-flatten_opaque_variants([], _Types, Acc) ->
-    {ok, lists:reverse(Acc)}.
-
-flatten_normalized_type(PrimitiveType, _Types) when is_atom(PrimitiveType) ->
+annotate_type_subexpressions(PrimitiveType, _Types) when is_atom(PrimitiveType) ->
    {ok, PrimitiveType};
-flatten_normalized_type({variant, VariantsOpaque}, Types) ->
-    case flatten_opaque_variants(VariantsOpaque, Types, []) of
+annotate_type_subexpressions({variant, VariantsOpaque}, Types) ->
+    case annotate_variants(VariantsOpaque, Types, []) of
        {ok, Variants} -> {ok, {variant, Variants}};
        Error          -> Error
    end;
-flatten_normalized_type({record, FieldsOpaque}, Types) ->
-    case flatten_opaque_bindings(FieldsOpaque, Types, []) of
+annotate_type_subexpressions({record, FieldsOpaque}, Types) ->
+    case annotate_bindings(FieldsOpaque, Types, []) of
        {ok, Fields} -> {ok, {record, Fields}};
        Error        -> Error
    end;
-flatten_normalized_type({T, ElemsOpaque}, Types) ->
-    case flatten_opaque_types(ElemsOpaque, Types, []) of
+annotate_type_subexpressions({T, ElemsOpaque}, Types) ->
+    case annotate_types(ElemsOpaque, Types, []) of
        {ok, Elems} -> {ok, {T, Elems}};
        Error       -> Error
    end.

+annotate_bindings([{Name, T} | Rest], Types, Acc) ->
+    case annotate_type(T, Types) of
+        {ok, Type} -> annotate_bindings(Rest, Types, [{Name, Type} | Acc]);
+        Error      -> Error
+    end;
+annotate_bindings([], _Types, Acc) ->
+    {ok, lists:reverse(Acc)}.
+
+annotate_variants([{Name, Elems} | Rest], Types, Acc) ->
+    case annotate_types(Elems, Types, []) of
+        {ok, ElemsFlat} -> annotate_variants(Rest, Types, [{Name, ElemsFlat} | Acc]);
+        Error           -> Error
+    end;
+annotate_variants([], _Types, Acc) ->
+    {ok, lists:reverse(Acc)}.
+
 normalize_opaque_type(T, Types) ->
    case type_is_expanded(T) of
        false -> normalize_opaque_type(T, Types, true);
@ -2194,11 +2228,11 @@ try_coerce(Type, Sophia, Fate) ->
    ok.

 coerce_int_test() ->
-    {ok, Type} = flatten_opaque_type(integer, #{}),
+    {ok, Type} = annotate_type(integer, #{}),
    try_coerce(Type, 123, 123).

 coerce_address_test() ->
-    {ok, Type} = flatten_opaque_type(address, #{}),
+    {ok, Type} = annotate_type(address, #{}),
    try_coerce(Type,
               "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
               {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123,
@ -2206,7 +2240,7 @@ coerce_address_test() ->
                           210,39,214>>}).

 coerce_contract_test() ->
-    {ok, Type} = flatten_opaque_type(contract, #{}),
+    {ok, Type} = annotate_type(contract, #{}),
    try_coerce(Type,
               "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx",
               {contract, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123,
@ -2214,35 +2248,35 @@ coerce_contract_test() ->
                            210,39,214>>}).

 coerce_bool_test() ->
-    {ok, Type} = flatten_opaque_type(boolean, #{}),
+    {ok, Type} = annotate_type(boolean, #{}),
    try_coerce(Type, true, true),
    try_coerce(Type, false, false).

 coerce_string_test() ->
-    {ok, Type} = flatten_opaque_type(string, #{}),
+    {ok, Type} = annotate_type(string, #{}),
    try_coerce(Type, "hello world", <<"hello world">>).

 coerce_list_test() ->
-    {ok, Type} = flatten_opaque_type({list, [string]}, #{}),
+    {ok, Type} = annotate_type({list, [string]}, #{}),
    try_coerce(Type, ["hello world", [65, 32, 65]], [<<"hello world">>, <<65, 32, 65>>]).

 coerce_map_test() ->
-    {ok, Type} = flatten_opaque_type({map, [string, {list, [integer]}]}, #{}),
+    {ok, Type} = annotate_type({map, [string, {list, [integer]}]}, #{}),
    try_coerce(Type, #{"a" => "a", "b" => "b"}, #{<<"a">> => "a", <<"b">> => "b"}).

 coerce_tuple_test() ->
-    {ok, Type} = flatten_opaque_type({tuple, [integer, string]}, #{}),
+    {ok, Type} = annotate_type({tuple, [integer, string]}, #{}),
    try_coerce(Type, {123, "456"}, {tuple, {123, <<"456">>}}).

 coerce_variant_test() ->
-    {ok, Type} = flatten_opaque_type({variant, [{"A", [integer]},
+    {ok, Type} = annotate_type({variant, [{"A", [integer]},
                                                {"B", [integer, integer]}]},
                                     #{}),
    try_coerce(Type, {"A", 123}, {variant, [1, 2], 0, {123}}),
    try_coerce(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}).

 coerce_record_test() ->
-    {ok, Type} = flatten_opaque_type({record, [{"a", integer}, {"b", integer}]}, #{}),
+    {ok, Type} = annotate_type({record, [{"a", integer}, {"b", integer}]}, #{}),
    try_coerce(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}).