diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 7b588e8..de3d8ea 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -29,14 +29,281 @@ -include_lib("eunit/include/eunit.hrl"). --type aaci() :: {aaci, string(), #{string() => function_spec()}, #{string() => typedef()}}. --type function_spec() :: {[{string(), annotated_type()}], annotated_type()}. --type typedef() :: {[string()], typedef_rhs()}. +%% @doc +%% The Sophia-flavored 'Erlang representation' of on-chain data. +%% Data is stored and manipulated on the chain without knowledge of Sophia +%% types, which leads to a specialized representation that is confusing to +%% manipulate directly. If you want to form contract arguments using an Erlang +%% program, or pattern match the outputs of a contract call using an Erlang +%% program, this Sophia-flavored representation is much more convenient. It +%% de-anonymizes variant types and record types, and is more lenient in how it +%% interprets a variety of cryptographic, binary, and string data types. +%% +%% When calling functions that manipulate this erlang representation, AACI type +%% information representing the Sophia type of that term must be provided. The +%% Sophia type used to produce that AACI type will determine what Erlang terms +%% are actually accepted without producing errors. +%% +-type erlang_repr() :: erlang_repr_int() + | erlang_repr_address() + | erlang_repr_contract() + | erlang_repr_signature() + | erlang_repr_bool() + | erlang_repr_string() + | erlang_repr_char() + | erlang_repr_bytes() + | erlang_repr_bits() + | erlang_repr_list() + | erlang_repr_map() + | erlang_repr_tuple() + | erlang_repr_variant() + | erlang_repr_record(). + + +%-type erlang_repr() :: integer() + %| string() + %| boolean() + %| binary() + %| tuple() % Tuples, variants, or raw addresses + %| [erlang_repr()] + %| #{erlang_repr() => erlang_repr()}. + +%% @doc +%% The Erlang representation of a Sophia `int` +%% Integers will be used as-is. 
Strings will be parsed using list_to_integer/1. +%% fate_to_erlang/2 always produces the integer representation. + +-type erlang_repr_int() :: integer() | string(). + +%% @doc +%% The Erlang representation of a Sophia `address` +%% This can either be the "ak_..." string produced by gmserialization, +%% GajuDesk, etc. or a 'raw' binary of 32 bytes. fate_to_erlang/2 always +%% produces the "ak_..." string as an Erlang list. The Sophia-flavored Erlang +%% representation should not be used if this is undesirable. + +-type erlang_repr_address() :: unicode:chardata() | {raw, <<_:32*8>>}. + +%% @doc +%% The Erlang representation of a Sophia `contract` +%% This can either be the "ct_..." string produced by gmserialization, +%% GajuDesk, etc. or a 'raw' binary of 32 bytes. fate_to_erlang/2 always +%% produces the "ct_..." string as an Erlang list. + +-type erlang_repr_contract() :: unicode:chardata() | {raw, <<_:32*8>>}. + +%% @doc +%% The Erlang representation of a Sophia `signature` +%% This can either be the "sg_..." string produced by gmserialization, +%% GajuDesk, etc. or a 'raw' binary of 64 bytes. (Not 32.) Unlike addresses and +%% contracts, 'raw' binaries can be wrapped or unwrapped when representing a +%% signature. fate_to_erlang/2 always produces the "sg_..." string as an Erlang +%% list. + +-type erlang_repr_signature() :: unicode:chardata() | <<_:64*8>> | {raw, <<_:64*8>>}. + +%% @doc +%% The Erlang representation of a Sophia `bool` +%% fate_to_erlang/2 always produces atoms, but erlang_to_fate/2 also accepts +%% the lists "true" and "false". + +-type erlang_repr_bool() :: true | false | string(). + +%% @doc +%% The Erlang representation of a Sophia `string` +%% The conversion uses unicode:characters_to_binary/1, so a list, a UTF8 +%% binary, or an iolist mixing both are all acceptable inputs. fate_to_erlang/2 +%% always produces a list. + +-type erlang_repr_string() :: unicode:chardata(). 
+ +%% @doc +%% The Erlang representation of a Sophia `char` +%% On-chain a `char` means one unicode code point, and is just a FATE integer. +%% fate_to_erlang/2 will provide this integer as-is, but erlang_to_fate/2 can +%% be passed an arbitrary unicode string, as long as it decodes to a single +%% unicode code point. + +-type erlang_repr_char() :: integer() | unicode:chardata(). + +%% @doc +%% The Erlang representation of Sophia `bytes()` +%% Sophia has fixed-length `bytes(10)` etc. and variable length `bytes()`. +%% These are treated the same in the Erlang representation, but +%% erlang_to_fate/2 will check the length of the binary in the fixed length +%% case, and provide errors if it doesn't agree. + +-type erlang_repr_bytes() :: binary(). + +%% @doc +%% The Erlang representation of Sophia `bits()` +%% FATE has a representation of bitstrings that one might call novel. A +%% FATE/Sophia bitstring is actually represented as an integer, so there is no +%% concept of bitstring 'length', all bitstrings have infinitely many leading +%% zeroes, if the integer is positive, and, surprisingly, infinitely many +%% leading ones, if the integer is negative! To represent this in the general +%% case, erlang_to_fate/2 accepts arbitrary integers, positive or negative, and +%% fate_to_erlang/2 always produces integers, but for convenience, +%% erlang_to_fate/2 also accepts arbitrary Erlang bitstrings, which are +%% converted into positive integers, i.e. '0 by default' FATE bitstrings. + +-type erlang_repr_bits() :: integer() | bitstring(). + +%% @doc +%% The Erlang representation of a Sophia `list(_)` +%% Simply a list. Each element of the list is converted forwards/backwards as +%% normal. + +-type erlang_repr_list() :: [erlang_repr()]. + +%% @doc +%% The Erlang representation of a Sophia `map(_, _)` +%% Simply a map. Each key and value is converted forwards/backwards as normal. + +-type erlang_repr_map() :: #{erlang_repr() => erlang_repr()}. 
+ +%% @doc +%% The Erlang representation of a Sophia tuple +%% In Sophia these types are written `a * b`, `a * b * c`, and so on. Despite +%% the binary infix notation, a product of more than two types gives a single +%% tuple type with that many elements, so (1, 2, 3) is an int * int * int. +%% gmbytecode requires FATE tuples to be wrapped in {tuple, {X, Y}}, etc. but +%% the Erlang representation specifically requires that the tuple be provided +%% without any wrappers, so {X, Y}, etc. These representations cannot be mixed, +%% since at the highest level they are both just tuples. Each element of the +%% tuple is also converted forwards/backwards as normal. Although FATE has +%% singleton tuples, Sophia doesn't, so an ACI/AACI will never produce a +%% singleton tuple in an interface; if your contract takes singleton tuples, +%% these Sophia representations will probably still work, but you won't be able +%% to generate the AACI that makes them work, so it is likely simpler to just +%% use the FATE representation. + +-type erlang_repr_tuple() :: {} | {erlang_repr(), erlang_repr()} | tuple(). + +%% @doc +%% The Erlang representation of a Sophia ADT +%% Sophia has a `datatype` keyword that allows the definition of algebraic data +%% types, also known as variants, tagged unions, sum types, coproduct types, +%% etc. In Erlang these are normally represented as an atom, or as a tuple +%% whose first term is an atom, so for familiarity, erlang_to_fate/2 accepts +%% lists in place of atoms, or tuples whose first term is a list. Note that +%% constructors in Sophia have to be capitalized, so actual atoms wouldn't be +%% that convenient. fate_to_erlang/2 always produces a tuple whose first term +%% is a list, even if that tuple is a singleton. This allows the user to +%% blindly call element(1, _) or tuple_to_list(_) without annoying special cases. 
+%% +%% Sophia also has a few built-in algebraic data types, for building its +%% standard library, and for exposing certain FATE primitives, which will +%% therefore also use this representation, e.g. "None", {"None"}, or +%% {"Some", Datum} for the `option(_)` type. + +-type erlang_repr_variant() :: string() | {string()} | {string(), erlang_repr()} | tuple(). + +%% @doc +%% The Erlang representation of a Sophia record type +%% Sophia has a `record` keyword, that allows the definition of new record +%% types. Sophia records are meant to be reminiscent of Sophia maps, so in the +%% Erlang representation of Sophia records, we use a map, with strings as keys, +%% and arbitrary erlang_repr() terms as values. + +-type erlang_repr_record() :: #{string() => erlang_repr()}. + +%% @doc +%% The Accelerated Aeternity Contract Interface +%% Sophia tooling was originally written around a javascript use-case, but +%% hakuzaru is written for Erlang, so we don't really want to walk through big +%% JSON trees every time we do an on-chain action, so the AACI exists to +%% accelerate these actions, so that interacting with contract entrypoints from +%% within a pure Erlang environment is convenient and fast. +%% +%% The layout may change, but an AACI basically consists of three parts: +%% - The name of the contract, +%% - The 'annotated' entrypoint specs, designed for fast conversion to/from +%% the representation used on-chain, see function_spec/0, +%% - The 'opaque' type definitions, all the internal type aliases and +%% definitions within the contract and its imported namespaces. + +-type aaci() :: {aaci, string(), #{string() => function_spec()}, #{string() => typedef()}}. + +%% @doc +%% The fully annotated spec of a contract entrypoint, for fast call formation +%% The first term is a list of parameter names and their types, as expected by +%% erlang_args_to_fate/2, and the second term is a single type, as expected by +%% fate_to_erlang/2. 
See annotated_type/0 for the details of how these types +%% are represented and why, but for most purposes it is fine to just store and +%% pass these type terms around without looking at their contents. +-type function_spec() :: {[{string(), annotated_type()}], annotated_type()}. +%% @doc +%% A fully annotated Sophia type +%% Sophia allows for arbitrary nesting of type aliases, each with parameters, +%% and each potentially substituting for another arbitrarily complex type +%% alias, so there is a potentially indefinite amount of work converting the +%% type `my_type_alias` as it would appear in Sophia/in the ACI, into the +%% actual variant/record/list/map/tuple type expression that it ultimately +%% represents. To overcome this, we 'annotate' a type, recording what its +%% aliased name was, along with its actual definition. +%% +%% Normally you can extract the annotated types from a function_spec(), and +%% pass them into the conversion function that needs them, but it can also be +%% useful to walk through the annotated types yourself. Confusingly, if you +%% want to recursively descend down an annotated type, you want to recurse on +%% the third element in the tuple, not the first two, as the first two +%% represent incomplete levels of normalization, which can be more descriptive +%% for users, but aren't as actionable as the fully normalized third element. +%% +%% Despite the third term being the most important, it is kept at the end, +%% because that is what is most memorable, since each element of the triple is +%% more normalized than the last, and because that is what is easiest to read, +%% since the third term is usually an explosion of nested braces and brackets, +%% making anything written after it basically unreadable. 
+%% +%% If you look at examples of annotated types produced in your own programs, +%% you will tend to see things like {integer, already_normalized, integer}, +%% making it even less clear that the third element is the important one, or +%% why that is. For some fairly simple but informative examples, consider these +%% type aliases: +%% contract C = +%% record my_record('t) = {x: 't, y: 't} +%% type my_alias1 = int +%% type my_alias2 = list(my_alias1) +%% type my_alias3 = my_record(my_alias1) +%% If these type aliases appeared in a function spec, the AACI would represent +%% them as the following annotated types: +%% {"my_alias1", integer, integer} +%% {"my_alias2", {list, ["my_alias1"]}, {list, [{"my_alias1", integer, integer}]}} +%% {"my_alias3", {"my_record", ["my_alias1"]}, {record, [{"x", {"my_alias1", integer, integer}}, {"y", {"my_alias1", integer, integer}}]}} +%% +%% The first term is the type roughly as it appeared in the ACI, see +%% opaque_type/0 for more information. +%% +%% The second term is that same type but 'head normalized', chasing type +%% aliases iteratively, until it is some built in type like an integer, or some +%% user-defined record type or ADT. If the alias reduces to a list or map or +%% tuple with more aliased types nested inside, these nested type +%% subexpressions are not normalized any further, as the 'list' or 'map' +%% connective is considered the 'head' of the type expression, and is +%% normalized. Record type names and ADT names are not considered aliases, and +%% so are considered head normalized, but both can take parameters, which can +%% also stay un-normalized, as with lists or maps. If the head normalized type +%% is the same as the opaque type, then the atom 'already_normalized' is placed +%% instead, as a hint that instead of printing messages like +%% "my_alias1 (i.e. int)", a simple message like "list(my_record)" will do. 
+%% +%% The third term is the head normalized type with two changes, first, record +%% and variant definitions are substituted in as well, giving a list of field +%% names or constructor names in full, and second, each subexpression is +%% recursively annotated, meaning its opaque, head-normalized, and fully +%% normalized parts also appear as triples. + +-type annotated_type() :: {opaque_type(), already_normalized | opaque_type(), annotated_type_body()}. + +%% @doc +%% The primitive connectives that complex type expressions can be built out of. +%% It takes a parameter, since builtin_type(opaque_type()), +%% builtin_type(annotated_type()), and builtin_type(typedef_expression()) are +%% all useful recursive applications of these connectives. --type annotated_type() :: {opaque_type(), already_normalized | opaque_type(), builtin_type(annotated_type())}. -type builtin_type(T) :: {bytes, [integer() | any]} - | {record, [{string(), T}]} - | {variant, [{string(), [T]}]} | {tuple, [T]} | {list, [T]} | {map, [T]} @@ -51,17 +318,55 @@ | channel | unknown_type. +%% @doc +%% The connectives for defining new records and ADTs. +%% Record types and ADTs can both appear in the original type definitions in +%% the body of a contract, as well as in the recursively normalized 'annotated +%% types' that the AACI stores. We use the same layout in both cases. +-type user_defined_type(T) :: {record, [{string(), T}]} | {variant, [{string(), [T]}]}. + +%% @doc +%% An opaque type as it originally appeared in a function spec. +%% The Sophia compiler may have a different representation for these type +%% expressions, but we make a simple representation here as well. +%% These type expressions are really function applications, in a limited sort +%% of rewrite calculus without higher order functions. 
After performing some +%% rewrites, the format actually stays the same, so the second term in a type +%% triple is also this 'opaque type', but that is a coincidence; this type is +%% primarily designed to represent types that haven't been head-normalized at +%% all % yet. -type opaque_type() :: string() | {string(), [opaque_type()]} | builtin_type(opaque_type()). --type typedef_rhs() :: {var, string()} | string() | {string(), [opaque_type()]} | builtin_type(typedef_rhs()). +%% @doc +%% The recursively annotated part of an annotated type triple +%% This can be any anonymous type connective, with annotated types inside, or +%% it can be a record definition, with annotated types for fields, or it can be +%% an ADT definition, with annotated types for each constructor input. +-type annotated_type_body() :: builtin_type(annotated_type()) | user_defined_type(annotated_type()). --type erlang_repr() :: integer() - | string() - | boolean() - | binary() - | tuple() % Tuples, variants, or raw addresses - | [erlang_repr()] - | #{erlang_repr() => erlang_repr()}. +%% @doc +%% The recursive type expressions that can appear in the definitions of type aliases. +%% Similar to opaque_type(), but type aliases can take parameters as well, +%% which means those parameters can also appear anywhere within the recursive +%% type expression that defines the type alias. +-type typedef_expression() :: {var, string()} + | string() + | {string(), [typedef_expression()]} + | builtin_type(typedef_expression()). +%% @doc +%% A type definition as it appears in the AACI. +%% A type definition has a list of parameter names, and then some body defined +%% using builtin type connectives, other defined types, and those parameters. +-type typedef() :: {[string()], typedef_body()}. + +%% @doc +%% The possible right-hand-sides of a type definition +%% A type definition means a type alias, a record definition, or an ADT +%% definition. 
Aliases are just some type expression, possibly with type +%% parameters, and records and variants are already defined above in +%% user_defined_type/1, with arbitrary type expressions in each one, but again, +%% they could contain type parameters as well. +-type typedef_body() :: typedef_expression() | user_defined_type(typedef_expression()). %%% ACI/AACI @@ -179,7 +484,7 @@ convert_typedefs_loop([Next | Rest], NamePrefix, Converted) -> when Tree :: typedef_tree(), TypeDefs :: #{string() => typedef()}. --type typedef_tree() :: {string(), [string()], typedef_rhs()} | list(typedef_tree()). +-type typedef_tree() :: {string(), [string()], typedef_body()} | list(typedef_tree()). collect_opaque_types([], Types) -> Types; @@ -194,7 +499,7 @@ collect_opaque_types({Name, Params, Def}, Types) -> -spec opaque_type(Params, ACIType) -> Opaque when Params :: [string()], ACIType :: binary() | map(), - Opaque :: typedef_rhs(). + Opaque :: opaque_type(). % Convert an ACI type defintion/spec into the 'opaque type' representation that % our dereferencing algorithms can reason about. @@ -513,6 +818,16 @@ substitute_opaque_types(Bindings, Types) -> Reason :: term(), PathStep :: term(). +%% @doc +%% Call erlang_to_fate/2 on a list of named values. +%% See the documentation for the erlang_repr/0 type for more information on the +%% format required. +%% This is mainly used by hz.erl to form contract calls. The parameter names +%% and parameter types are provided in one zipped list, exactly as they appear +%% in the AACI datatype, and then a second list of concrete arguments is +%% provided in the format that erlang_to_fate/2 expects. The parameter names +%% are used to provide slightly more informative errors. + erlang_args_to_fate(VarTypes, Terms) -> DefLength = length(VarTypes), ArgLength = length(Terms), @@ -530,6 +845,15 @@ erlang_args_to_fate(VarTypes, Terms) -> Reason :: term(), PathStep :: term(). 
+%% @doc +%% Convert one Sophia-flavored Erlang term into one FATE-flavored Erlang term. +%% This is not usually used on its own, since if you need to form a contract +%% call, you have a list of arguments, not a single argument. Nonetheless, if +%% for some reason you want to use a mix of FATE-flavored Erlang terms and +%% Sophia-flavored Erlang terms in one function call, it may be useful to +%% convert the Sophia-flavored terms individually, to form a single +%% FATE-flavored list for call formation. + erlang_to_fate({_, _, integer}, S) when is_integer(S) -> {ok, S}; erlang_to_fate({O, N, integer}, S) when is_list(S) -> @@ -1082,9 +1406,9 @@ coerce_tuple_test() -> check_roundtrip(Type, {123, "456"}, {tuple, {123, <<"456">>}}). coerce_variant_test() -> - {ok, Type} = annotate_type({variant, [{"A", [integer]}, - {"B", [integer, integer]}]}, - #{}), + Definition = {variant, [{"A", [integer]}, + {"B", [integer, integer]}]}, + {ok, Type} = annotate_type("t", #{"t" => {[], Definition}}), check_roundtrip(Type, {"A", 123}, {variant, [1, 2], 0, {123}}), check_roundtrip(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}). @@ -1094,7 +1418,8 @@ coerce_option_test() -> check_roundtrip(Type, {"Some", 1}, {variant, [0, 1], 1, {1}}). coerce_record_test() -> - {ok, Type} = annotate_type({record, [{"a", integer}, {"b", integer}]}, #{}), + Definition = {record, [{"a", integer}, {"b", integer}]}, + {ok, Type} = annotate_type("t", #{"t" => {[], Definition}}), check_roundtrip(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). coerce_bytes_test() ->