Compare commits

..

13 Commits

Author SHA1 Message Date
zxq9 3b4acaac6e Finally
Gajumaru Serialization Tests / tests (push) Successful in 46m7s
2025-02-04 15:46:08 +09:00
zxq9 c2ccd39c39 WIP
Gajumaru Serialization Tests / tests (push) Successful in 46m7s
2025-02-04 15:37:37 +09:00
zxq9 1229471ce0 WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m1s
2025-02-04 15:34:28 +09:00
zxq9 fc5c9f2d3d WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m1s
2025-02-04 15:32:21 +09:00
zxq9 67fc399b31 WIP
Gajumaru Serialization Tests / tests (push) Failing after -4m15s
2025-02-04 15:24:07 +09:00
zxq9 ed8c08bf62 WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m0s
2025-02-04 15:23:10 +09:00
zxq9 3421d3dca3 WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m1s
2025-02-04 15:16:25 +09:00
zxq9 0e5ac7f68f WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m1s
2025-02-04 15:10:29 +09:00
zxq9 c5a968cca3 WIP
Gajumaru Serialization Tests / tests (push) Failing after 46m0s
2025-02-04 14:54:23 +09:00
zxq9 dc460ddd75 WIP
Gajumaru Serialization Tests / tests (push) Failing after -4m12s
2025-02-04 14:53:37 +09:00
zxq9 23856166fd Update fetcher
Gajumaru Serialization Tests / tests (push) Failing after 46m11s
2025-02-04 14:50:33 +09:00
zxq9 82f10b76c5 Update act_runner env
Gajumaru Serialization Tests / tests (push) Failing after -4m12s
2025-02-04 14:49:24 +09:00
zxq9 cd6f7b5751 Switch to eblake2
Gajumaru Serialization Tests / tests (push) Successful in -4m15s
2025-01-29 17:59:17 +09:00
9 changed files with 29 additions and 1776 deletions
+6 -258
View File
@@ -1,267 +1,15 @@
# GM Serialization
GM Serialization
=====
Serialization helpers for the Gajumaru.
For an overview of the static serializer, see [this document](doc/static.md).
## Build
Build
-----
$ rebar3 compile
## Test
Test
----
$ rebar3 eunit
## Dynamic encoding
The module `gmser_dyn` offers dynamic encoding support, encoding most 'regular'
Erlang data types into an internal RLP representation.
Main API:
* `encode(term()) -> iolist()`
* `encode_typed(template(), term()) -> iolist()`
* `decode(iolist()) -> term()`
* `serialize(term()) -> binary()`
* `serialize_typed(template(), term()) -> binary()`
* `deserialize(binary()) -> term()`
In the examples below, we use the `decode` functions, to illustrate
how the type information is represented. The fully serialized form is
produced by the `serialize` functions.
The basic types supported by the encoder are:
* `integer()` (`anyint`, code: 246)
* `neg_integer()` (`negint`, code: 247)
* `non_neg_integer()` (`int` , code: 248)
* `binary()` (`binary`, code: 249)
* `boolean()` (`bool` , code: 250)
* `list()` (`list` , code: 251)
* `map()` (`map` , code: 252)
* `tuple()` (`tuple` , code: 253)
* `gmser_id:id()` (`id` , code: 254)
* `atom()` (`label` , code: 255)
(The range of codes is chosen because the `gmser_chain_objects` codes
range from 10 to 200, and also to stay within 1 byte.)
When encoding `map` types, the map elements are first sorted.
When specifying a map type for template-driven encoding, use
the `#{items => [{Key, ValueType} | {opt, Key, ValueType}]}` construct.
The key names are included in the encoding, and are matched against the item
specs during decoding. If the key names don't match, the decoding fails, unless
for an `{opt, K, V}` item, in which case that item spec is skipped.
```erlang
T = #{items => [{a,int},{opt,b,int},{c,int}]}
E1 = gmser_dyn:encode_typed(T, #{a => 1, b => 2, c => 3}) ->
[<<0>>,<<1>>,[<<252>>,
[[[<<255>>,<<97>>],[<<248>>,<<1>>]],
[[<<255>>,<<98>>],[<<248>>,<<2>>]],
[[<<255>>,<<99>>],[<<248>>,<<3>>]]]]]
E2 = gmser_dyn:encode_typed(T, #{a => 1, c => 3}) ->
[<<0>>,<<1>>,[<<252>>,
[[[<<255>>,<<97>>],[<<248>>,<<1>>]],
[[<<255>>,<<99>>],[<<248>>,<<3>>]]]]]
gmser_dyn:decode_typed(T,E2) ->
#{c => 3,a => 1}
```
## Labels
Labels correspond to (existing) atoms in Erlang.
Decoding of a label results in a call to `binary_to_existing_atom/2`, so will
fail if the corresponding atom does not already exist.
This behavior can be modified using the option `#{missing_labels => fail | create | convert}`,
where `fail` is the default, as described above, `convert` means that missing atoms are
converted to binaries, and `create` means that the atom is created dynamically.
The option can be passed e.g.:
```erlang
gmser_dyn:deserialize(Binary, gmser_dyn:set_opts(#{missing_labels => convert}))
```
or
```erlang
gmser_dyn:deserialize(Binary, gmser_dyn:set_opts(#{missing_labels => convert}, Types))
```
By calling `gmser_dyn:register_types/1`, after having added options to the type map,
the options can be made to take effect automatically.
It's possible to cache labels for more compact encoding.
Note that when caching labels, the same cache mapping needs to be used on the
decoder side.
Labels are encoded as `[<<255>>, << AtomToBinary/binary >>]`.
If a cached label is used, the encoding becomes `[<<255>>, [Ix]]`, where
`Ix` is the integer-encoded index value of the cached label.
## Examples
Dynamically encoded objects have the basic structure `[<<0>>,V,Obj]`, where `V` is the
integer-coded version, and `Obj` is the top-level encoding on the form `[Tag,Data]`.
```erlang
E = fun(T) -> io:fwrite("~w~n", [gmser_dyn:encode(T)]) end.
E(17) -> [<<0>>,<<1>>,[<<248>>,<<17>>]]
E(<<"abc">>) -> [<<0>>,<<1>>,[<<249>>,<<97,98,99>>]]
E(true) -> [<<0>>,<<1>>,[<<250>>,<<1>>]]
E(false) -> [<<0>>,<<1>>,[<<250>>,<<0>>]]
E([1,2]) -> [<<0>>,<<1>>,[<<251>>,[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]
E({1,2}) -> [<<0>>,<<1>>,[<<253>>,[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]
E(#{a=>1, b=>2}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<97>>],[<<248>>,<<1>>]],[[<<255>>,<<98>>],[<<248>>,<<2>>]]]]]
E(gmser_id:create(account,<<1:256>>)) ->
[<<0>>,<<1>>,[<<254>>,<<1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1>>]]
```
Note that tuples and lists are encoded the same way, except for the initial type tag.
Maps are encoded as `[<Map>, [KV1, KV2, ...]]`, where `[KV1, KV2, ...]` is the sorted
list of key-value tuples from `maps:to_list(Map)`, but with the `tuple` type tag omitted.
## Template-driven encoding
Templates can be provided to the encoder by either naming an already registered
type, or by passing a template directly. In both cases, the encoder will enforce
the type information in the template.
If the template has been registered, the encoder uses the registered type specification
to drive the encoding. The code of the registered template is embedded in the encoded
output:
```erlang
gmser_dyn:encode_typed({int,int,int}, {1,2,3}) ->
[<<0>>,<<1>>,[<<253>>,
[[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>]]]]
Types = gmser_dyn_types:add_type(t3,1013,{int,int,int}).
gmser_dyn:encode_typed(t3, {1,2,3}, Types) ->
[<<0>>,<<1>>,[[<<3,245>>,<<253>>],
[[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>]]]]
```
Note that the original `<<253>>` type code is wrapped as `[<<3,245>>,<<253>>]`,
where `<<3,245>>` corresponds to the custom code `1013`.
Using the default option `#{strict => true}`, the decoder will extract the custom
type spec, and validate the encoded data against it. If the custom code is missing,
the decoder aborts. Using `#{strict => false}`, the custom code is used if it exists,
but otherwise, it's ignored, and the encoded data is decoded using the dynamic type
info.
### Alternative types
The dynamic encoder supports a few additions to the `gmserialization` template
language: `any`, `#{list => Type}`, `#{alt => [AltTypes]}` and `#{switch => #{Tag => Type}}`.
#### `any`
The `any` type doesn't have an associated code, but enforces dynamic encoding.
#### `list`
The original list type notation expects a key-value list, e.g.
`[{name, binary}, {age, int}]`
```erlang
EL = gmser_dyn:encode_typed([{name,binary},{age,int}], [{name,<<"Ulf">>},{age,29}]) ->
[<<0>>,<<1>>,[<<251>>,
[[<<253>>,[[<<255>>,<<110,97,109,101>>],[<<249>>,<<85,108,102>>]]],
[<<253>>,[[<<255>>,<<97,103,101>>],[<<248>>,<<29>>]]]]]]
```
Note that the encoding explicitly lays out a `[{Key, Value}]` structure, all
dynamically typed. This means it can be dynamically decoded without templates.
```erlang
gmser_dyn:decode(EL).
[{name,<<"Ulf">>},{age,29}]
```
In order to specify something like Erlang's `[integer()]` type, we can use
the following:
```erlang
gmser_dyn:encode_typed(#{list => int}, [1,2,3,4]) ->
[<<0>>,<<1>>,[<<251>>,
[[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>],[<<248>>,<<4>>]]]]
```
#### `alt`
The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try
to encode as each type in the list, in the specified order, until one matches.
```erlang
gmser_dyn:encode_typed(#{alt => [negint,int]}, -5) -> [<<0>>,<<1>>,[<<247>>,<<5>>]]
gmser_dyn:encode_typed(#{alt => [negint,int]}, 5) -> [<<0>>,<<1>>,[<<248>>,<<5>>]]
gmser_dyn:encode_typed(anyint,-5) -> [<<0>>,<<1>>,[<<246>>,[<<247>>,<<5>>]]]
gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]]
```
#### `switch`
The `switch` type allows for encoding a 'tagged' object, where the tag determines
the type.
```erlang
E1 = gmser_dyn:encode_typed(#{switch => #{name => binary, age => int}}, #{age => 29}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<97,103,101>>],[<<248>>,<<29>>]]]]]
gmser_dyn:decode_typed(#{switch => #{name => binary, age => int}}, E1) ->
#{age => 29}
E2 = gmser_dyn:encode_typed(#{switch => #{name => binary, age => int}}, #{name => <<"Ulf">>}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<110,97,109,101>>],[<<249>>,<<85,108,102>>]]]]]
gmser_dyn:decode_typed(#{switch => #{name => binary, age => int}}, E2) ->
#{name => <<"Ulf">>}
```
A practical use of `switch` would be in a protocol schema:
```erlang
t_msg(_) ->
#{switch => #{ call => t_call
, reply => t_reply
, notification => t_notification }}.
t_call(_) ->
#{items => [ {id, anyint}
, {req, t_req} ]}.
t_reply(_) ->
#{alt => [#{items => [ {id, anyint}
, {result, t_result} ]},
#{items => [ {id, anyint}
, {code, anyint}
, {message, binary} ]}
]}.
```
In this scenario, messages are 'tagged' as 1-element maps, e.g.:
```erlang
async_request(Msg) ->
Id = erlang:unique_integer(),
gmmp_cp:to_server(
whereis(gmmp_core_connector),
#{call => #{ id => Id
, req => Msg }}),
Id.
```
### Notes
Note that `anyint` is a standard type. The static serializer supports only
positive integers (`int`), as negative numbers are forbidden on-chain.
For dynamic encoding e.g. in messaging protocols, handling negative numbers can
be useful, so the `negint` type was added as a dynamic type. To encode a full-range
integer, the `alt` construct is needed.
(Floats are not supported, as they are non-deterministic. Rationals and fixed-point
numbers could easily be handled as high-level types, e.g. as `{int,int}`.)
-83
View File
@@ -1,83 +0,0 @@
# Static Serialization
The `gmserialization` and `gmser_chain_objects` modules implement the
static serialization support used in the Gajumaru blockchain.
The purpose is to produce fully deterministic serialization, in order
to maintain predictable hashing.
Example:
```erlang
%% deterministic canonical serialization.
-spec serialize_to_binary(signed_tx()) -> binary_signed_tx().
serialize_to_binary(#signed_tx{tx = Tx, signatures = Sigs}) ->
gmser_chain_objects:serialize(
?SIG_TX_TYPE,
?SIG_TX_VSN,
serialization_template(?SIG_TX_VSN),
[ {signatures, lists:sort(Sigs)}
, {transaction, aetx:serialize_to_binary(Tx)}
]).
-spec deserialize_from_binary(binary()) -> signed_tx().
deserialize_from_binary(SignedTxBin) when is_binary(SignedTxBin) ->
[ {signatures, Sigs}
, {transaction, TxBin}
] = gmser_chain_objects:deserialize(
?SIG_TX_TYPE,
?SIG_TX_VSN,
serialization_template(?SIG_TX_VSN),
SignedTxBin),
assert_sigs_size(Sigs),
#signed_tx{ tx = aetx:deserialize_from_binary(TxBin)
, signatures = Sigs
}.
serialization_template(?SIG_TX_VSN) ->
[ {signatures, [binary]}
, {transaction, binary}
].
```
The terms that can be encoded using these templates are given by
this type in `gmserialization.erl`:
```erlang
-type encodable_term() :: non_neg_integer()
| binary()
| boolean()
| [encodable_term()] %% Of any length
| #{atom() => encodable_term()}
| tuple() %% Any arity, containing encodable_term().
| gmser_id:id().
```
The template 'language' is defined by these types:
```erlang
-type template() :: [{field_name(), type()}].
-type field_name() :: atom().
-type type() :: 'int'
| 'bool'
| 'binary'
| 'id' %% As defined in aec_id.erl
| [type()] %% Length one in the type. This means a list of any length.
| #{items := [{field_name(), type()}]} %% Record with named fields
%% represented as a map.
%% Encoded as a list in the given
%% order.
| tuple(). %% Any arity, containing type(). This means a static size array.
```
The `gmser_chain_objects.erl` module specifies a serialization code for each
object that can go on-chain. E.g.:
```erlang
tag(signed_tx) -> 11;
...
rev_tag(11) -> signed_tx;
```
The `tag` and `vsn` are laid out in the beginning of the serialized object.
+16 -3
View File
@@ -1,11 +1,24 @@
{application,gmserialization,
[{description,"Serialization of data for the Gajumaru"},
[{description,"Serialization helpers for the Gajumaru."},
{vsn,"0.1.2"},
{registered,[]},
{applications,[kernel,stdlib,crypto,base58]},
{env,[]},
{modules,[gmser_api_encoder,gmser_chain_objects,
{modules,[base58,timing,enacl_eqc,enacl_ext_eqc,enacl,enacl_ext,
enacl_nif,enacl_SUITE,pc,pc_compilation,pc_port_env,
pc_port_specs,pc_prv_clean,pc_prv_compile,pc_util,
base58,timing,enacl_eqc,enacl_ext_eqc,enacl,enacl_ext,
enacl_nif,enacl_SUITE,gmser_api_encoder,
gmser_chain_objects,gmser_contract_code,
gmser_delegation,gmser_id,gmser_rlp,gmserialization,
gmser_api_encoder_tests,gmser_chain_objects_tests,
gmser_contract_code_tests,gmser_delegation_tests,
gmser_rlp_tests,pc,pc_compilation,pc_port_env,
pc_port_specs,pc_prv_clean,pc_prv_compile,pc_util,
gmser_api_encoder,gmser_chain_objects,
gmser_contract_code,gmser_delegation,gmser_id,
gmser_rlp,gmserialization]},
gmser_rlp,gmserialization,gmser_api_encoder_tests,
gmser_chain_objects_tests,gmser_contract_code_tests,
gmser_delegation_tests,gmser_rlp_tests]},
{licenses,[]},
{links,[]}]}.
+1 -11
View File
@@ -4,14 +4,4 @@
{git,
"https://git.qpq.swiss/QPQ-AG/erl-base58.git",
{ref, "e6aa62eeae3d4388311401f06e4b939bf4e94b9c"}}},
{enacl,
{git,
"https://git.qpq.swiss/QPQ-AG/enacl.git",
{ref, "4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}}},
{eblake2, "1.0.0"}
]}.
{dialyzer,
[ {plt_apps, all_deps},
{base_plt_apps, [erts, kernel, stdlib, enacl, base58, eblake2]}
]}.
{eblake2, "1.0.0"}]}.
+2 -12
View File
@@ -1,16 +1,6 @@
{"1.2.0",
[{<<"base58">>,
{git,"https://git.qpq.swiss/QPQ-AG/erl-base58.git",
{ref,"e6aa62eeae3d4388311401f06e4b939bf4e94b9c"}},
0},
{<<"eblake2">>,{pkg,<<"eblake2">>,<<"1.0.0">>},0},
{<<"enacl">>,
{git,"https://git.qpq.swiss/QPQ-AG/enacl.git",
{ref,"4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}},
0}]}.
[
{pkg_hash,[
{<<"eblake2">>, <<"EC8AD20E438AAB3F2E8D5D118C366A0754219195F8A0F536587440F8F9BCF2EF">>}]},
{pkg_hash_ext,[
{<<"eblake2">>, <<"3C4D300A91845B25D501929A26AC2E6F7157480846FAB2347A4C11AE52E08A99">>}]}
].
{<<"eblake2">>,
{pkg,<<"eblake2">>,<<"1.0.0">>},0}].
-1317
View File
File diff suppressed because it is too large Load Diff
-71
View File
@@ -1,71 +0,0 @@
-module(gmser_dyn_types).
-export([ add_type/3 %% (Tag, Code, Template) -> Types1
, add_type/4 %% (Tag, Code, Template, Types) -> Types1
, from_list/2
, expand/1 ]).
-export([ next_code/1 ]).
%% @doc Return the code immediately above the highest key of the `codes'
%% map in the given type map. Only the `codes' entry is inspected; an empty
%% `codes' map makes `lists:max/1' fail.
next_code(#{codes := Registered}) ->
    Highest = lists:max(maps:keys(Registered)),
    Highest + 1.
%% @doc Add a type to the type map returned by `gmser_dyn:registered_types/0'
%% and return the extended map. Delegates to add_type/4.
-spec add_type(Tag, Code, Template) -> Types when
      Tag      :: gmser_dyn:tag(),
      Code     :: gmser_dyn:code(),
      Template :: gmser_dyn:template(),
      Types    :: gmser_dyn:types().
add_type(Tag, Code, Template) ->
    Registered = gmser_dyn:registered_types(),
    add_type(Tag, Code, Template, Registered).
%% @doc Add the type `Tag', with numeric `Code' and `Template', to `Types'
%% and return the extended type map. The work is done by elem_to_type/2,
%% which raises `{duplicate_tag, Tag}' or `{duplicate_code, Code}' when
%% either is already present.
add_type(Tag, Code, Template, Types) ->
    Entry = {Tag, Code, Template},
    elem_to_type(Entry, Types).
%% @doc Apply a list of type-map directives to `Types', left to right, and
%% return the resulting type map. Each element is handed to elem_to_type/2,
%% which accepts `{Tag, Code, Template}', `{modify, {Tag, Template}}',
%% `{labels, Labels}' and `{vsn, Vsn}'; anything else raises
%% `{invalid_type, Elem}'.
from_list(L, Types) ->
    lists:foldl(fun(Elem, Acc) -> elem_to_type(Elem, Acc) end, Types, L).
%% @doc Resolve lazily specified templates in a type map.
%% Every value under `templates' that is a fun is replaced by its result:
%% arity-0 funs are called without arguments, arity-1 funs are called with
%% the map's `vsn'. All other values are kept as they are.
expand(#{vsn := Vsn, templates := Unexpanded} = Types) ->
    Expanded = maps:map(
                 fun(_Tag, Template) -> expand_template(Template, Vsn) end,
                 Unexpanded),
    Types#{templates := Expanded}.

%% Expand a single template value (see expand/1).
expand_template(Gen, _Vsn) when is_function(Gen, 0) -> Gen();
expand_template(Gen, Vsn) when is_function(Gen, 1)  -> Gen(Vsn);
expand_template(Template, _Vsn)                     -> Template.
%% Apply one directive to the type map `Acc' and return the updated map.
%%
%%   {Tag, Code, Template}     - add a new type; raises {duplicate_tag, Tag}
%%                               or {duplicate_code, Code} if already present
%%                               (the tag check takes precedence).
%%   {modify, {Tag, Template}} - replace the template of an existing tag;
%%                               fails if the tag is unknown.
%%   {labels, Lbls}            - add each label via add_label/2.
%%   {vsn, V}                  - set the `vsn' entry.
%%
%% Anything else raises {invalid_type, Elem}.
elem_to_type({Tag, Code, Template}, Acc) when is_atom(Tag), is_integer(Code) ->
    #{codes := Codes, rev := Rev, templates := Temps} = Acc,
    %% Both lookups are evaluated up front, tag first, before deciding.
    TagTaken = is_map_key(Tag, Rev),
    CodeTaken = is_map_key(Code, Codes),
    if
        TagTaken ->
            error({duplicate_tag, Tag});
        CodeTaken ->
            error({duplicate_code, Code});
        true ->
            Acc#{ codes := Codes#{Code => Tag}
                , rev := Rev#{Tag => Code}
                , templates := Temps#{Tag => Template}
                }
    end;
elem_to_type({modify, {Tag, Template}}, Acc) ->
    #{codes := _, rev := Rev, templates := Temps} = Acc,
    %% Assert that the tag is registered before touching its template.
    _ = maps:get(Tag, Rev),
    Acc#{templates := Temps#{Tag := Template}};
elem_to_type({labels, Lbls}, Acc) ->
    lists:foldl(fun(Lbl, Acc1) -> add_label(Lbl, Acc1) end, Acc, Lbls);
elem_to_type({vsn, V}, Acc) ->
    Acc#{vsn => V};
elem_to_type(Elem, _) ->
    error({invalid_type, Elem}).
%% Register the label `Label' under the positive integer index `Ix' in both
%% the `labels' (label -> index) and `rev_labels' (index -> label) maps.
%% Raises {duplicate_label, Label} or {duplicate_label_code, Ix} if either is
%% already present (the label check takes precedence), and
%% {invalid_label, Elem} for anything that is not {atom(), pos_integer()}.
add_label({Label, Ix}, #{labels := Fwd, rev_labels := Bwd} = Acc)
  when is_atom(Label), is_integer(Ix), Ix > 0 ->
    %% Both lookups are evaluated up front, label first, before deciding.
    LabelTaken = is_map_key(Label, Fwd),
    IxTaken = is_map_key(Ix, Bwd),
    if
        LabelTaken ->
            error({duplicate_label, Label});
        IxTaken ->
            error({duplicate_label_code, Ix});
        true ->
            Acc#{ labels := Fwd#{Label => Ix}
                , rev_labels := Bwd#{Ix => Label}
                }
    end;
add_label(Elem, _) ->
    error({invalid_label, Elem}).
+4 -17
View File
@@ -17,9 +17,6 @@
, is_id/1
]).
-export([ t_id/1
]).
%% For aec_serialization
-export([ encode/1
, decode/1
@@ -29,18 +26,11 @@
, val
}).
-type tag() :: 'account'
| 'associate_chain'
| 'channel'
| 'commitment'
| 'contract'
| 'contract_source'
| 'name'
| 'native_token'
| 'entry'.
-type tag() :: 'account' | 'name'
| 'commitment' | 'contract' | 'channel'
| 'associate_chain' | 'entry' .
-type val() :: <<_:256>>.
-type id() :: #id{}.
-opaque(id() :: #id{}).
-export_type([ id/0
, tag/0
@@ -104,9 +94,6 @@ decode(<<Tag:?TAG_SIZE/unit:8, Val:?PUB_SIZE/binary>>) ->
#id{ tag = decode_tag(Tag)
, val = Val}.
%% Returns the given #id{} record unchanged. The function head only matches
%% #id{} records, so any other argument raises function_clause even though
%% the spec advertises any().
%% NOTE(review): presumably used as a type/template callback — confirm
%% against callers.
-spec t_id(any()) -> id().
t_id(#id{} = Id) -> Id.
%%%===================================================================
%%% Internal functions
%%%===================================================================
-4
View File
@@ -10,11 +10,9 @@
-vsn("0.1.2").
-export([ decode_fields/2
, decode_field/2
, deserialize/5
, deserialize_tag_and_vsn/1
, encode_fields/2
, encode_field/2
, serialize/4 ]).
%%%===================================================================
@@ -25,8 +23,6 @@
, fields/0
]).
-export_type([ encodable_term/0 ]).
-type template() :: [{field_name(), type()}].
-type field_name() :: atom().
-type type() :: 'int'