Compare commits

..

No commits in common. "master" and "v1.2.0" have entirely different histories.

5 changed files with 6 additions and 1298 deletions

159
README.md
View File

@ -1,162 +1,15 @@
# GM Serialization
GM Serialization
=====
Serialization helpers for the Gajumaru.
For an overview of the static serializer, see [this document](doc/static.md).
## Build
Build
-----
$ rebar3 compile
## Test
Test
----
$ rebar3 eunit
## Dynamic encoding
The module `gmser_dyn` offers dynamic encoding support, encoding most 'regular'
Erlang data types into an internal RLP representation.
Main API:
* `encode(term()) -> iolist()`
* `encode_typed(template(), term()) -> iolist()`
* `decode(iolist()) -> term()`
* `serialize(term()) -> binary()`
* `serialize_typed(template(), term()) -> binary()`
* `deserialize(binary()) -> term()`
In the examples below, we use the `decode` functions, to illustrate
how the type information is represented. The fully serialized form is
produced by the `serialize` functions.
The basic types supported by the encoder are:
* `integer()` (`anyint`, code: 246)
* `neg_integer()` (`negint`, code: 247)
* `non_neg_integer()` (`int` , code: 248)
* `binary()` (`binary`, code: 249)
* `boolean()` (`bool` , code: 250)
* `list()` (`list` , code: 251)
* `map()` (`map` , code: 252)
* `tuple()` (`tuple` , code: 253)
* `gmser_id:id()` (`id` , code: 254)
* `atom()` (`label` , code: 255)
(The range of codes is chosen because the `gmser_chain_objects` codes
range from 10 to 200, and also to stay within 1 byte.)
When encoding `map` types, the map elements are first sorted.
When specifying a map type for template-driven encoding, use
the `#{items => [{Key, Value}]}` construct.
## Labels
Labels correspond to (existing) atoms in Erlang.
Decoding of a label results in a call to `binary_to_existing_atom/2`, so will
fail if the corresponding atom does not already exist.
This behavior can be modified using the option `#{missing_labels => fail | create | convert}`,
where `fail` is the default, as described above, `convert` means that missing atoms are
converted to binaries, and `create` means that the atom is created dynamically.
The option can be passed e.g.:
```erlang
gmser_dyn:deserialize(Binary, set_opts(#{missing_labels => convert}))
```
or
```erlang
gmser_dyn:deserialize(Binary, set_opts(#{missing_labels => convert}, Types))
```
By calling `gmser_dyn:register_types/1`, after having added options to the type map,
the options can be made to take effect automatically.
It's possible to cache labels for more compact encoding.
Note that when caching labels, the same cache mapping needs to be used on the
decoder side.
Labels are encoded as `[<<255>>, << AtomToBinary/binary >>]`.
If a cached label is used, the encoding becomes `[<<255>, [Ix]]`, where
`Ix` is the integer-encoded index value of the cached label.
## Examples
Dynamically encoded objects have the basic structure `[<<0>>,V,Obj]`, where `V` is the
integer-coded version, and `Obj` is the top-level encoding on the form `[Tag,Data]`.
```erlang
E = fun(T) -> io:fwrite("~w~n", [gmser_dyn:encode(T)]) end.
E(17) -> [<<0>>,<<1>>,[<<248>>,<<17>>]]
E(<<"abc">>) -> [<<0>>,<<1>>,[<<249>>,<<97,98,99>>]]
E(true) -> [<<0>>,<<1>>,[<<250>>,<<1>>]]
E(false) -> [<<0>>,<<1>>,[<<250>>,<<0>>]]
E([1,2]) -> [<<0>>,<<1>>,[<<251>>,[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]
E({1,2}) -> [<<0>>,<<1>>,[<<253>>,[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]
E(#{a=>1, b=>2}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<97>>],[<<248>>,<<1>>]],[[<<255>>,<<98>>],[<<248>>,<<2>>]]]]]
E(gmser_id:create(account,<<1:256>>)) ->
[<<0>>,<<1>>,[<<254>>,<<1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1>>]]
```
Note that tuples and list are encoded the same way, except for the initial type tag.
Maps are encoded as `[<Map>, [KV1, KV2, ...]]`, where `[KV1, KV2, ...]` is the sorted
list of key-value tuples from `map:to_list(Map)`, but with the `tuple` type tag omitted.
## Template-driven encoding
Templates can be provided to the encoder by either naming an already registered
type, or by passing a template directly. In both cases, the encoder will enforce
the type information in the template.
If the template has been registered, the encoder omits inner type tags (still
inserting the top-level tag), leading to some compression of the output.
This also means that the serialized term cannot be decoded without the same
schema information on the decoder side.
In the case of a directly provided template, all type information is inserted,
such that the serialized term can be decoded without any added type information.
The template types are still enforced during encoding.
```erlang
ET = fun(Type,Term) -> io:fwrite("~w~n", [gmser_dyn:encode_typed(Type,Term)]) end.
ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]]
gmser_dyn:register_type(1000,lt2i,[{int,int}]).
ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]]
```
### Alternative types
The dynamic encoder supports two additions to the `gmserialization` template
language: `any` and `#{alt => [AltTypes]}`.
The `any` type doesn't have an associated code, but enforces dynamic encoding.
The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try
to encode as each type in the list, in the specified order, until one matches.
```erlang
gmser_dyn:encode_typed(#{alt => [negint,int]}, 5) -> [<<0>>,<<1>>,[<<247>>,<<5>>]]
gmser_dyn:encode_typed(#{alt => [negint,int]}, 5) -> [<<0>>,<<1>>,[<<248>>,<<5>>]]
gmser_dyn:encode_typed(anyint,-5) -> [<<0>>,<<1>>,[<<246>>,[<<247>>,<<5>>]]]
gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]]
```
### Notes
Note that `anyint` is a standard type. The static serializer supports only
positive integers (`int`), as negative numbers are forbidden on-chain.
For dynamic encoding e.g. in messaging protocols, handling negative numbers can
be useful, so the `negint` type was added as a dynamic type. To encode a full-range
integer, the `alt` construct is needed.
(Floats are not supported, as they are non-deterministic. Rationals and fixed-point
numbers could easily be handled as high-level types, e.g. as `{int,int}`.)

View File

@ -1,83 +0,0 @@
# Static Serialization
The `gmserialization` and `gmser_chain_objects` modules implement the
static serialization support used in the Gajumaru blockchain.
The purpose is to produce fully deterministic serialization, in order
to maintain predictable hashing.
Example:
```erlang
%% deterministic canonical serialization.
-spec serialize_to_binary(signed_tx()) -> binary_signed_tx().
serialize_to_binary(#signed_tx{tx = Tx, signatures = Sigs}) ->
gmser_chain_objects:serialize(
?SIG_TX_TYPE,
?SIG_TX_VSN,
serialization_template(?SIG_TX_VSN),
[ {signatures, lists:sort(Sigs)}
, {transaction, aetx:serialize_to_binary(Tx)}
]).
-spec deserialize_from_binary(binary()) -> signed_tx().
deserialize_from_binary(SignedTxBin) when is_binary(SignedTxBin) ->
[ {signatures, Sigs}
, {transaction, TxBin}
] = gmser_chain_objects:deserialize(
?SIG_TX_TYPE,
?SIG_TX_VSN,
serialization_template(?SIG_TX_VSN),
SignedTxBin),
assert_sigs_size(Sigs),
#signed_tx{ tx = aetx:deserialize_from_binary(TxBin)
, signatures = Sigs
}.
serialization_template(?SIG_TX_VSN) ->
[ {signatures, [binary]}
, {transaction, binary}
].
```
The terms that can be encoded using these templates are given by
this type in `gmserialization.erl`:
```erlang
-type encodable_term() :: non_neg_integer()
| binary()
| boolean()
| [encodable_term()] %% Of any length
| #{atom() => encodable_term()}
| tuple() %% Any arity, containing encodable_term().
| gmser_id:id().
```
The template 'language' is defined by these types:
```erlang
-type template() :: [{field_name(), type()}].
-type field_name() :: atom().
-type type() :: 'int'
| 'bool'
| 'binary'
| 'id' %% As defined in aec_id.erl
| [type()] %% Length one in the type. This means a list of any length.
| #{items := [{field_name(), type()}]} %% Record with named fields
%% represented as a map.
%% Encoded as a list in the given
%% order.
| tuple(). %% Any arity, containing type(). This means a static size array.
```
The `gmser_chain_objects.erl` module specifies a serialization code for each
object that can go on-chain. E.g.:
```erlang
tag(signed_tx) -> 11;
...
rev_tag(11) -> signed_tx;
```
The `tag` and `vsn` are laid out in the beginning of the serialized object.

View File

@ -1,996 +0,0 @@
-module(gmser_dyn).
-export([ encode/1 %% (Term) -> rlp()
, encode/2 %% (Term, Types) -> rlp()
, encode/3 %% (Term, Vsn, Types) -> rlp()
, encode_typed/2 %% (Type, Term) -> rlp()
, encode_typed/3 %% (Type, Term, Types) -> rlp()
, encode_typed/4 %% (Type, Term, Vsn, Types) -> rlp()
, decode/1 %% (RLP) -> Term
, decode/2 %% (RLP, Types) -> Term
, decode/3 %% (RLP, Vsn, Types) -> Term
, decode_typed/2 %% (Type, RLP) -> Term
, decode_typed/3 %% (Type, RLP, Types) -> Term
, decode_typed/4 ]). %% (Type, RLP, Vsn, Types) -> Term
-export([ serialize/1 %% (Term) -> Bin
, serialize/2 %% (Term, Types) -> Bin
, serialize/3 %% (Term, Vsn, Types) -> Bin
, serialize_typed/2 %% (Type, Term) -> Bin
, serialize_typed/3 %% (Type, Term, Types) -> Bin
, serialize_typed/4 %% (Type, Term, Vsn, Types) -> Bin
, deserialize/1 %% (Bin) -> Term
, deserialize/2 %% (Bin, Types) -> Term
, deserialize/3 ]). %% (Bin, Vsn, Types) -> Term
%% register a type schema, inspect existing schema
-export([ register_types/1
, registered_types/0
, registered_types/1
, latest_vsn/0
, get_opts/1
, set_opts/1
, set_opts/2
, types_from_list/1
, revert_to_default_types/0
, dynamic_types/0 ]).
%% Register individual types, or cache labels
-export([ register_type/3
, cache_label/2
]).
-import(gmserialization, [ decode_field/2 ]).
-define(VSN, 1).
-include_lib("kernel/include/logger.hrl").
-ifdef(TEST).
-compile([export_all, nowarn_export_all]).
-include_lib("eunit/include/eunit.hrl").
-endif.
serialize(Term) ->
Vsn = latest_vsn(),
rlp_encode(encode(Term, Vsn, registered_types(Vsn))).
serialize(Term, Types0) ->
Types = proper_types(Types0),
Vsn = types_vsn(Types),
rlp_encode(encode(Term, Vsn, Types)).
serialize(Term, Vsn, Types) ->
rlp_encode(encode(Term, Vsn, proper_types(Types, Vsn))).
serialize_typed(Type, Term) ->
Vsn = latest_vsn(),
rlp_encode(encode_typed(Type, Term, Vsn, registered_types(Vsn))).
serialize_typed(Type, Term, Types0) ->
Types = proper_types(Types0),
Vsn = types_vsn(Types),
rlp_encode(encode_typed(Type, Term, Vsn, Types)).
serialize_typed(Type, Term, Vsn, Types) ->
rlp_encode(encode_typed(Type, Term, Vsn, proper_types(Types, Vsn))).
deserialize(Binary) ->
Fields0 = rlp_decode(Binary),
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} ->
decode_(Fields, Vsn, registered_types(Vsn));
Other ->
error({illegal_serialization, Other})
end.
deserialize(Binary, Types0) ->
Types = proper_types(Types0),
Vsn0 = types_vsn(Types),
Fields0 = rlp_decode(Binary),
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} when Vsn0 == undefined; Vsn0 == Vsn ->
decode_(Fields, Vsn, Types);
Other ->
error({illegal_serialization, Other})
end.
deserialize(Binary, Vsn, Types0) ->
Types = proper_types(Types0),
Fields0 = rlp_decode(Binary),
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} ->
decode_(Fields, Vsn, Types);
Other ->
error({illegal_serialization, Other})
end.
encode(Term) ->
Vsn = latest_vsn(),
encode(Term, Vsn, registered_types(Vsn)).
encode(Term, Types0) ->
Types = proper_types(Types0),
encode(Term, types_vsn(Types), Types).
encode(Term, Vsn, Types0) ->
Types = proper_types(Types0, Vsn),
[ encode_basic(int, 0)
, encode_basic(int, Vsn)
, encode_(Term, Vsn, Types) ].
encode_typed(Type, Term) ->
Vsn = latest_vsn(),
encode_typed(Type, Term, Vsn, registered_types(Vsn)).
encode_typed(Type, Term, Types0) ->
Types = proper_types(Types0),
encode_typed(Type, Term, types_vsn(Types), Types).
encode_typed(Type, Term, Vsn, Types0) ->
Types = proper_types(Types0, Vsn),
[ encode_basic(int, 0)
, encode_basic(int, Vsn)
, encode_typed_(Type, Term, Vsn, Types) ].
decode(Fields) ->
Vsn = latest_vsn(),
decode(Fields, Vsn, registered_types(Vsn)).
decode(Fields, Types0) ->
Types = proper_types(Types0),
decode(Fields, types_vsn(Types), Types).
decode(Fields0, Vsn, Types0) ->
Types = proper_types(Types0, Vsn),
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} ->
decode_(Fields, Vsn, Types);
Other ->
error({illegal_encoding, Other})
end.
decode_typed(Type, Fields) ->
Vsn = latest_vsn(),
decode_typed(Type, Fields, Vsn, registered_types(Vsn)).
decode_typed(Type, Fields, Types0) ->
Types = proper_types(Types0),
decode_typed(Type, Fields, types_vsn(Types), Types).
decode_typed(Type, Fields0, Vsn, Types0) ->
Types = proper_types(Types0, Vsn),
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} ->
decode_from_template(Type, Fields, Vsn, Types);
Other ->
error({illegal_encoding, Other})
end.
decode_tag_and_vsn([TagBin, VsnBin, Fields]) ->
{decode_basic(int, TagBin),
decode_basic(int, VsnBin),
Fields}.
proper_types(undefined) ->
registered_types(latest_vsn());
proper_types(#{} = Types) ->
Types.
proper_types(undefined, Vsn) ->
registered_types(Vsn);
proper_types(#{} = Types, Vsn) ->
assert_vsn(Vsn, Types).
types_vsn(#{vsn := Vsn}) -> Vsn;
types_vsn(_) -> latest_vsn().
assert_vsn(V, #{vsn := V} = Types) -> Types;
assert_vsn(V, #{vsn := Other} ) -> error({version_mismatch, V, Other});
assert_vsn(V, #{} = Types ) -> Types#{vsn => V}.
dynamic_types() ->
#{ vsn => ?VSN
, codes =>
#{ 246 => anyint
, 247 => negint
, 248 => int
, 249 => binary
, 250 => bool
, 251 => list
, 252 => map
, 253 => tuple
, 254 => id
, 255 => label}
, rev =>
#{ anyint => 246
, negint => 247
, int => 248
, binary => 249
, bool => 250
, list => 251
, map => 252
, tuple => 253
, id => 254
, label => 255}
, labels => #{}
, rev_labels => #{}
, templates =>
#{ anyint => #{alt => [negint, int]}
, negint => negint
, int => int
, binary => binary
, bool => bool
, list => list
, map => map
, tuple => tuple
, id => id
, label => label
}
, options => #{}
}.
registered_types() ->
registered_types(latest_vsn()).
registered_types(Vsn) ->
case persistent_term:get(pt_key(), undefined) of
undefined ->
dynamic_types();
#{latest_vsn := _, types := #{Vsn := Types}} ->
Types;
#{latest_vsn := _, types := _} ->
dynamic_types()
end.
template(TagOrCode, Vsn, Types) ->
{Tag, Template} = get_template(TagOrCode, Types),
{Tag, dyn_template_(Template, Vsn)}.
get_template(Code, #{codes := Codes, templates := Ts}) when is_integer(Code) ->
Tag = maps:get(Code, Codes),
{Tag, maps:get(Tag, Ts)};
get_template(Tag, #{templates := Ts}) when is_atom(Tag) ->
{Tag, maps:get(Tag, Ts)}.
dyn_template_(F, Vsn) ->
if is_function(F, 0) -> F();
is_function(F, 1) -> F(Vsn);
true -> F
end.
find_cached_label(Lbl, #{labels := Lbls}) ->
maps:find(Lbl, Lbls).
decode_([CodeBin, Flds], Vsn, Types) ->
Code = decode_basic(int, CodeBin),
{_Tag, Template} = template(Code, Vsn, Types),
decode_from_template(Template, Flds, Vsn, Types).
encode_(Term, Vsn, Types) ->
encode_(Term, dyn(emit()), Vsn, Types).
encode_(Term, E, Vsn, Types) ->
{_Tag, Template} = auto_template(Term),
encode_from_template(Template, Term, E, Vsn, Types).
%% To control when to emit type codes:
%% If the template is predefined, it's 'not dynamic' (nodyn(E)).
%% If we are encoding against a type that's part of a predefined template,
%% we typically don't emit the type code, except at the very top.
%% So: emit type codes if the 'emit' bit is set, or if the 'dyn' bit is set.
emit() -> 2#01.
dyn() -> 2#10.
emit(E) -> E bor 2#01.
noemit(E) -> E band 2#10.
dyn(E) -> E bor 2#10.
nodyn(E) -> E band 2#01.
encode_typed_(Type, Term, Vsn, #{codes := Codes, rev := Rev} = Types) ->
case (is_map_key(Type, Codes) orelse is_map_key(Type, Rev)) of
true ->
encode_typed_(Type, Term, nodyn(emit()), Vsn, Types);
false ->
encode_maybe_template(Type, Term, Vsn, Types)
end.
encode_typed_(any, Term, _, Vsn, Types) ->
encode_(Term, dyn(emit()), Vsn, Types);
encode_typed_(Code, Term, E, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) ->
{_Tag, Template} = template(Code, Vsn, Types),
[encode_basic(int,Code),
encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)];
encode_typed_(Tag, Term, E, Vsn, #{templates := Ts, rev := Rev} = Types)
when is_map_key(Tag, Ts) ->
Template = dyn_template_(maps:get(Tag, Ts), Vsn),
Code = maps:get(Tag, Rev),
[encode_basic(int,Code),
encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)];
encode_typed_(MaybeTemplate, Term, _, Vsn, Types) ->
encode_maybe_template(MaybeTemplate, Term, Vsn, Types).
encode_maybe_template(#{items := _} = Type, Term, Vsn, Types) ->
case is_map(Term) of
true ->
encode_from_template(Type, Term, emit(dyn()), Vsn, Types);
false ->
error({invalid, Type, Term})
end;
encode_maybe_template(#{alt := _} = Type, Term, Vsn, Types) ->
encode_from_template(Type, Term, Vsn, emit(dyn()), Types);
encode_maybe_template(#{switch := _} = Type, Term, Vsn, Types) ->
encode_from_template(Type, Term, Vsn, emit(dyn()), Types);
encode_maybe_template(Pat, Term, Vsn, Types) when is_list(Pat);
is_tuple(Pat) ->
encode_from_template(Pat, Term, emit(dyn()), Vsn, Types);
encode_maybe_template(Other, Term, _Vsn, _Types) ->
error({illegal_template, Other, Term}).
auto_template({id,Tag,V}) when Tag == account
; Tag == name
; Tag == commitment
; Tag == contract
; Tag == channel
; Tag == associate_chain
; Tag == entry ->
if is_binary(V) -> {id, id};
true ->
%% close, but no cigar
{tuple, tuple}
end;
auto_template(T) ->
if is_map(T) -> {map, map};
is_list(T) -> {list, list};
is_tuple(T) -> {tuple, tuple};
is_binary(T) -> {binary, binary};
is_boolean(T) -> {bool, bool};
is_atom(T) -> {label, label}; % binary_to_existing_atom()
is_integer(T),
T >= 0 -> {int, int};
is_integer(T),
T < 0 -> {negint, negint};
true ->
error({invalid_type, T})
end.
decode_from_template(any, Fld, Vsn, Types) ->
decode_(Fld, Vsn, Types);
decode_from_template(#{items := Items}, Fld, Vsn, Types) when is_list(Fld) ->
Zipped = lists:zipwith(
fun({{K, T}, V}) -> {K, T, V};
({{opt,K,T}, V}) -> {K, T, V}
end, Items, Fld),
lists:foldl(
fun({K, Type, V}, Map) ->
maps:is_key(K, Map) andalso error(badarg, duplicate_field),
Map#{K => decode_from_template({any,Type}, V, Vsn, Types)}
end, #{}, Zipped);
decode_from_template(#{alt := Alts} = T, Fld, Vsn, Types) when is_list(Alts) ->
decode_alt(Alts, Fld, T, Vsn, Types);
decode_from_template(#{switch := Alts} = T, Fld, Vsn, Types) when is_map(Alts) ->
decode_switch(Alts, Fld, T, Vsn, Types);
decode_from_template(list, Flds, Vsn, Types) ->
[decode_(F, Vsn, Types) || F <- Flds];
decode_from_template(map, Fld, Vsn, Types) ->
TupleFields = [F || F <- Fld],
Items = [decode_from_template({any,any}, T, Vsn, Types)
|| T <- TupleFields],
maps:from_list(Items);
decode_from_template(tuple, Fld, Vsn, Types) ->
Items = [decode_(F, Vsn, Types) || F <- Fld],
list_to_tuple(Items);
decode_from_template([Type], Fields, Vsn, Types) ->
[decode_from_template(Type, F, Vsn, Types)
|| F <- Fields];
decode_from_template(Type, V, Vsn, Types) when is_list(Type), is_list(V) ->
decode_fields(Type, V, Vsn, Types);
decode_from_template(Type, V, Vsn, Types) when is_tuple(Type), is_list(V) ->
Zipped = lists:zip(tuple_to_list(Type), V),
Items = [decode_from_template(T1, V1, Vsn, Types) || {T1, V1} <- Zipped],
list_to_tuple(Items);
decode_from_template(label, [C], _, #{rev_labels := RLbls}) ->
Code = decode_basic(int, C),
maps:get(Code, RLbls);
decode_from_template(Type, Fld, _, Types) when Type == int
; Type == negint
; Type == binary
; Type == bool
; Type == id
; Type == label ->
decode_basic(Type, Fld, Types).
encode_from_template(any, V, _E, Vsn, Types) ->
encode_(V, dyn(emit()), Vsn, Types);
encode_from_template(list, L, E, Vsn, Types) when is_list(L) ->
assert_type(is_list(L), list, L),
emit(E, list, Types,
[encode_(V, Vsn, Types) || V <- L]);
encode_from_template(#{items := Items}, M, E, Vsn, Types) ->
assert_type(is_map(M), map, M),
Emit = noemit(E),
Encode = fun(K, Type, V) ->
[encode_from_template(any, K, Emit, Vsn, Types),
encode_from_template(Type, V, Emit, Vsn, Types)]
end,
emit(E, map, Types,
lists:foldr(
fun({K, Type}, Acc) ->
V = maps:get(K, M),
[Encode(K, Type, V) | Acc];
({opt, K, Type}, Acc) ->
case maps:find(K, M) of
{ok, V} ->
[Encode(K, Type, V) | Acc];
error ->
Acc
end
end, [], Items));
encode_from_template(#{alt := Alts} = T, Term, E, Vsn, Types) when is_list(Alts) ->
encode_alt(Alts, Term, T, E, Vsn, Types);
encode_from_template(#{switch := Alts} = T, Term, E, Vsn, Types) when is_map(Alts),
is_map(Term) ->
encode_switch(Alts, Term, T, E, Vsn, Types);
encode_from_template(map, M, E, Vsn, Types) ->
assert_type(is_map(M), map, M),
Emit = emit(E),
emit(E, map, Types,
[[encode_from_template(any, K, Emit, Vsn, Types),
encode_from_template(any, V, Emit, Vsn, Types)]
|| {K, V} <- lists:sort(maps:to_list(M))]);
encode_from_template(tuple, T, E, Vsn, Types) ->
assert_type(is_tuple(T), tuple, T),
emit(E, tuple, Types,
[encode_(V, noemit(E), Vsn, Types) || V <- tuple_to_list(T)]);
encode_from_template(T, V, E, Vsn, Types) when is_tuple(T) ->
assert_type(is_tuple(V), T, V),
assert_type(tuple_size(T) =:= tuple_size(V), T, V),
Zipped = lists:zip(tuple_to_list(T), tuple_to_list(V)),
emit(E, tuple, Types,
[encode_from_template(T1, V1, noemit(E), Vsn, Types) || {T1, V1} <- Zipped]);
encode_from_template([Type] = T, List, E, Vsn, Types) ->
assert_type(is_list(List), T, List),
emit(E, list, Types,
[encode_from_template(Type, V, noemit(E), Vsn, Types) || V <- List]);
encode_from_template(Type, List, E, Vsn, Types) when is_list(Type), is_list(List) ->
encode_fields(Type, List, E, Vsn, Types);
encode_from_template(label, V, E, _, Types) ->
assert_type(is_atom(V), label, V),
case find_cached_label(V, Types) of
error ->
encode_basic(label, V, E, Types);
{ok, Code} when is_integer(Code) ->
emit(E, label, Types,
[encode_basic(int, Code)])
end;
encode_from_template(Type, V, E, _, Types) when Type == id
; Type == binary
; Type == bool
; Type == int
; Type == negint
; Type == label ->
encode_basic(Type, V, E, Types);
encode_from_template(Type, V, E, Vsn, Types) ->
encode_typed_(Type, V, E, Vsn, Types).
assert_type(true, _, _) -> ok;
assert_type(_, Type, V) -> error({illegal, Type, V}).
decode_alt([A|Alts], Fld, T, Vsn, Types) ->
try decode_from_template(A, Fld, Vsn, Types)
catch error:_ ->
decode_alt(Alts, Fld, T, Vsn, Types)
end;
decode_alt([], Fld, T, _, _) ->
error({illegal, T, Fld}).
encode_alt(Alts, Term, T, E, Vsn, Types) ->
%% Since we don't know which type may match, treat as dynamic.
encode_alt_(Alts, Term, T, dyn(E), Vsn, Types).
encode_alt_([A|Alts], Term, T, E, Vsn, Types) ->
try encode_from_template(A, Term, E, Vsn, Types)
catch error:_ ->
encode_alt_(Alts, Term, T, E, Vsn, Types)
end;
encode_alt_([], Term, T, _, _, _) ->
error({illegal, T, Term}).
decode_switch(Alts, Fld, T, Vsn, Types) ->
[KFld, VFld] = Fld,
Key = decode_(KFld, Vsn, Types),
case maps:find(Key, Alts) of
{ok, SubType} ->
SubTerm = decode_from_template(SubType, VFld, Vsn, Types),
#{Key => SubTerm};
error ->
error({illegal, T, Fld})
end.
encode_switch(Alts, Term, T, E, Vsn, Types) ->
assert_type(map_size(Term) == 1, singleton_map, Term),
[{Key, Subterm}] = maps:to_list(Term),
case maps:find(Key, Alts) of
{ok, SubType} ->
Enc = encode_from_template(SubType, Subterm, E, Vsn, Types),
emit(E, map, Types,
[[encode_from_template(any, Key, E, Vsn, Types),
Enc]]);
error ->
error({illegal, T, Term})
end.
%% Basically, dynamically encoding a statically defined object
encode_fields([{Field, Type}|TypesLeft],
[{Field, Val}|FieldsLeft], E, Vsn, Types) ->
[ encode_from_template(Type, Val, E, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)];
encode_fields([{_Field, _Type} = FT|_TypesLeft],
[Val |_FieldsLeft], _E, _Vsn, _Types) ->
error({illegal_field, FT, Val});
encode_fields([Type|TypesLeft],
[Val |FieldsLeft], E, Vsn, Types) when is_atom(Type) ->
%% Not sure about this ...
[ encode_from_template(Type, Val, E, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)];
encode_fields([], [], _, _, _) ->
[].
decode_fields([{Tag, Type}|TypesLeft],
[Field |FieldsLeft], Vsn, Types) ->
[ {Tag, decode_from_template(Type, Field, Vsn, Types)}
| decode_fields(TypesLeft, FieldsLeft, Vsn, Types)];
decode_fields([], [], _, _) ->
[].
emit(E, Tag, Types, Enc) when E > 0 ->
[emit_code(Tag, Types), Enc];
emit(0, _, _, Enc) ->
Enc.
emit_code(Tag, #{rev := Tags}) ->
encode_basic(int, maps:get(Tag, Tags)).
decode_basic(Type, [Tag,V], #{codes := Codes} = Types) ->
case decode_basic(int, Tag) of
Code when map_get(Code, Codes) == Type ->
decode_basic_(Type, V, Types);
_ ->
error(illegal)
end;
decode_basic(Type, V, Types) ->
decode_basic_(Type, V, Types).
decode_basic_(label, Fld, #{options := #{missing_labels := Opt}}) ->
Bin = decode_basic(binary, Fld),
case Opt of
create -> binary_to_atom(Bin, utf8);
fail -> binary_to_existing_atom(Bin, utf8);
convert ->
try binary_to_existing_atom(Bin, utf8)
catch
error:_ -> Bin
end
end;
decode_basic_(Type, Fld, _) ->
decode_basic(Type, Fld).
decode_basic(label, Fld) ->
binary_to_existing_atom(decode_basic(binary, Fld), utf8);
decode_basic(negint, Fld) ->
I = gmserialization:decode_field(int, Fld),
-I;
decode_basic(Type, Fld) ->
gmserialization:decode_field(Type, Fld).
encode_basic(negint, I, _, Types) when is_integer(I), I < 0 ->
[emit_code(negint, Types), gmserialization:encode_field(int, -I)];
encode_basic(Tag, V, E, Types) when E > 0 ->
[emit_code(Tag, Types), encode_basic(Tag, V)];
encode_basic(Tag, V, 0, _) ->
encode_basic(Tag, V).
encode_basic(label, A) when is_atom(A) ->
encode_basic(binary, atom_to_binary(A, utf8));
encode_basic(Type, Fld) ->
gmserialization:encode_field(Type, Fld).
rlp_decode(Bin) ->
gmser_rlp:decode(Bin).
rlp_encode(Fields) ->
gmser_rlp:encode(Fields).
%% ===========================================================================
%% Type registration and validation code
register_types(Types) when is_map(Types) ->
register_types(latest_vsn(), Types).
register_types(Vsn, Types) ->
Codes = maps:get(codes, Types, #{}),
Rev = rev_codes(Codes),
Templates = maps:get(templates, Types, #{}),
Labels = maps:get(labels, Types, #{}),
Options = maps:get(options, Types, #{}),
#{codes := Codes0, rev := Rev0, labels := Labels0,
templates := Templates0, options := Options0} =
dynamic_types(),
Merged = #{ codes => maps:merge(Codes0, Codes)
, rev => maps:merge(Rev0, Rev)
, templates => maps:merge(Templates0, Templates)
, options => maps:merge(Options0, Options)
, labels => maps:merge(Labels0, Labels) },
assert_sizes(Merged),
assert_mappings(Merged),
Merged1 = assert_label_cache(Merged),
put_types(Vsn, Merged1).
latest_vsn() ->
case persistent_term:get(pt_key(), undefined) of
undefined -> ?VSN;
#{latest_vsn := V} ->
V
end.
pt_key() -> {?MODULE, types}.
put_types(Types) ->
put_types(types_vsn(Types), Types).
put_types(V, Types) ->
K = pt_key(),
Old = case persistent_term:get(K, undefined) of
undefined -> default_types_pt();
Existing -> Existing
end,
put_types_(K, V, Types, Old).
put_types_(K, V, Types, #{latest_vsn := V0, types := Types0} = Old) ->
New = case V > V0 of
true ->
Old#{latest_vsn := V,
types := Types0#{V => Types#{vsn => V}}};
false ->
Old#{types := Types0#{V => Types#{vsn => V}}}
end,
persistent_term:put(K, New).
types_from_list(L) ->
types_from_list(L, registered_types()).
types_from_list(L, Types) ->
gmser_dyn_types:from_list(L, Types).
register_type(Code, Tag, Template) ->
register_type(latest_vsn(), Code, Tag, Template).
register_type(Vsn, Code, Tag, Template) when is_integer(Code), Code >= 0 ->
#{codes := Codes, rev := Rev, templates := Temps} = Types = registered_types(Vsn),
case {is_map_key(Code, Codes), is_map_key(Tag, Rev)} of
{false, false} ->
New = Types#{ codes := Codes#{Code => Tag}
, rev := Rev#{Tag => Code}
, templates := Temps#{Tag => Template} },
put_types(New),
New;
{true, _} -> error(code_exists);
{_, true} -> error(tag_exists)
end.
set_opts(Opts) ->
set_opts(Opts, registered_types()).
set_opts(Opts, Types) ->
Types#{options => Opts}.
get_opts(#{options := Opts}) ->
Opts.
cache_label(Code, Label) when is_integer(Code), Code >= 0, is_atom(Label) ->
#{labels := Lbls, rev_labels := RevLbls} = Types = registered_types(),
case {is_map_key(Label, Lbls), is_map_key(Code, RevLbls)} of
{false, false} ->
New = Types#{ labels := Lbls#{Label => Code}
, rev_labels := RevLbls#{Code => Label} },
put_types(New),
New;
{true,_} -> error(label_exists);
{_,true} -> error(code_exists)
end.
revert_to_default_types() ->
persistent_term:put(pt_key(), default_types_pt()).
default_types_pt() ->
#{latest_vsn => ?VSN, types => #{?VSN => dynamic_types()}}.
assert_sizes(#{codes := Codes, rev := Rev, templates := Ts} = Types) ->
assert_sizes(map_size(Codes), map_size(Rev), map_size(Ts), Types).
assert_sizes(Sz, Sz, Sz, _) ->
ok;
assert_sizes(Sz, RSz, Sz, Types) when RSz =/= Sz ->
%% Wrong size reverse mapping must mean duplicate mappings
%% We auto-generate the reverse-mappings, so we know there aren't
%% too many of them
?LOG_ERROR("Reverse mapping size doesn't match codes size", []),
Codes = maps:get(codes, Types),
CodeVals = maps:values(Codes),
Duplicates = CodeVals -- lists:usort(CodeVals),
error({duplicate_mappings, Duplicates, Types});
assert_sizes(Sz, _, TSz, Types) when Sz > TSz ->
?LOG_ERROR("More codes than templates", []),
Tags = maps:keys(maps:get(rev, Types)),
Templates = maps:get(templates, Types),
Missing = [T || T <- Tags,
not is_map_key(T, Templates)],
error({missing_mappings, Missing, Types});
assert_sizes(Sz, _, TSz, Types) when TSz > Sz ->
%% More mappings than codes. May not be horrible.
%% We check that all codes have mappings elsewhere.
?LOG_WARNING("More templates than codes in ~p", [Types]),
ok.
assert_mappings(#{rev := Rev, templates := Ts} = Types) ->
Tags = maps:keys(Rev),
case [T || T <- Tags,
not is_map_key(T, Ts)] of
[] ->
ok;
Missing ->
?LOG_ERROR("Missing templates for ~p", [Missing]),
error({missing_templates, Missing, Types})
end.
assert_label_cache(#{labels := Labels} = Types) ->
Ls = maps:keys(Labels),
case [L || L <- Ls, not is_atom(L)] of
[] -> ok;
_NonAtoms ->
error(non_atoms_in_label_cache)
end,
Rev = [{C,L} || {L,C} <- maps:to_list(Labels)],
case [C || {C,_} <- Rev, not is_integer(C)] of
[] -> ok;
_NonInts -> error(non_integer_label_cache_codes)
end,
RevLabels = maps:from_list(Rev),
case map_size(RevLabels) == map_size(Labels) of
true ->
Types#{rev_labels => RevLabels};
false ->
error(non_unique_label_cache_codes)
end.
rev_codes(Codes) ->
L = maps:to_list(Codes),
maps:from_list([{V, K} || {K, V} <- L]).
%% ===========================================================================
%% Unit tests
-ifdef(TEST).
trace() ->
dbg:tracer(),
dbg:tpl(?MODULE, x),
dbg:p(all, [c]).
notrace() ->
dbg:ctpl('_'),
dbg:stop().
round_trip_test_() ->
[?_test(t_round_trip(T)) ||
T <- t_sample_types()
].
ser_round_trip_test_() ->
[?_test(t_ser_round_trip(T)) ||
T <- t_sample_types()
].
t_sample_types() ->
[ 5
, -5
, <<"a">>
, [1,2,3]
, {<<"a">>,1}
, #{<<"a">> => 1}
, [#{1 => <<"c">>, [17] => true}]
, true
].
user_types_test_() ->
{foreach,
fun() ->
revert_to_default_types()
end,
fun(_) ->
revert_to_default_types()
end,
[ ?_test(t_reg_typed_tuple())
, ?_test(t_reg_chain_objects_array())
, ?_test(t_reg_template_fun())
, ?_test(t_reg_template_vsnd_fun())
, ?_test(t_reg_label_cache())
, ?_test(t_reg_label_cache2())
, ?_test(t_reg_options())
]}.
dynamic_types_test_() ->
[ ?_test(revert_to_default_types())
, ?_test(t_typed_map())
, ?_test(t_alts())
, ?_test(t_switch())
, ?_test(t_anyints())
, ?_test(t_missing_labels())
].
versioned_types_test_() ->
[ ?_test(t_new_version())
].
t_round_trip(T) ->
?debugVal(T),
?assertMatch({T, T}, {T, decode(encode(T))}).
t_ser_round_trip(T) ->
Data = serialize(T),
?debugFmt("Data (~p) = ~p~n", [T, Data]),
?assertMatch({T, T}, {T, deserialize(Data)}).
t_round_trip_typed(Type, T) ->
?debugVal(T),
?assertMatch({T, T}, {T, decode(encode_typed(Type, T))}).
t_reg_typed_tuple() ->
Type = {int, int, int},
MyTypes = #{ codes => #{ 1001 => int_tup3 }
, templates => #{ int_tup3 => Type }
},
register_types(MyTypes),
GoodTerm = {2,3,4},
?debugFmt("Type: ~p, GoodTerm = ~p", [Type, GoodTerm]),
Enc = encode_typed(int_tup3, GoodTerm),
GoodTerm = decode(Enc),
t_bad_typed_encode(int_tup3, {1,2,<<"a">>}, {illegal,int,<<"a">>}),
t_bad_typed_encode(int_tup3, {1,2,3,4}, {illegal, {int,int,int}, {1,2,3,4}}).
t_bad_typed_encode(Type, Term, Error) ->
try encode_typed(Type, Term),
error({expected_error, Error})
catch
error:Error ->
ok
end.
t_reg_chain_objects_array() ->
Template = [{foo, {int, binary}}, {bar, [{int, int}]}, {baz, {int}}],
?debugFmt("Template = ~p", [Template]),
MyTypes = #{ codes => #{ 1002 => coa }
, templates => #{ coa => Template } },
register_types(MyTypes),
Values = [{foo, {1, <<"foo">>}}, {bar, [{1, 2}, {3, 4}, {5, 6}]}, {baz, {1}}],
?debugFmt("Values = ~p", [Values]),
Enc = encode_typed(coa, Values),
Values = decode(Enc).
t_reg_template_fun() ->
Template = fun() -> {int,int} end,
New = register_type(1010, tup2f0, Template),
?debugFmt("New = ~p", [New]),
E = encode_typed(tup2f0, {3,4}),
{3,4} = decode(E),
ok.
t_reg_template_vsnd_fun() ->
Template = fun(1) -> {int,int} end,
New = register_type(1011, tup2f1, Template),
?debugFmt("New = ~p", [New]),
E = encode_typed(tup2f1, {3,4}),
{3,4} = decode(E),
ok.
t_reg_label_cache() ->
Enc0 = gmser_dyn:encode('1'),
?debugFmt("Enc0 (no cache): ~w", [Enc0]),
MyTypes1 = #{codes => #{1003 => lbl_tup2}, templates => #{ lbl_tup2 => {label,label} }},
register_types(MyTypes1),
Enc0a = gmser_dyn:encode_typed(lbl_tup2, {'1','1'}),
?debugFmt("Enc0a (no cache): ~w", [Enc0a]),
{'1','1'} = gmser_dyn:decode(Enc0a),
MyTypes2 = MyTypes1#{labels => #{'1' => 49}}, % atom_to_list('1') == [49]
register_types(MyTypes2),
Enc1 = gmser_dyn:encode('1'),
Enc1a = gmser_dyn:encode_typed(lbl_tup2, {'1','1'}),
?debugFmt("Enc1 (w/ cache): ~w", [Enc1]),
?debugFmt("Enc1a (w/ cache): ~w", [Enc1a]),
{'1','1'} = gmser_dyn:decode(Enc1a),
true = Enc0 =/= Enc1,
Enc2 = gmser_dyn:encode_typed(label, '1'),
?debugFmt("Enc2 (typed): ~w", [Enc2]),
?assertEqual(Enc2, Enc1),
?assertNotEqual(Enc0a, Enc1a).
t_reg_label_cache2() ->
TFromL = types_from_list(
[ {lbl_tup2, 1003, {label, label}}
, {labels,
[{'1', 49}]}
]),
?debugFmt("TFromL = ~w", [TFromL]),
register_types(TFromL),
Tup = {'1', '1'},
Enc = gmser_dyn:encode_typed(lbl_tup2, Tup),
[<<0>>,<<1>>,[<<3,235>>,[[<<49>>],[<<49>>]]]] = Enc,
_Tup = gmser_dyn:decode(Enc).
t_reg_options() ->
register_types(set_opts(#{missing_labels => convert})),
[Dyn,Vsn,[Am,<<"random">>]] = gmser_dyn:encode(random),
EncNewAm = [Dyn,Vsn,[Am,<<"foo12345">>]],
<<"foo12345">> = gmser_dyn:decode(EncNewAm),
ok.
t_typed_map() ->
Term = #{a => 13, {key,1} => [a]},
Items = [{a,int},{{key,1},[label]}],
OptItems = [{opt, b, int} | Items],
Enc = encode_typed(#{items => Items}, Term),
?assertEqual(Term, decode(Enc)),
?assertEqual(Enc, encode_typed(#{items => Items}, Term)),
?assertEqual(Enc, encode_typed(#{items => OptItems}, Term)),
Term1 = Term#{b => 4},
Enc1 = encode_typed(#{items => OptItems}, Term1),
?assertEqual(Term1, decode(Enc1)),
?assertEqual(Enc, encode_typed(#{items => Items}, Term1)).
t_alts() ->
t_round_trip_typed(#{alt => [negint, int]}, -4),
t_round_trip_typed(#{alt => [negint, int]}, 4),
ok.
t_switch() ->
T = #{switch => #{a => int, b => binary}},
t_round_trip_typed(T, #{a => 17}),
t_round_trip_typed(T, #{b => <<"foo">>}),
?assertError({illegal,int,<<"foo">>}, encode_typed(T, #{a => <<"foo">>})),
MMap = #{a => 17, b => <<"foo">>},
?assertError({illegal, singleton_map, MMap}, encode_typed(T, MMap)).
t_anyints() ->
t_round_trip_typed(anyint, -5),
t_round_trip_typed(anyint, 5),
ok.
t_missing_labels() ->
[Dyn,Vsn,[Am,<<"random">>]] = gmser_dyn:encode(random),
EncNewAm = [Dyn,Vsn,[Am,<<"flurbee">>]],
?assertError(badarg, gmser_dyn:decode(EncNewAm)),
?assertError(badarg, gmser_dyn:decode(EncNewAm, set_opts(#{missing_labels => fail}))),
<<"flurbee">> = gmser_dyn:decode(EncNewAm, set_opts(#{missing_labels => convert})),
true = is_atom(gmser_dyn:decode(EncNewAm, set_opts(#{missing_labels => create}))),
ok.
t_new_version() ->
V = latest_vsn(),
Types0 = registered_types(V),
V1 = V+1,
Types1 = types_from_list([{vsn, V1},
{msg1, 300, {int, int}}], Types0),
T2 = {3,5},
Enc21 = encode_typed(msg1, T2, Types1),
T2 = decode(Enc21, Types1),
V2 = V1+1,
Types2 = types_from_list([{vsn, V2},
{modify, {msg1, {int, int, int}}}], Types1),
Enc21 = encode_typed(msg1, T2, Types1),
?assertError({illegal,{int,int,int},T2}, encode_typed(msg1, T2, Types2)),
T3 = {3,5,7},
Enc32 = encode_typed(msg1, T3, Types2),
T3 = decode(Enc32, Types2).
-endif.

View File

@ -1,62 +0,0 @@
-module(gmser_dyn_types).
-export([ add_type/4
, from_list/2
, expand/1 ]).
-export([ next_code/1 ]).
next_code(#{codes := Codes}) ->
lists:max(maps:keys(Codes)) + 1.
add_type(Tag, Code, Template, Types) ->
elem_to_type({Tag, Code, Template}, Types).
from_list(L, Types) ->
lists:foldl(fun elem_to_type/2, Types, L).
expand(#{vsn := V, templates := Templates0} = Types) ->
Templates =
maps:map(
fun(_, F) when is_function(F, 0) ->
F();
(_, F) when is_function(F, 1) ->
F(V);
(_, T) ->
T
end, Templates0),
Types#{templates := Templates}.
elem_to_type({Tag, Code, Template}, Acc) when is_atom(Tag), is_integer(Code) ->
#{codes := Codes, rev := Rev, templates := Temps} = Acc,
case {is_map_key(Tag, Rev), is_map_key(Code, Codes)} of
{false, false} ->
Acc#{ codes := Codes#{Code => Tag}
, rev := Rev#{Tag => Code}
, templates => Temps#{Tag => Template}
};
{true, _} -> error({duplicate_tag, Tag});
{_, true} -> error({duplicate_code, Code})
end;
elem_to_type({modify, {Tag, Template}}, Acc) ->
#{codes := _, rev := Rev, templates := Templates} = Acc,
_ = maps:get(Tag, Rev),
Templates1 = Templates#{Tag := Template},
Acc#{templates := Templates1};
elem_to_type({labels, Lbls}, Acc) ->
lists:foldl(fun add_label/2, Acc, Lbls);
elem_to_type({vsn, V}, Acc) ->
Acc#{vsn => V};
elem_to_type(Elem, _) ->
error({invalid_type, Elem}).
add_label({L, Code}, #{labels := Lbls, rev_labels := RevLbls} = Acc)
when is_atom(L), is_integer(Code), Code > 0 ->
case {is_map_key(L, Lbls), is_map_key(Code, RevLbls)} of
{false, false} ->
Acc#{labels := Lbls#{L => Code},
rev_labels := RevLbls#{Code => L}};
{true, _} -> error({duplicate_label, L});
{_, true} -> error({duplicate_label_code, Code})
end;
add_label(Elem, _) ->
error({invalid_label, Elem}).

View File

@ -10,11 +10,9 @@
-vsn("0.1.2").
-export([ decode_fields/2
, decode_field/2
, deserialize/5
, deserialize_tag_and_vsn/1
, encode_fields/2
, encode_field/2
, serialize/4 ]).
%%%===================================================================
@ -25,8 +23,6 @@
, fields/0
]).
-export_type([ encodable_term/0 ]).
-type template() :: [{field_name(), type()}].
-type field_name() :: atom().
-type type() :: 'int'