Support 'negint', 'items' and 'alt'
All checks were successful
Gajumaru Serialization Tests / tests (push) Successful in 49m5s

This commit is contained in:
Ulf Wiger 2025-04-08 08:57:03 +02:00
parent 6563ef9de7
commit dd3e731480
2 changed files with 172 additions and 124 deletions

View File

@ -1,21 +1,17 @@
GM Serialization
=====
# GM Serialization
Serialization helpers for the Gajumaru.
Build
-----
## Build
$ rebar3 compile
Test
----
## Test
$ rebar3 eunit
Dynamic encoding
----
## Dynamic encoding
The module `gmser_dyn` offers dynamic encoding support, encoding most 'regular'
Erlang data types into an internal RLP representation.
@ -34,6 +30,7 @@ how the type information is represented. The fully serialized form is
produced by the `serialize` functions.
The basic types supported by the encoder are:
* `neg_integer()` (`negint`, code: 247)
* `non_neg_integer()` (`int` , code: 248)
* `binary()` (`binary`, code: 249)
* `boolean()` (`bool` , code: 250)
@ -48,8 +45,8 @@ When encoding `map` types, the map elements are first sorted.
When specifying a map type for template-driven encoding, use
the `#{items => [{Key, Value}]}` construct.
Labels
----
## Labels
Labels correspond to (existing) atoms in Erlang.
Decoding of a label results in a call to `binary_to_existing_atom/2`, so will
@ -63,8 +60,7 @@ Labels are encoded as `[<<255>>, << AtomToBinary/binary >>]`.
If a cached label is used, the encoding becomes `[<<255>>, [Ix]]`, where
`Ix` is the integer-encoded index value of the cached label.
Examples
----
## Examples
Dynamically encoded objects have the basic structure `[<<0>>,V,Obj]`, where `V` is the
integer-coded version, and `Obj` is the top-level encoding of the form `[Tag,Data]`.
@ -88,8 +84,7 @@ Note that tuples and list are encoded the same way, except for the initial type
Maps are encoded as `[<Map>, [KV1, KV2, ...]]`, where `[KV1, KV2, ...]` is the sorted
list of key-value tuples from `maps:to_list(Map)`, but with the `tuple` type tag omitted.
Template-driven encoding
----
## Template-driven encoding
Templates can be provided to the encoder by either naming an already registered
type, or by passing a template directly. In both cases, the encoder will enforce
@ -112,3 +107,23 @@ ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2
gmser_dyn:register_type(1000,lt2i,[{int,int}]).
ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]]
```
### Alternative types
The dynamic encoder supports two additions to the `gmserialization` template
language: `any` and `#{alt => [AltTypes]}`.
The `any` type doesn't have an associated code, but enforces dynamic encoding.
The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try
to encode as each type in the list, in the specified order, until one matches.
```erlang
gmser_dyn:encode_typed(#{alt => [negint,int]}, -5) -> [<<0>>,<<1>>,[<<247>>,<<5>>]]
gmser_dyn:encode_typed(#{alt => [negint,int]}, 5) -> [<<0>>,<<1>>,[<<248>>,<<5>>]]
gmser_dyn:register_type(246, anyint, #{alt => [negint, int]})
gmser_dyn:encode_typed(anyint,-5) -> [<<0>>,<<1>>,[<<246>>,[<<247>>,<<5>>]]]
gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]]
```

View File

@ -73,13 +73,12 @@ decode(Fields) ->
decode(Fields0, Types) ->
case decode_tag_and_vsn(Fields0) of
{0, Vsn, Fields} ->
[Val] = decode_(Fields, Vsn, Types, []),
Val;
decode_(Fields, Vsn, Types);
Other ->
error({illegal_serialization, Other})
end.
decode_tag_and_vsn([TagBin, VsnBin | Fields]) ->
decode_tag_and_vsn([TagBin, VsnBin, Fields]) ->
{decode_basic(int, TagBin),
decode_basic(int, VsnBin),
Fields}.
@ -151,83 +150,70 @@ dyn_template_(F, Vsn) ->
find_cached_label(Lbl, #{labels := Lbls}) ->
maps:find(Lbl, Lbls).
decode_(Fields, Vsn, Types, Acc) ->
{_Tag, Term, Rest} = decode_field_(Fields, Vsn, Types),
Acc1 = [Term | Acc],
case Rest of
[] ->
lists:reverse(Acc1);
_ ->
decode_(Rest, Vsn, Types, Acc1)
end.
decode_field_([H|T], Vsn, Types) ->
{CodeBin, Field, Rest} =
case H of
[C, F] -> {C, F, T};
C when is_binary(C) -> {C, hd(T), tl(T)}
end,
decode_([CodeBin, Flds], Vsn, Types) ->
Code = decode_basic(int, CodeBin),
{Tag, Template} = template(Code, Vsn, Types),
%% [Fld|Rest] = Fields,
Val = decode_from_template(Template, Field, Vsn, Types),
{Tag, Val, Rest}.
{_Tag, Template} = template(Code, Vsn, Types),
decode_from_template(Template, Flds, Vsn, Types).
encode_(Term, Vsn, Types) ->
encode_(Term, true, Vsn, Types).
encode_(Term, dyn(emit()), Vsn, Types).
encode_(Term, Emit, Vsn, Types) ->
{Tag, Template} = auto_template(Term),
if Emit ->
[emit_code(Tag, Types),
encode_from_template(Template, Term, false, true, Vsn, Types)];
true ->
encode_from_template(Template, Term, false, true, Vsn, Types)
end.
%% if Emit ->
%% [emit_code(Tag, Types), Enc];
%% true ->
%% Enc
%% end.
encode_(Term, E, Vsn, Types) ->
{_Tag, Template} = auto_template(Term),
encode_from_template(Template, Term, E, Vsn, Types).
%% Emit-control flags, carried around as a small integer E with two bits:
%%   bit 0 ('emit') - a type code should be emitted for this value
%%   bit 1 ('dyn')  - the value is being encoded dynamically
%% A predefined template is 'not dynamic', so its 'dyn' bit is cleared with
%% nodyn/1. Types nested inside a predefined template normally do not get a
%% type code of their own, except at the very top. The rule is therefore:
%% emit a type code if either the 'emit' bit or the 'dyn' bit is set.
emit() -> 1 bsl 0.
dyn()  -> 1 bsl 1.

%% Set/clear helpers. Clearing one flag is done by masking with the other.
emit(E)   -> E bor emit().
noemit(E) -> E band dyn().
dyn(E)    -> E bor dyn().
nodyn(E)  -> E band emit().
encode_typed_(Type, Term, Vsn, #{codes := Codes, rev := Rev} = Types) ->
case (is_map_key(Type, Codes) orelse is_map_key(Type, Rev)) of
true ->
encode_typed_(Type, Term, true, true, Vsn, Types);
encode_typed_(Type, Term, nodyn(emit()), Vsn, Types);
false ->
encode_maybe_template(Type, Term, Vsn, Types)
end.
encode_typed_(any, Term, _, _, Vsn, Types) ->
encode_(Term, true, Vsn, Types);
encode_typed_(Code, Term, Emit, IsDyn, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) ->
encode_typed_(any, Term, _, Vsn, Types) ->
encode_(Term, dyn(emit()), Vsn, Types);
encode_typed_(Code, Term, E, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) ->
{_Tag, Template} = template(Code, Vsn, Types),
maybe_emit(Emit, Code, encode_from_template(Template, Term, false, IsDyn, Vsn, Types));
encode_typed_(Tag, Term, Emit, IsDyn, Vsn, #{templates := Ts, rev := Rev} = Types)
[encode_basic(int,Code),
encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)];
encode_typed_(Tag, Term, E, Vsn, #{templates := Ts, rev := Rev} = Types)
when is_map_key(Tag, Ts) ->
Template = dyn_template_(maps:get(Tag, Ts), Vsn),
Code = maps:get(Tag, Rev),
maybe_emit(Emit, Code, encode_from_template(Template, Term, false, IsDyn, Vsn, Types));
encode_typed_(MaybeTemplate, Term, _, _, Vsn, Types) ->
[encode_basic(int,Code),
encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)];
encode_typed_(MaybeTemplate, Term, _, Vsn, Types) ->
encode_maybe_template(MaybeTemplate, Term, Vsn, Types).
maybe_emit(true, Code, Enc) ->
maybe_emit(E, Code, Enc) when E > 0 ->
[encode_basic(int, Code), Enc];
maybe_emit(false, _, Enc) ->
maybe_emit(0, _, Enc) ->
Enc.
encode_maybe_template(#{items := _} = Type, Term, Vsn, Types) ->
case is_map(Term) of
true ->
[emit_code(map, Types), encode_from_template(Type, Term, true, true, Vsn, Types)];
encode_from_template(Type, Term, emit(dyn()), Vsn, Types);
false ->
error({invalid, Type, Term})
end;
%% An `alt` template cannot be resolved to a single type up front, so it is
%% handed to encode_from_template/5 with both the 'emit' and 'dyn' bits set.
%% encode_from_template/5 takes (Template, Term, E, Vsn, Types): the flags
%% must come before Vsn, exactly as in the sibling clauses of this function.
encode_maybe_template(#{alt := _} = Type, Term, Vsn, Types) ->
    encode_from_template(Type, Term, emit(dyn()), Vsn, Types);
encode_maybe_template(Pat, Term, Vsn, Types) when is_list(Pat);
is_tuple(Pat) ->
{Tag, _} = auto_template(Pat),
[emit_code(Tag, Types),
encode_from_template(Pat, Term, true, true, Vsn, Types)];
encode_from_template(Pat, Term, emit(dyn()), Vsn, Types);
encode_maybe_template(Other, Term, _Vsn, _Types) ->
error({illegal_template, Other, Term}).
@ -258,23 +244,26 @@ auto_template(T) ->
error({invalid_type, T})
end.
decode_from_template(any, Fld, Vsn, Types) ->
decode_(Fld, Vsn, Types);
decode_from_template(#{items := Items}, Fld, Vsn, Types) when is_list(Fld) ->
Zipped = lists:zip(Items, Fld),
lists:foldl(
fun({{K, Type}, V}, Map) ->
maps:is_key(K, Map) andalso error(badarg, duplicate_field),
Map#{K => decode_from_template(Type, V, Vsn, Types)}
Map#{K => decode_from_template({any,Type}, V, Vsn, Types)}
end, #{}, Zipped);
decode_from_template(list, Fld, Vsn, Types) ->
decode_(Fld, Vsn, Types, []);
decode_from_template(#{alt := Alts} = T, Fld, Vsn, Types) when is_list(Alts) ->
decode_alt(Alts, Fld, T, Vsn, Types);
decode_from_template(list, Flds, Vsn, Types) ->
[decode_(F, Vsn, Types) || F <- Flds];
decode_from_template(map, Fld, Vsn, Types) ->
TupleFields = [F || F <- Fld],
Items = [decode_from_template(tuple, T, Vsn, Types)
Items = [decode_from_template({any,any}, T, Vsn, Types)
|| T <- TupleFields],
maps:from_list(Items);
decode_from_template(tuple, Fld, Vsn, Types) ->
Items = [decode_field_(F, Vsn, Types) || F <- Fld],
%% Items = decode_(Fld, Vsn, Types, []),
Items = [decode_(F, Vsn, Types) || F <- Fld],
list_to_tuple(Items);
decode_from_template([Type], Fields, Vsn, Types) ->
[decode_from_template(Type, F, Vsn, Types)
@ -296,75 +285,103 @@ decode_from_template(Type, Fld, _, Types) when Type == int
; Type == label ->
decode_basic(Type, Fld, Types).
encode_from_template(Type, V, Vsn, Types) ->
encode_from_template(Type, V, true, true, Vsn, Types).
encode_from_template(any, V, _Emit, _IsDyn, Vsn, Types) ->
encode_(V, true, Vsn, Types);
encode_from_template(list, L, _, _IsDyn, Vsn, Types) when is_list(L) ->
encode_from_template(any, V, _E, Vsn, Types) ->
encode_(V, dyn(emit()), Vsn, Types);
encode_from_template(list, L, E, Vsn, Types) when is_list(L) ->
assert_type(is_list(L), list, L),
[encode_(V, Vsn, Types) || V <- L];
encode_from_template(#{items := Items}, M, _, IsDyn, Vsn, Types) ->
emit(E, list, Types,
[encode_(V, Vsn, Types) || V <- L]);
encode_from_template(#{items := Items}, M, E, Vsn, Types) ->
assert_type(is_map(M), map, M),
lists:map(
fun({K, Type}) ->
V = maps:get(K, M),
encode_from_template({any,Type}, {K,V}, true, IsDyn, Vsn, Types)
end, Items);
encode_from_template(map, M, _, IsDyn, Vsn, Types) ->
Emit = noemit(E),
emit(E, map, Types,
lists:map(
fun({K, Type}) ->
V = maps:get(K, M),
[encode_from_template(any, K, Emit, Vsn, Types),
encode_from_template(Type, V, Emit, Vsn, Types)]
end, Items));
encode_from_template(#{alt := Alts} = T, Term, E, Vsn, Types) when is_list(Alts) ->
encode_alt(Alts, Term, T, E, Vsn, Types);
encode_from_template(map, M, E, Vsn, Types) ->
assert_type(is_map(M), map, M),
[encode_from_template({any,any}, {K,V}, false, IsDyn, Vsn, Types)
|| {K, V} <- lists:sort(maps:to_list(M))];
encode_from_template(tuple, T, Emit, _IsDyn, Vsn, Types) ->
Emit = emit(E),
emit(E, map, Types,
[[encode_from_template(any, K, Emit, Vsn, Types),
encode_from_template(any, V, Emit, Vsn, Types)]
|| {K, V} <- lists:sort(maps:to_list(M))]);
encode_from_template(tuple, T, E, Vsn, Types) ->
assert_type(is_tuple(T), tuple, T),
[encode_(V, true, Vsn, Types) || V <- tuple_to_list(T)];
encode_from_template(T, V, Emit, IsDyn, Vsn, Types) when is_tuple(T) ->
emit(E, tuple, Types,
[encode_(V, noemit(E), Vsn, Types) || V <- tuple_to_list(T)]);
encode_from_template(T, V, E, Vsn, Types) when is_tuple(T) ->
assert_type(is_tuple(V), T, V),
assert_type(tuple_size(T) =:= tuple_size(V), T, V),
Zipped = lists:zip(tuple_to_list(T), tuple_to_list(V)),
[encode_from_template(T1, V1, Emit, IsDyn, Vsn, Types) || {T1, V1} <- Zipped];
encode_from_template([Type] = T, List, Emit, IsDyn, Vsn, Types) ->
emit(E, tuple, Types,
[encode_from_template(T1, V1, noemit(E), Vsn, Types) || {T1, V1} <- Zipped]);
encode_from_template([Type] = T, List, E, Vsn, Types) ->
assert_type(is_list(List), T, List),
[encode_from_template(Type, V, Emit, IsDyn, Vsn, Types) || V <- List];
encode_from_template(Type, List, Emit, IsDyn, Vsn, Types) when is_list(Type), is_list(List) ->
encode_fields(Type, List, Emit, IsDyn, Vsn, Types);
encode_from_template(label, V, Emit, _, _, Types) ->
emit(E, list, Types,
[encode_from_template(Type, V, noemit(E), Vsn, Types) || V <- List]);
encode_from_template(Type, List, E, Vsn, Types) when is_list(Type), is_list(List) ->
encode_fields(Type, List, E, Vsn, Types);
encode_from_template(label, V, E, _, Types) ->
assert_type(is_atom(V), label, V),
case find_cached_label(V, Types) of
error ->
encode_basic(label, V, Emit, Types);
{ok, Code} when is_integer(Code) ->
[encode_basic(int, Code)]
end;
encode_from_template(Type, V, Emit, _, _, Types) when Type == id
; Type == binary
; Type == bool
; Type == int
; Type == negint
; Type == label ->
encode_basic(Type, V, Emit, Types);
encode_from_template(Type, V, Emit, IsDyn, Vsn, Types) ->
encode_typed_(Type, V, Emit, IsDyn, Vsn, Types).
%% error({illegal, Type, V}).
emit(E, label, Types,
case find_cached_label(V, Types) of
error ->
encode_basic(label, V, E, Types);
{ok, Code} when is_integer(Code) ->
[encode_basic(int, Code)]
end);
encode_from_template(Type, V, E, _, Types) when Type == id
; Type == binary
; Type == bool
; Type == int
; Type == negint
; Type == label ->
encode_basic(Type, V, E, Types);
encode_from_template(Type, V, E, Vsn, Types) ->
encode_typed_(Type, V, E, Vsn, Types).
%% Type assertion helper: the first argument is the outcome of a type test.
%% Anything other than `true` raises error({illegal, Type, V}); `true`
%% yields `ok`.
assert_type(Outcome, Type, V) when Outcome =/= true ->
    error({illegal, Type, V});
assert_type(true, _Type, _V) ->
    ok.
%% Decode Fld against the alternatives in order. An alternative whose
%% decoding raises an `error` exception is discarded and the next one is
%% tried. When no alternative is left, fail with {illegal, T, Fld}, where T
%% is the complete `alt` template.
decode_alt([], Fld, T, _Vsn, _Types) ->
    error({illegal, T, Fld});
decode_alt([Alt | Rest], Fld, T, Vsn, Types) ->
    try decode_from_template(Alt, Fld, Vsn, Types) of
        Decoded -> Decoded
    catch
        error:_ ->
            decode_alt(Rest, Fld, T, Vsn, Types)
    end.
%% Encode Term against the first alternative in Alts that accepts it.
%% Which alternative will match is not known beforehand, so the 'dyn' bit is
%% switched on before any attempt is made.
encode_alt(Alts, Term, T, E, Vsn, Types) ->
    DynE = dyn(E),
    encode_alt_(Alts, Term, T, DynE, Vsn, Types).

%% Walk the alternatives in order. An alternative that raises an `error`
%% exception is discarded and the next one is tried. When none is left, fail
%% with {illegal, T, Term}, where T is the complete `alt` template.
encode_alt_([], Term, T, _E, _Vsn, _Types) ->
    error({illegal, T, Term});
encode_alt_([Alt | Rest], Term, T, E, Vsn, Types) ->
    try encode_from_template(Alt, Term, E, Vsn, Types) of
        Enc -> Enc
    catch
        error:_ ->
            encode_alt_(Rest, Term, T, E, Vsn, Types)
    end.
%% Basically, dynamically encoding a statically defined object
encode_fields([{Field, Type}|TypesLeft],
[{Field, Val}|FieldsLeft], Emit, IsDyn, Vsn, Types) ->
[ encode_from_template(Type, Val, Emit, IsDyn, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Emit, IsDyn, Vsn, Types)];
[{Field, Val}|FieldsLeft], E, Vsn, Types) ->
[ encode_from_template(Type, Val, E, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)];
encode_fields([{_Field, _Type} = FT|_TypesLeft],
[Val |_FieldsLeft], _Emit, _IsDyn, _Vsn, _Types) ->
[Val |_FieldsLeft], _E, _Vsn, _Types) ->
error({illegal_field, FT, Val});
encode_fields([Type|TypesLeft],
[Val |FieldsLeft], Emit, IsDyn, Vsn, Types) when is_atom(Type) ->
[Val |FieldsLeft], E, Vsn, Types) when is_atom(Type) ->
%% Not sure about this ...
[ encode_from_template(Type, Val, Emit, IsDyn, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Emit, IsDyn, Vsn, Types)];
encode_fields([], [], _, _, _, _) ->
[ encode_from_template(Type, Val, E, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)];
encode_fields([], [], _, _, _) ->
[].
decode_fields([{Tag, Type}|TypesLeft],
@ -375,6 +392,12 @@ decode_fields([{Tag, Type}|TypesLeft],
decode_fields([], [], _, _) ->
[].
%% Wrap Enc as [Code, Enc], where Code is the encoded type code for Tag,
%% whenever any flag bit in E is set (E > 0). With E =:= 0 the encoding is
%% returned untouched.
emit(0, _Tag, _Types, Enc) ->
    Enc;
emit(E, Tag, Types, Enc) when E > 0 ->
    Code = emit_code(Tag, Types),
    [Code, Enc].
%% Look up the numeric code for Tag in the `rev` map of the types structure
%% and encode it as a basic integer field. maps:get/2 raises {badkey, Tag}
%% when Tag has no entry.
emit_code(Tag, #{rev := TagToCode}) ->
    Code = maps:get(Tag, TagToCode),
    encode_basic(int, Code).
@ -398,9 +421,9 @@ decode_basic(Type, Fld) ->
encode_basic(negint, I, _, Types) when is_integer(I), I < 0 ->
[emit_code(negint, Types), gmserialization:encode_field(int, -I)];
encode_basic(Tag, V, true, Types) ->
encode_basic(Tag, V, E, Types) when E > 0 ->
[emit_code(Tag, Types), encode_basic(Tag, V)];
encode_basic(Tag, V, false, _) ->
encode_basic(Tag, V, 0, _) ->
encode_basic(Tag, V).
encode_basic(label, A) when is_atom(A) ->
@ -606,13 +629,18 @@ user_types_test_() ->
dynamic_types_test_() ->
[ ?_test(revert_to_default_types())
, ?_test(typed_map())
, ?_test(t_typed_map())
, ?_test(t_alts())
].
%% Encode T dynamically, decode the result and require that T comes back.
t_round_trip(T) ->
    ?debugVal(T),
    Decoded = decode(encode(T)),
    ?assertMatch({T, T}, {T, Decoded}).
%% Encode T against the template Type, decode the result and require that T
%% comes back.
t_round_trip_typed(Type, T) ->
    ?debugVal(T),
    Decoded = decode(encode_typed(Type, T)),
    ?assertMatch({T, T}, {T, Decoded}).
t_reg_typed_tuple() ->
Type = {int, int, int},
MyTypes = #{ codes => #{ 1001 => int_tup3 }
@ -696,9 +724,14 @@ t_reg_label_cache2() ->
[<<0>>,<<1>>,[<<3,235>>,[[<<49>>],[<<49>>]]]] = Enc,
_Tup = gmser_dyn:decode(Enc).
typed_map() ->
t_typed_map() ->
Term = #{a => 13, {key,1} => [a]},
Enc = encode_typed(#{items => [{a,int},{{key,1},[label]}]}, Term),
?assertEqual(Term, decode(Enc)).
%% Round-trip one negative and one non-negative integer through the same
%% `alt` template, so that each alternative is the matching one once.
t_alts() ->
    AnyInt = #{alt => [negint, int]},
    lists:foreach(fun(I) -> t_round_trip_typed(AnyInt, I) end, [-4, 4]).
-endif.