Fix type-driven encode, more docs
All checks were successful
Gajumaru Serialization Tests / tests (push) Successful in 48m53s

This commit is contained in:
Ulf Wiger 2025-04-05 21:44:36 +02:00
parent 3ede4f22e1
commit dd1c2455f0
2 changed files with 102 additions and 53 deletions

View File

@ -29,6 +29,10 @@ Main API:
* `serialize_typed(template(), term()) -> binary()`
* `deserialize(binary()) -> term()`
In the examples below, we use the `decode` functions to illustrate
how the type information is represented. The fully serialized form is
produced by the `serialize` functions.
The basic types supported by the encoder are:
* `non_neg_integer()` (`int` , code: 248)
* `binary()` (`binary`, code: 249)
@ -88,16 +92,23 @@ Template-driven encoding
----
Templates can be provided to the encoder by either naming an already registered
type, or by passing a template directly. The template will then be enforced, and
used to slightly compress the encoding.
type, or by passing a template directly. In both cases, the encoder will enforce
the type information in the template.
In the following example, as the encoder knows that `{11,12}` is encoded as a
tuple of two integers, it can omit the inner type tags.
If the template has been registered, the encoder omits inner type tags (still
inserting the top-level tag), leading to some compression of the output.
This also means that the serialized term cannot be decoded without the same
schema information on the decoder side.
In the case of a directly provided template, all type information is inserted,
such that the serialized term can be decoded without any added type information.
The template types are still enforced during encoding.
```erlang
ET = fun(Type,Term) -> io:fwrite("~w~n", [gmser_dyn:encode_typed(Type,Term)]) end.
ET({int,int}, {11,12}) ->[<<0>>,<<1>>,[<<253>>,[<<11>>,<<12>>]]]
ET({int,int}, {11,a}) ->
** exception error: {illegal,int,a} ...
ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]]
gmser_dyn:register_type(1000,lt2i,[{int,int}]).
ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]]
```

View File

@ -182,21 +182,33 @@ encode_(Term, Emit, Vsn, Types) ->
Enc
end.
encode_typed_(Code, Term, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) ->
%% Entry point for typed encoding without an explicit emit flag.
%% Delegates to encode_typed_/5, passing `true` as the third argument
%% (the flag named Emit in encode_typed_/5).
encode_typed_(Type, Term, Vsn, Types) ->
encode_typed_(Type, Term, true, Vsn, Types).
encode_typed_(any, Term, _, Vsn, Types) ->
encode_(Term, true, Vsn, Types);
encode_typed_(Code, Term, Emit, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) ->
{_Tag, Template} = template(Code, Vsn, Types),
[encode_basic(int, Code), encode_from_template(Template, Term, Vsn, Types)];
encode_typed_(Tag, Term, Vsn, #{templates := Ts} = Types) when is_map_key(Tag, Ts) ->
maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types));
encode_typed_(Tag, Term, Emit, Vsn, #{templates := Ts, rev := Rev} = Types)
when is_map_key(Tag, Ts) ->
Template = dyn_template_(maps:get(Tag, Ts), Vsn),
[emit_code(Tag, Types), encode_from_template(Template, Term, Vsn, Types)];
encode_typed_(MaybeTemplate, Term, Vsn, Types) ->
Code = maps:get(Tag, Rev),
maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types));
encode_typed_(MaybeTemplate, Term, _, Vsn, Types) ->
encode_maybe_template(MaybeTemplate, Term, Vsn, Types).
%% Optionally prefixes an already encoded term with its integer type code.
%%   Emit = true  -> [EncodedCode, Enc]
%%   Emit = false -> Enc, returned unchanged (Code is ignored)
%% Any other Emit value fails with function_clause.
maybe_emit(Emit, Code, Enc) when is_boolean(Emit) ->
    case Emit of
        true ->
            Prefix = encode_basic(int, Code),
            [Prefix, Enc];
        false ->
            Enc
    end.
encode_maybe_template(Pat, Term, Vsn, Types) when is_list(Pat);
is_tuple(Pat);
is_map(Pat) ->
{Tag, _} = auto_template(Pat),
[emit_code(Tag, Types),
encode_from_template(Pat, Term, Vsn, Types)];
encode_from_template(Pat, Term, true, Vsn, Types)];
encode_maybe_template(Other, Term, _Vsn, _Types) ->
error({illegal_template, Other, Term}).
@ -247,63 +259,74 @@ decode_from_template(Type, V, Vsn, Types) when is_tuple(Type), is_list(V) ->
decode_from_template(label, [C], _, #{rev_labels := RLbls}) ->
Code = decode_basic(int, C),
maps:get(Code, RLbls);
decode_from_template(Type, Fld, _, _) when Type == int
; Type == binary
; Type == bool
; Type == id
; Type == label ->
decode_basic(Type, Fld).
decode_from_template(Type, Fld, _, Types) when Type == int
; Type == binary
; Type == bool
; Type == id
; Type == label ->
decode_basic(Type, Fld, Types).
%% Convenience wrapper around encode_from_template/5.
%% Passes `true` as the third argument (the flag named Emit in several
%% encode_from_template/5 clauses).
encode_from_template(Type, V, Vsn, Types) ->
encode_from_template(Type, V, true, Vsn, Types).
encode_from_template(any, V, _, Vsn, Types) ->
encode_(V, true, Vsn, Types);
encode_from_template(list, L, _, Vsn, Types) when is_list(L) ->
assert_type(is_list(L), list, L),
[encode_(V, Vsn, Types) || V <- L];
encode_from_template(map, M, _, Vsn, Types) when is_map(M) ->
encode_from_template(map, M, _, Vsn, Types) ->
assert_type(is_map(M), map, M),
[encode_({K,V}, false, Vsn, Types)
|| {K, V} <- lists:sort(maps:to_list(M))];
encode_from_template(tuple, T, _, Vsn, Types) when is_tuple(T) ->
[encode_(V, Vsn, Types) || V <- tuple_to_list(T)];
encode_from_template(T, V, _, Vsn, Types) when tuple_size(T) =:= tuple_size(V) ->
encode_from_template(tuple, T, Emit, Vsn, Types) ->
assert_type(is_tuple(T), tuple, T),
[encode_(V, Emit, Vsn, Types) || V <- tuple_to_list(T)];
encode_from_template(T, V, Emit, Vsn, Types) when is_tuple(T) ->
assert_type(is_tuple(V), T, V),
assert_type(tuple_size(T) =:= tuple_size(V), T, V),
Zipped = lists:zip(tuple_to_list(T), tuple_to_list(V)),
[encode_from_template(T1, V1, false, Vsn, Types) || {T1, V1} <- Zipped];
encode_from_template([Type], List, _, Vsn, Types) ->
[encode_from_template(Type, V, false, Vsn, Types) || V <- List];
encode_from_template(Type, List, _, Vsn, Types) when is_list(Type), is_list(List) ->
encode_fields(Type, List, Vsn, Types);
encode_from_template(label, V, _, _, Types) ->
[encode_from_template(T1, V1, Emit, Vsn, Types) || {T1, V1} <- Zipped];
encode_from_template([Type] = T, List, Emit, Vsn, Types) ->
assert_type(is_list(List), T, List),
[encode_from_template(Type, V, Emit, Vsn, Types) || V <- List];
encode_from_template(Type, List, Emit, Vsn, Types) when is_list(Type), is_list(List) ->
encode_fields(Type, List, Emit, Vsn, Types);
encode_from_template(label, V, Emit, _, Types) ->
assert_type(is_atom(V), label, V),
case find_cached_label(V, Types) of
error ->
encode_basic(label, V);
encode_basic(label, V, Emit, Types);
{ok, Code} when is_integer(Code) ->
[encode_basic(int, Code)]
end;
encode_from_template(Type, V, _, _, _Types) when Type == id
; Type == binary
; Type == bool
; Type == int
; Type == label ->
encode_basic(Type, V);
encode_from_template(Type, V, _, _, _) ->
error({illegal, Type, V}).
encode_from_template(Type, V, Emit, _, Types) when Type == id
; Type == binary
; Type == bool
; Type == int
; Type == label ->
encode_basic(Type, V, Emit, Types);
encode_from_template(Type, V, Emit, Vsn, Types) ->
encode_typed_(Type, V, Emit, Vsn, Types).
%% error({illegal, Type, V}).
%% Type assertion helper used while encoding from a template.
%% Returns ok when the check result is exactly `true`; for any other
%% value it raises error({illegal, Type, V}), where Type is the expected
%% template/type and V the offending value.
assert_type(Check, Type, V) ->
    case Check of
        true ->
            ok;
        _ ->
            error({illegal, Type, V})
    end.
%% Basically, dynamically encoding a statically defined object
encode_fields([{Field, Type}|TypesLeft],
[{Field, Val}|FieldsLeft], Vsn, Types) ->
[ encode_from_template(Type, Val, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Vsn, Types)];
encode_fields([{_Field, Type}|TypesLeft],
[Val |FieldsLeft], Vsn, Types) ->
%% Not sure if we want to try this ...
[ encode_from_template(Type, Val, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Vsn, Types)];
[{Field, Val}|FieldsLeft], Emit, Vsn, Types) ->
[ encode_from_template(Type, Val, Emit, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)];
encode_fields([{_Field, _Type} = FT|_TypesLeft],
[Val |_FieldsLeft], _Emit, _Vsn, _Types) ->
error({illegal_field, FT, Val});
encode_fields([Type|TypesLeft],
[Val |FieldsLeft], Vsn, Types) when is_atom(Type) ->
%% Not sure about this either ...
[ encode_from_template(Type, Val, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Vsn, Types)];
encode_fields([], [], _, _) ->
[Val |FieldsLeft], Emit, Vsn, Types) when is_atom(Type) ->
%% Not sure about this ...
[ encode_from_template(Type, Val, Emit, Vsn, Types)
| encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)];
encode_fields([], [], _, _, _) ->
[].
decode_fields([{Tag, Type}|TypesLeft],
@ -317,11 +340,26 @@ decode_fields([], [], _, _) ->
%% Looks up the numeric code stored for Tag in the `rev` map of the type
%% environment and encodes it as an `int` field.
%% maps:get/2 raises {badkey, Tag} if Tag has no entry in the map.
emit_code(Tag, #{rev := CodeByTag}) ->
    Code = maps:get(Tag, CodeByTag),
    encode_basic(int, Code).
%% Decodes a basic-typed field that may or may not carry its own type tag.
%%
%% Tagged form ([Tag, V], with a `codes` map available): Tag is decoded as
%% an int, and the entry it maps to in `codes` must equal the expected
%% Type. If it does, V is decoded as Type; otherwise (including when the
%% code is not present in the map) the call fails with error(illegal).
%%
%% Any other field shape is treated as untagged and decoded directly.
decode_basic(Type, [Tag, V], #{codes := Codes}) ->
    Code = decode_basic(int, Tag),
    if
        %% A failing map_get/2 in a guard just makes the guard false,
        %% so unknown codes fall through to the error branch.
        map_get(Code, Codes) == Type ->
            decode_basic(Type, V);
        true ->
            error(illegal)
    end;
decode_basic(Type, Untagged, _Types) ->
    decode_basic(Type, Untagged).
%% Decodes a single field of a basic type.
%% Labels are decoded as binaries and then converted with
%% binary_to_existing_atom/2, so decoding never creates new atoms.
%% Every other type is handed to gmserialization:decode_field/2.
decode_basic(Type, Fld) ->
    case Type of
        label ->
            Name = decode_basic(binary, Fld),
            binary_to_existing_atom(Name, utf8);
        _ ->
            gmserialization:decode_field(Type, Fld)
    end.
%% Encodes a basic-typed value, optionally preceded by its type code.
%%   Emit = true  -> [emit_code(Tag, Types), encode_basic(Tag, V)]
%%   Emit = false -> encode_basic(Tag, V) only; Types is not consulted
%% Any other Emit value fails with function_clause.
encode_basic(Tag, V, Emit, Types) when is_boolean(Emit) ->
    case Emit of
        true ->
            %% The type code is produced first, then the payload.
            TypeCode = emit_code(Tag, Types),
            [TypeCode, encode_basic(Tag, V)];
        false ->
            encode_basic(Tag, V)
    end.
encode_basic(label, A) when is_atom(A) ->
encode_basic(binary, atom_to_binary(A, utf8));
encode_basic(Type, Fld) ->
@ -593,8 +631,8 @@ t_reg_label_cache() ->
true = Enc0 =/= Enc1,
Enc2 = gmser_dyn:encode_typed(label, '1'),
?debugFmt("Enc2 (typed): ~w", [Enc2]),
true = Enc2 == Enc1,
true = Enc0a =/= Enc1a.
?assertEqual(Enc2, Enc1),
?assertNotEqual(Enc0a, Enc1a).
t_reg_label_cache2() ->
TFromL = gmser_dyn:types_from_list(