From dd1c2455f06aef1fc96d2c942131c0dcc26e2ed8 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Sat, 5 Apr 2025 21:44:36 +0200 Subject: [PATCH] Fix type-driven encode, more docs --- README.md | 25 ++++++--- src/gmser_dyn.erl | 130 ++++++++++++++++++++++++++++++---------------- 2 files changed, 102 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index f5d0dff..94dc396 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,10 @@ Main API: * `serialize_typed(template(), term()) -> binary()` * `deserialize(binary()) -> term()` +In the examples below, we use the `encode` functions, to illustrate +how the type information is represented. The fully serialized form is +produced by the `serialize` functions. + The basic types supported by the encoder are: * `non_neg_integer()` (`int` , code: 248) * `binary()` (`binary`, code: 249) @@ -88,16 +92,23 @@ Template-driven encoding ---- Templates can be provided to the encoder by either naming an already registered -type, or by passing a template directly. The template will then be enforced, and -used to slightly compress the encoding. +type, or by passing a template directly. In both cases, the encoder will enforce +the type information in the template. -In the following example, as the encoder knows that `{11,12}` is encoded as a -tuple of two integers, it can omit the inner type tags. +If the template has been registered, the encoder omits inner type tags (still +inserting the top-level tag), leading to some compression of the output. +This also means that the serialized term cannot be decoded without the same +schema information on the decoder side. + +In the case of a directly provided template, all type information is inserted, +such that the serialized term can be decoded without any added type information. +The template types are still enforced during encoding. ```erlang ET = fun(Type,Term) -> io:fwrite("~w~n", [gmser_dyn:encode_typed(Type,Term)]) end. 
-ET({int,int}, {11,12}) ->[<<0>>,<<1>>,[<<253>>,[<<11>>,<<12>>]]] -ET({int,int}, {11,a}) -> -** exception error: {illegal,int,a} ... +ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]] + +gmser_dyn:register_type(1000,lt2i,[{int,int}]). +ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]] ``` diff --git a/src/gmser_dyn.erl b/src/gmser_dyn.erl index 2a194ef..52590f5 100644 --- a/src/gmser_dyn.erl +++ b/src/gmser_dyn.erl @@ -182,21 +182,33 @@ encode_(Term, Emit, Vsn, Types) -> Enc end. -encode_typed_(Code, Term, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) -> +encode_typed_(Type, Term, Vsn, Types) -> + encode_typed_(Type, Term, true, Vsn, Types). + +encode_typed_(any, Term, _, Vsn, Types) -> + encode_(Term, true, Vsn, Types); +encode_typed_(Code, Term, Emit, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) -> {_Tag, Template} = template(Code, Vsn, Types), - [encode_basic(int, Code), encode_from_template(Template, Term, Vsn, Types)]; -encode_typed_(Tag, Term, Vsn, #{templates := Ts} = Types) when is_map_key(Tag, Ts) -> + maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types)); +encode_typed_(Tag, Term, Emit, Vsn, #{templates := Ts, rev := Rev} = Types) + when is_map_key(Tag, Ts) -> Template = dyn_template_(maps:get(Tag, Ts), Vsn), - [emit_code(Tag, Types), encode_from_template(Template, Term, Vsn, Types)]; -encode_typed_(MaybeTemplate, Term, Vsn, Types) -> + Code = maps:get(Tag, Rev), + maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types)); +encode_typed_(MaybeTemplate, Term, _, Vsn, Types) -> encode_maybe_template(MaybeTemplate, Term, Vsn, Types). +maybe_emit(true, Code, Enc) -> + [encode_basic(int, Code), Enc]; +maybe_emit(false, _, Enc) -> + Enc. 
+ encode_maybe_template(Pat, Term, Vsn, Types) when is_list(Pat); is_tuple(Pat); is_map(Pat) -> {Tag, _} = auto_template(Pat), [emit_code(Tag, Types), - encode_from_template(Pat, Term, Vsn, Types)]; + encode_from_template(Pat, Term, true, Vsn, Types)]; encode_maybe_template(Other, Term, _Vsn, _Types) -> error({illegal_template, Other, Term}). @@ -247,63 +259,74 @@ decode_from_template(Type, V, Vsn, Types) when is_tuple(Type), is_list(V) -> decode_from_template(label, [C], _, #{rev_labels := RLbls}) -> Code = decode_basic(int, C), maps:get(Code, RLbls); -decode_from_template(Type, Fld, _, _) when Type == int - ; Type == binary - ; Type == bool - ; Type == id - ; Type == label -> - decode_basic(Type, Fld). +decode_from_template(Type, Fld, _, Types) when Type == int + ; Type == binary + ; Type == bool + ; Type == id + ; Type == label -> + decode_basic(Type, Fld, Types). encode_from_template(Type, V, Vsn, Types) -> encode_from_template(Type, V, true, Vsn, Types). +encode_from_template(any, V, _, Vsn, Types) -> + encode_(V, true, Vsn, Types); encode_from_template(list, L, _, Vsn, Types) when is_list(L) -> + assert_type(is_list(L), list, L), [encode_(V, Vsn, Types) || V <- L]; -encode_from_template(map, M, _, Vsn, Types) when is_map(M) -> +encode_from_template(map, M, _, Vsn, Types) -> + assert_type(is_map(M), map, M), [encode_({K,V}, false, Vsn, Types) || {K, V} <- lists:sort(maps:to_list(M))]; -encode_from_template(tuple, T, _, Vsn, Types) when is_tuple(T) -> - [encode_(V, Vsn, Types) || V <- tuple_to_list(T)]; -encode_from_template(T, V, _, Vsn, Types) when tuple_size(T) =:= tuple_size(V) -> +encode_from_template(tuple, T, Emit, Vsn, Types) -> + assert_type(is_tuple(T), tuple, T), + [encode_(V, Emit, Vsn, Types) || V <- tuple_to_list(T)]; +encode_from_template(T, V, Emit, Vsn, Types) when is_tuple(T) -> + assert_type(is_tuple(V), T, V), + assert_type(tuple_size(T) =:= tuple_size(V), T, V), Zipped = lists:zip(tuple_to_list(T), tuple_to_list(V)), - 
[encode_from_template(T1, V1, false, Vsn, Types) || {T1, V1} <- Zipped]; -encode_from_template([Type], List, _, Vsn, Types) -> - [encode_from_template(Type, V, false, Vsn, Types) || V <- List]; -encode_from_template(Type, List, _, Vsn, Types) when is_list(Type), is_list(List) -> - encode_fields(Type, List, Vsn, Types); -encode_from_template(label, V, _, _, Types) -> + [encode_from_template(T1, V1, Emit, Vsn, Types) || {T1, V1} <- Zipped]; +encode_from_template([Type] = T, List, Emit, Vsn, Types) -> + assert_type(is_list(List), T, List), + [encode_from_template(Type, V, Emit, Vsn, Types) || V <- List]; +encode_from_template(Type, List, Emit, Vsn, Types) when is_list(Type), is_list(List) -> + encode_fields(Type, List, Emit, Vsn, Types); +encode_from_template(label, V, Emit, _, Types) -> + assert_type(is_atom(V), label, V), case find_cached_label(V, Types) of error -> - encode_basic(label, V); + encode_basic(label, V, Emit, Types); {ok, Code} when is_integer(Code) -> [encode_basic(int, Code)] end; -encode_from_template(Type, V, _, _, _Types) when Type == id - ; Type == binary - ; Type == bool - ; Type == int - ; Type == label -> - encode_basic(Type, V); -encode_from_template(Type, V, _, _, _) -> - error({illegal, Type, V}). +encode_from_template(Type, V, Emit, _, Types) when Type == id + ; Type == binary + ; Type == bool + ; Type == int + ; Type == label -> + encode_basic(Type, V, Emit, Types); +encode_from_template(Type, V, Emit, Vsn, Types) -> + encode_typed_(Type, V, Emit, Vsn, Types). + %% error({illegal, Type, V}). + +assert_type(true, _, _) -> ok; +assert_type(_, Type, V) -> error({illegal, Type, V}). 
%% Basically, dynamically encoding a statically defined object encode_fields([{Field, Type}|TypesLeft], - [{Field, Val}|FieldsLeft], Vsn, Types) -> - [ encode_from_template(Type, Val, Vsn, Types) - | encode_fields(TypesLeft, FieldsLeft, Vsn, Types)]; -encode_fields([{_Field, Type}|TypesLeft], - [Val |FieldsLeft], Vsn, Types) -> - %% Not sure if we want to try this ... - [ encode_from_template(Type, Val, Vsn, Types) - | encode_fields(TypesLeft, FieldsLeft, Vsn, Types)]; + [{Field, Val}|FieldsLeft], Emit, Vsn, Types) -> + [ encode_from_template(Type, Val, Emit, Vsn, Types) + | encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)]; +encode_fields([{_Field, _Type} = FT|_TypesLeft], + [Val |_FieldsLeft], _Emit, _Vsn, _Types) -> + error({illegal_field, FT, Val}); encode_fields([Type|TypesLeft], - [Val |FieldsLeft], Vsn, Types) when is_atom(Type) -> - %% Not sure about this either ... - [ encode_from_template(Type, Val, Vsn, Types) - | encode_fields(TypesLeft, FieldsLeft, Vsn, Types)]; -encode_fields([], [], _, _) -> + [Val |FieldsLeft], Emit, Vsn, Types) when is_atom(Type) -> + %% Not sure about this ... + [ encode_from_template(Type, Val, Emit, Vsn, Types) + | encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)]; +encode_fields([], [], _, _, _) -> []. decode_fields([{Tag, Type}|TypesLeft], @@ -317,11 +340,26 @@ decode_fields([], [], _, _) -> emit_code(Tag, #{rev := Tags}) -> encode_basic(int, maps:get(Tag, Tags)). +decode_basic(Type, [Tag,V], #{codes := Codes}) -> + case decode_basic(int, Tag) of + Code when map_get(Code, Codes) == Type -> + decode_basic(Type, V); + _ -> + error(illegal) + end; +decode_basic(Type, V, _) -> + decode_basic(Type, V). + decode_basic(label, Fld) -> binary_to_existing_atom(decode_basic(binary, Fld), utf8); decode_basic(Type, Fld) -> gmserialization:decode_field(Type, Fld). +encode_basic(Tag, V, true, Types) -> + [emit_code(Tag, Types), encode_basic(Tag, V)]; +encode_basic(Tag, V, false, _) -> + encode_basic(Tag, V). 
+ encode_basic(label, A) when is_atom(A) -> encode_basic(binary, atom_to_binary(A, utf8)); encode_basic(Type, Fld) -> @@ -593,8 +631,8 @@ t_reg_label_cache() -> true = Enc0 =/= Enc1, Enc2 = gmser_dyn:encode_typed(label, '1'), ?debugFmt("Enc2 (typed): ~w", [Enc2]), - true = Enc2 == Enc1, - true = Enc0a =/= Enc1a. + ?assertEqual(Enc2, Enc1), + ?assertNotEqual(Enc0a, Enc1a). t_reg_label_cache2() -> TFromL = gmser_dyn:types_from_list(