diff --git a/README.md b/README.md index 94dc396..4694d78 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,17 @@ -GM Serialization -===== +# GM Serialization Serialization helpers for the Gajumaru. -Build ------ +## Build $ rebar3 compile -Test ----- +## Test $ rebar3 eunit -Dynamic encoding ----- +## Dynamic encoding The module `gmser_dyn` offers dynamic encoding support, encoding most 'regular' Erlang data types into an internal RLP representation. @@ -34,6 +30,7 @@ how the type information is represented. The fully serialized form is produced by the `serialize` functions. The basic types supported by the encoder are: +* `neg_integer()` (`negint`, code: 247) * `non_neg_integer()` (`int` , code: 248) * `binary()` (`binary`, code: 249) * `boolean()` (`bool` , code: 250) @@ -48,8 +45,8 @@ When encoding `map` types, the map elements are first sorted. When specifying a map type for template-driven encoding, use the `#{items => [{Key, Value}]}` construct. -Labels ----- + +## Labels Labels correspond to (existing) atoms in Erlang. Decoding of a label results in a call to `binary_to_existing_atom/2`, so will @@ -63,8 +60,7 @@ Labels are encoded as `[<<255>>, << AtomToBinary/binary >>]`. If a cached label is used, the encoding becomes `[<<255>, [Ix]]`, where `Ix` is the integer-encoded index value of the cached label. -Examples ----- +## Examples Dynamically encoded objects have the basic structure `[<<0>>,V,Obj]`, where `V` is the integer-coded version, and `Obj` is the top-level encoding on the form `[Tag,Data]`. @@ -88,8 +84,7 @@ Note that tuples and list are encoded the same way, except for the initial type Maps are encoded as `[, [KV1, KV2, ...]]`, where `[KV1, KV2, ...]` is the sorted list of key-value tuples from `map:to_list(Map)`, but with the `tuple` type tag omitted. -Template-driven encoding ----- +## Template-driven encoding Templates can be provided to the encoder by either naming an already registered type, or by passing a template directly. 
In both cases, the encoder will enforce @@ -112,3 +107,23 @@ ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2 gmser_dyn:register_type(1000,lt2i,[{int,int}]). ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]] ``` + +### Alternative types + +The dynamic encoder supports two additions to the `gmserialization` template +language: `any` and `#{alt => [AltTypes]}`. + +The `any` type doesn't have an associated code, but enforces dynamic encoding. + +The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try +to encode as each type in the list, in the specified order, until one matches. + +```erlang +gmser_dyn:encode_typed(#{alt => [negint,int]}, -5) -> [<<0>>,<<1>>,[<<247>>,<<5>>]] +gmser_dyn:encode_typed(#{alt => [negint,int]}, 5) -> [<<0>>,<<1>>,[<<248>>,<<5>>]] + +gmser_dyn:register_type(246, anyint, #{alt => [negint, int]}) + +gmser_dyn:encode_typed(anyint,-5) -> [<<0>>,<<1>>,[<<246>>,[<<247>>,<<5>>]]] +gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]] +``` diff --git a/src/gmser_dyn.erl b/src/gmser_dyn.erl index 52590f5..69ff56b 100644 --- a/src/gmser_dyn.erl +++ b/src/gmser_dyn.erl @@ -73,13 +73,12 @@ decode(Fields) -> decode(Fields0, Types) -> case decode_tag_and_vsn(Fields0) of {0, Vsn, Fields} -> - [Val] = decode_(Fields, Vsn, Types, []), - Val; + decode_(Fields, Vsn, Types); Other -> error({illegal_serialization, Other}) end. -decode_tag_and_vsn([TagBin, VsnBin | Fields]) -> +decode_tag_and_vsn([TagBin, VsnBin, Fields]) -> {decode_basic(int, TagBin), decode_basic(int, VsnBin), Fields}. 
@@ -87,7 +86,8 @@ decode_tag_and_vsn([TagBin, VsnBin | Fields]) -> dynamic_types() -> #{ vsn => ?VSN , codes => - #{ 248 => int + #{ 247 => negint + , 248 => int , 249 => binary , 250 => bool , 251 => list @@ -96,7 +96,8 @@ dynamic_types() -> , 254 => id , 255 => label} , rev => - #{ int => 248 + #{ negint => 247 + , int => 248 , binary => 249 , bool => 250 , list => 251 @@ -107,7 +108,8 @@ dynamic_types() -> , labels => #{} , rev_labels => #{} , templates => - #{ int => int + #{ negint => negint + , int => int , binary => binary , bool => bool , list => list @@ -148,67 +150,70 @@ dyn_template_(F, Vsn) -> find_cached_label(Lbl, #{labels := Lbls}) -> maps:find(Lbl, Lbls). -decode_(Fields, Vsn, Types, Acc) -> - {_Tag, Term, Rest} = decode_field_(Fields, Vsn, Types), - Acc1 = [Term | Acc], - case Rest of - [] -> - lists:reverse(Acc1); - _ -> - decode_(Rest, Vsn, Types, Acc1) - end. - -decode_field_([H|T], Vsn, Types) -> - {CodeBin, Field, Rest} = - case H of - [C, F] -> {C, F, T}; - C when is_binary(C) -> {C, hd(T), tl(T)} - end, +decode_([CodeBin, Flds], Vsn, Types) -> Code = decode_basic(int, CodeBin), - {Tag, Template} = template(Code, Vsn, Types), - %% [Fld|Rest] = Fields, - Val = decode_from_template(Template, Field, Vsn, Types), - {Tag, Val, Rest}. + {_Tag, Template} = template(Code, Vsn, Types), + decode_from_template(Template, Flds, Vsn, Types). encode_(Term, Vsn, Types) -> - encode_(Term, true, Vsn, Types). + encode_(Term, dyn(emit()), Vsn, Types). -encode_(Term, Emit, Vsn, Types) -> - {Tag, Template} = auto_template(Term), - Enc = encode_from_template(Template, Term, Vsn, Types), - if Emit -> - [emit_code(Tag, Types), Enc]; - true -> - Enc +encode_(Term, E, Vsn, Types) -> + {_Tag, Template} = auto_template(Term), + encode_from_template(Template, Term, E, Vsn, Types). + +%% To control when to emit type codes: +%% If the template is predefined, it's 'not dynamic' (nodyn(E)). 
+%% If we are encoding against a type that's part of a predefined template, +%% we typically don't emit the type code, except at the very top. +%% So: emit type codes if the 'emit' bit is set, or if the 'dyn' bit is set. +emit() -> 2#01. +dyn() -> 2#10. +emit(E) -> E bor 2#01. +noemit(E) -> E band 2#10. +dyn(E) -> E bor 2#10. +nodyn(E) -> E band 2#01. + +encode_typed_(Type, Term, Vsn, #{codes := Codes, rev := Rev} = Types) -> + case (is_map_key(Type, Codes) orelse is_map_key(Type, Rev)) of + true -> + encode_typed_(Type, Term, nodyn(emit()), Vsn, Types); + false -> + encode_maybe_template(Type, Term, Vsn, Types) end. -encode_typed_(Type, Term, Vsn, Types) -> - encode_typed_(Type, Term, true, Vsn, Types). - encode_typed_(any, Term, _, Vsn, Types) -> - encode_(Term, true, Vsn, Types); -encode_typed_(Code, Term, Emit, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) -> + encode_(Term, dyn(emit()), Vsn, Types); +encode_typed_(Code, Term, E, Vsn, #{codes := Codes} = Types) when is_map_key(Code, Codes) -> {_Tag, Template} = template(Code, Vsn, Types), - maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types)); -encode_typed_(Tag, Term, Emit, Vsn, #{templates := Ts, rev := Rev} = Types) + [encode_basic(int,Code), + encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)]; +encode_typed_(Tag, Term, E, Vsn, #{templates := Ts, rev := Rev} = Types) when is_map_key(Tag, Ts) -> Template = dyn_template_(maps:get(Tag, Ts), Vsn), Code = maps:get(Tag, Rev), - maybe_emit(Emit, Code, encode_from_template(Template, Term, false, Vsn, Types)); + [encode_basic(int,Code), + encode_from_template(Template, Term, noemit(nodyn(E)), Vsn, Types)]; encode_typed_(MaybeTemplate, Term, _, Vsn, Types) -> encode_maybe_template(MaybeTemplate, Term, Vsn, Types). -maybe_emit(true, Code, Enc) -> +maybe_emit(E, Code, Enc) when E > 0 -> [encode_basic(int, Code), Enc]; -maybe_emit(false, _, Enc) -> +maybe_emit(0, _, Enc) -> Enc. 
+encode_maybe_template(#{items := _} = Type, Term, Vsn, Types) -> + case is_map(Term) of + true -> + encode_from_template(Type, Term, emit(dyn()), Vsn, Types); + false -> + error({invalid, Type, Term}) + end; +encode_maybe_template(#{alt := _} = Type, Term, Vsn, Types) -> + encode_from_template(Type, Term, emit(dyn()), Vsn, Types); encode_maybe_template(Pat, Term, Vsn, Types) when is_list(Pat); - is_tuple(Pat); - is_map(Pat) -> - {Tag, _} = auto_template(Pat), - [emit_code(Tag, Types), - encode_from_template(Pat, Term, true, Vsn, Types)]; + is_tuple(Pat) -> + encode_from_template(Pat, Term, emit(dyn()), Vsn, Types); encode_maybe_template(Other, Term, _Vsn, _Types) -> error({illegal_template, Other, Term}). @@ -233,19 +238,32 @@ auto_template(T) -> is_atom(T) -> {label, label}; % binary_to_existing_atom() is_integer(T), T >= 0 -> {int, int}; + is_integer(T), + T < 0 -> {negint, negint}; true -> - error(invalid_type) + error({invalid_type, T}) end. -decode_from_template(list, Fld, Vsn, Types) -> - decode_(Fld, Vsn, Types, []); +decode_from_template(any, Fld, Vsn, Types) -> + decode_(Fld, Vsn, Types); +decode_from_template(#{items := Items}, Fld, Vsn, Types) when is_list(Fld) -> + Zipped = lists:zip(Items, Fld), + lists:foldl( + fun({{K, Type}, V}, Map) -> + maps:is_key(K, Map) andalso error(badarg, duplicate_field), + Map#{K => decode_from_template({any,Type}, V, Vsn, Types)} + end, #{}, Zipped); +decode_from_template(#{alt := Alts} = T, Fld, Vsn, Types) when is_list(Alts) -> + decode_alt(Alts, Fld, T, Vsn, Types); +decode_from_template(list, Flds, Vsn, Types) -> + [decode_(F, Vsn, Types) || F <- Flds]; decode_from_template(map, Fld, Vsn, Types) -> TupleFields = [F || F <- Fld], - Items = [decode_from_template(tuple, T, Vsn, Types) + Items = [decode_from_template({any,any}, T, Vsn, Types) || T <- TupleFields], maps:from_list(Items); decode_from_template(tuple, Fld, Vsn, Types) -> - Items = [decode_(F, Vsn, Types) || F <- Fld], 
list_to_tuple(Items); decode_from_template([Type], Fields, Vsn, Types) -> [decode_from_template(Type, F, Vsn, Types) @@ -260,72 +278,109 @@ decode_from_template(label, [C], _, #{rev_labels := RLbls}) -> Code = decode_basic(int, C), maps:get(Code, RLbls); decode_from_template(Type, Fld, _, Types) when Type == int + ; Type == negint ; Type == binary ; Type == bool ; Type == id ; Type == label -> decode_basic(Type, Fld, Types). -encode_from_template(Type, V, Vsn, Types) -> - encode_from_template(Type, V, true, Vsn, Types). - -encode_from_template(any, V, _, Vsn, Types) -> - encode_(V, true, Vsn, Types); -encode_from_template(list, L, _, Vsn, Types) when is_list(L) -> +encode_from_template(any, V, _E, Vsn, Types) -> + encode_(V, dyn(emit()), Vsn, Types); +encode_from_template(list, L, E, Vsn, Types) when is_list(L) -> assert_type(is_list(L), list, L), - [encode_(V, Vsn, Types) || V <- L]; -encode_from_template(map, M, _, Vsn, Types) -> + emit(E, list, Types, + [encode_(V, Vsn, Types) || V <- L]); +encode_from_template(#{items := Items}, M, E, Vsn, Types) -> assert_type(is_map(M), map, M), - [encode_({K,V}, false, Vsn, Types) - || {K, V} <- lists:sort(maps:to_list(M))]; -encode_from_template(tuple, T, Emit, Vsn, Types) -> + Emit = noemit(E), + emit(E, map, Types, + lists:map( + fun({K, Type}) -> + V = maps:get(K, M), + [encode_from_template(any, K, Emit, Vsn, Types), + encode_from_template(Type, V, Emit, Vsn, Types)] + end, Items)); +encode_from_template(#{alt := Alts} = T, Term, E, Vsn, Types) when is_list(Alts) -> + encode_alt(Alts, Term, T, E, Vsn, Types); +encode_from_template(map, M, E, Vsn, Types) -> + assert_type(is_map(M), map, M), + Emit = emit(E), + emit(E, map, Types, + [[encode_from_template(any, K, Emit, Vsn, Types), + encode_from_template(any, V, Emit, Vsn, Types)] + || {K, V} <- lists:sort(maps:to_list(M))]); +encode_from_template(tuple, T, E, Vsn, Types) -> assert_type(is_tuple(T), tuple, T), - [encode_(V, Emit, Vsn, Types) || V <- tuple_to_list(T)]; 
-encode_from_template(T, V, Emit, Vsn, Types) when is_tuple(T) -> + emit(E, tuple, Types, + [encode_(V, noemit(E), Vsn, Types) || V <- tuple_to_list(T)]); +encode_from_template(T, V, E, Vsn, Types) when is_tuple(T) -> assert_type(is_tuple(V), T, V), assert_type(tuple_size(T) =:= tuple_size(V), T, V), Zipped = lists:zip(tuple_to_list(T), tuple_to_list(V)), - [encode_from_template(T1, V1, Emit, Vsn, Types) || {T1, V1} <- Zipped]; -encode_from_template([Type] = T, List, Emit, Vsn, Types) -> + emit(E, tuple, Types, + [encode_from_template(T1, V1, noemit(E), Vsn, Types) || {T1, V1} <- Zipped]); +encode_from_template([Type] = T, List, E, Vsn, Types) -> assert_type(is_list(List), T, List), - [encode_from_template(Type, V, Emit, Vsn, Types) || V <- List]; -encode_from_template(Type, List, Emit, Vsn, Types) when is_list(Type), is_list(List) -> - encode_fields(Type, List, Emit, Vsn, Types); -encode_from_template(label, V, Emit, _, Types) -> + emit(E, list, Types, + [encode_from_template(Type, V, noemit(E), Vsn, Types) || V <- List]); +encode_from_template(Type, List, E, Vsn, Types) when is_list(Type), is_list(List) -> + encode_fields(Type, List, E, Vsn, Types); +encode_from_template(label, V, E, _, Types) -> assert_type(is_atom(V), label, V), - case find_cached_label(V, Types) of - error -> - encode_basic(label, V, Emit, Types); - {ok, Code} when is_integer(Code) -> - [encode_basic(int, Code)] - end; -encode_from_template(Type, V, Emit, _, Types) when Type == id - ; Type == binary - ; Type == bool - ; Type == int - ; Type == label -> - encode_basic(Type, V, Emit, Types); -encode_from_template(Type, V, Emit, Vsn, Types) -> - encode_typed_(Type, V, Emit, Vsn, Types). - %% error({illegal, Type, V}). 
+ emit(E, label, Types, + case find_cached_label(V, Types) of + error -> + encode_basic(label, V, E, Types); + {ok, Code} when is_integer(Code) -> + [encode_basic(int, Code)] + end); +encode_from_template(Type, V, E, _, Types) when Type == id + ; Type == binary + ; Type == bool + ; Type == int + ; Type == negint + ; Type == label -> + encode_basic(Type, V, E, Types); +encode_from_template(Type, V, E, Vsn, Types) -> + encode_typed_(Type, V, E, Vsn, Types). assert_type(true, _, _) -> ok; assert_type(_, Type, V) -> error({illegal, Type, V}). +decode_alt([A|Alts], Fld, T, Vsn, Types) -> + try decode_from_template(A, Fld, Vsn, Types) + catch error:_ -> + decode_alt(Alts, Fld, T, Vsn, Types) + end; +decode_alt([], Fld, T, _, _) -> + error({illegal, T, Fld}). + +encode_alt(Alts, Term, T, E, Vsn, Types) -> + %% Since we don't know which type may match, treat as dynamic. + encode_alt_(Alts, Term, T, dyn(E), Vsn, Types). + +encode_alt_([A|Alts], Term, T, E, Vsn, Types) -> + try encode_from_template(A, Term, E, Vsn, Types) + catch error:_ -> + encode_alt_(Alts, Term, T, E, Vsn, Types) + end; +encode_alt_([], Term, T, _, _, _) -> + error({illegal, T, Term}). %% Basically, dynamically encoding a statically defined object encode_fields([{Field, Type}|TypesLeft], - [{Field, Val}|FieldsLeft], Emit, Vsn, Types) -> - [ encode_from_template(Type, Val, Emit, Vsn, Types) - | encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)]; + [{Field, Val}|FieldsLeft], E, Vsn, Types) -> + [ encode_from_template(Type, Val, E, Vsn, Types) + | encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)]; encode_fields([{_Field, _Type} = FT|_TypesLeft], - [Val |_FieldsLeft], _Emit, _Vsn, _Types) -> + [Val |_FieldsLeft], _E, _Vsn, _Types) -> error({illegal_field, FT, Val}); encode_fields([Type|TypesLeft], - [Val |FieldsLeft], Emit, Vsn, Types) when is_atom(Type) -> + [Val |FieldsLeft], E, Vsn, Types) when is_atom(Type) -> %% Not sure about this ... 
- [ encode_from_template(Type, Val, Emit, Vsn, Types) - | encode_fields(TypesLeft, FieldsLeft, Emit, Vsn, Types)]; + [ encode_from_template(Type, Val, E, Vsn, Types) + | encode_fields(TypesLeft, FieldsLeft, E, Vsn, Types)]; encode_fields([], [], _, _, _) -> []. @@ -337,6 +392,12 @@ decode_fields([{Tag, Type}|TypesLeft], decode_fields([], [], _, _) -> []. + +emit(E, Tag, Types, Enc) when E > 0 -> + [emit_code(Tag, Types), Enc]; +emit(0, _, _, Enc) -> + Enc. + emit_code(Tag, #{rev := Tags}) -> encode_basic(int, maps:get(Tag, Tags)). @@ -352,12 +413,17 @@ decode_basic(Type, V, _) -> decode_basic(label, Fld) -> binary_to_existing_atom(decode_basic(binary, Fld), utf8); +decode_basic(negint, Fld) -> + I = gmserialization:decode_field(int, Fld), + -I; decode_basic(Type, Fld) -> gmserialization:decode_field(Type, Fld). -encode_basic(Tag, V, true, Types) -> +encode_basic(negint, I, _, Types) when is_integer(I), I < 0 -> + [emit_code(negint, Types), gmserialization:encode_field(int, -I)]; +encode_basic(Tag, V, E, Types) when E > 0 -> [emit_code(Tag, Types), encode_basic(Tag, V)]; -encode_basic(Tag, V, false, _) -> +encode_basic(Tag, V, 0, _) -> encode_basic(Tag, V). encode_basic(label, A) when is_atom(A) -> @@ -536,6 +602,7 @@ round_trip_test_() -> t_sample_types() -> [ 5 + , -5 , <<"a">> , [1,2,3] , {<<"a">>,1} @@ -560,10 +627,20 @@ user_types_test_() -> , ?_test(t_reg_label_cache2()) ]}. +dynamic_types_test_() -> + [ ?_test(revert_to_default_types()) + , ?_test(t_typed_map()) + , ?_test(t_alts()) + ]. + t_round_trip(T) -> ?debugVal(T), ?assertMatch({T, T}, {T, decode(encode(T))}). +t_round_trip_typed(Type, T) -> + ?debugVal(T), + ?assertMatch({T, T}, {T, decode(encode_typed(Type, T))}). + t_reg_typed_tuple() -> Type = {int, int, int}, MyTypes = #{ codes => #{ 1001 => int_tup3 } @@ -645,7 +722,16 @@ t_reg_label_cache2() -> Tup = {'1', '1'}, Enc = gmser_dyn:encode_typed(lbl_tup2, Tup), [<<0>>,<<1>>,[<<3,235>>,[[<<49>>],[<<49>>]]]] = Enc, - Tup = gmser_dyn:decode(Enc). 
+ _Tup = gmser_dyn:decode(Enc). +t_typed_map() -> + Term = #{a => 13, {key,1} => [a]}, + Enc = encode_typed(#{items => [{a,int},{{key,1},[label]}]}, Term), + ?assertEqual(Term, decode(Enc)). + +t_alts() -> + t_round_trip_typed(#{alt => [negint, int]}, -4), + t_round_trip_typed(#{alt => [negint, int]}, 4), + ok. -endif.