Compare commits

..

2 Commits

Author SHA1 Message Date
Ulf Wiger
00699b08b7 Fix OTP 28 dialyzer warnings, rewrite gmser_dyn decoder
All checks were successful
Gajumaru Serialization Tests / tests (push) Successful in -3m56s
gmser_dyn no longer tries to compress output by omitting type tags.
Decoding streams using custom template codes can either use 'strict'
decoding, in which case matching templates must be registered on the
decoding end; in `strict => false` mode, the stream can still be decoded
without validation if the custom template is missing.
2026-02-18 21:10:43 +01:00
Ulf Wiger
8734e67c66 WIP refactor gmser_dyn 2026-02-15 12:12:04 +01:00
6 changed files with 704 additions and 241 deletions

141
README.md
View File

@ -49,8 +49,25 @@ range from 10 to 200, and also to stay within 1 byte.)
When encoding `map` types, the map elements are first sorted. When encoding `map` types, the map elements are first sorted.
When specifying a map type for template-driven encoding, use When specifying a map type for template-driven encoding, use
the `#{items => [{Key, Value}]}` construct. the `#{items => [{Key, ValueType} | {opt, Key, ValueType}]}` construct.
The key names are included in the encoding, and are matched against the item
specs during decoding. If a key name doesn't match, decoding fails, unless the
item spec is an `{opt, K, V}` item, in which case that item spec is skipped.
```erlang
T = #{items => [{a,int},{opt,b,int},{c,int}]}
E1 = gmser_dyn:encode_typed(T, #{a => 1, b => 2, c => 3}) ->
[<<0>>,<<1>>,[<<252>>,
[[[<<255>>,<<97>>],[<<248>>,<<1>>]],
[[<<255>>,<<98>>],[<<248>>,<<2>>]],
[[<<255>>,<<99>>],[<<248>>,<<3>>]]]]]
E2 = gmser_dyn:encode_typed(T, #{a => 1, c => 3}) ->
[<<0>>,<<1>>,[<<252>>,
[[[<<255>>,<<97>>],[<<248>>,<<1>>]],
[[<<255>>,<<99>>],[<<248>>,<<3>>]]]]]
gmser_dyn:decode_typed(T,E2) ->
#{c => 3,a => 1}
```
## Labels ## Labels
@ -64,12 +81,12 @@ converted to binaries, and `create` means that the atom is created dynamically.
The option can be passed e.g.: The option can be passed e.g.:
```erlang ```erlang
gmser_dyn:deserialize(Binary, set_opts(#{missing_labels => convert})) gmser_dyn:deserialize(Binary, gmser_dyn:set_opts(#{missing_labels => convert}))
``` ```
or or
```erlang ```erlang
gmser_dyn:deserialize(Binary, set_opts(#{missing_labels => convert}, Types)) gmser_dyn:deserialize(Binary, gmser_dyn:set_opts(#{missing_labels => convert}, Types))
``` ```
By calling `gmser_dyn:register_types/1`, after having added options to the type map, By calling `gmser_dyn:register_types/1`, after having added options to the type map,
@ -114,31 +131,70 @@ Templates can be provided to the encoder by either naming an already registered
type, or by passing a template directly. In both cases, the encoder will enforce type, or by passing a template directly. In both cases, the encoder will enforce
the type information in the template. the type information in the template.
If the template has been registered, the encoder omits inner type tags (still If the template has been registered, the encoder uses the registered type specification
inserting the top-level tag), leading to some compression of the output. to drive the encoding. The code of the registered template is embedded in the encoded
This also means that the serialized term cannot be decoded without the same output:
schema information on the decoder side.
In the case of a directly provided template, all type information is inserted,
such that the serialized term can be decoded without any added type information.
The template types are still enforced during encoding.
```erlang ```erlang
ET = fun(Type,Term) -> io:fwrite("~w~n", [gmser_dyn:encode_typed(Type,Term)]) end. gmser_dyn:encode_typed({int,int,int}, {1,2,3}) ->
[<<0>>,<<1>>,[<<253>>,
[[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>]]]]
ET([{int,int}], [{1,2}]) -> [<<0>>,<<1>>,[<<251>>,[[[<<248>>,<<1>>],[<<248>>,<<2>>]]]]] Types = gmser_dyn_types:add_type(t3,1013,{int,int,int}).
gmser_dyn:encode_typed(t3, {1,2,3}, Types) ->
gmser_dyn:register_type(1000,lt2i,[{int,int}]). [<<0>>,<<1>>,[[<<3,245>>,<<253>>],
ET(lt2i, [{1,2}]) -> [<<0>>,<<1>>,[<<3,232>>,[[<<1>>,<<2>>]]]] [[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>]]]]
``` ```
Note that the original `<<253>>` type code is wrapped as `[<<3,245>>,<<253>>]`,
where `<<3,245>>` corresponds to the custom code `1013`.
Using the default option `#{strict => true}`, the decoder will extract the custom
type spec, and validate the encoded data against it. If the custom code is missing,
the decoder aborts. Using `#{strict => false}`, the custom code is used if it exists,
but otherwise, it's ignored, and the encoded data is decoded using the dynamic type
info.
### Alternative types ### Alternative types
The dynamic encoder supports two additions to the `gmserialization` template The dynamic encoder supports a few additions to the `gmserialization` template
language: `any` and `#{alt => [AltTypes]}`. language: `any`, `#{list => Type}`, `#{alt => [AltTypes]}` and `#{switch => #{Tag => Type}}`.
#### `any`
The `any` type doesn't have an associated code, but enforces dynamic encoding. The `any` type doesn't have an associated code, but enforces dynamic encoding.
#### `list`
The original list type notation expects a key-value list, e.g.
`[{name, binary}, {age, int}]`
```erlang
EL = gmser_dyn:encode_typed([{name,binary},{age,int}], [{name,<<"Ulf">>},{age,29}]) ->
[<<0>>,<<1>>,[<<251>>,
[[<<253>>,[[<<255>>,<<110,97,109,101>>],[<<249>>,<<85,108,102>>]]],
[<<253>>,[[<<255>>,<<97,103,101>>],[<<248>>,<<29>>]]]]]]
```
Note that the encoding explicitly lays out a `[{Key, Value}]` structure, all
dynamically typed. This means it can be dynamically decoded without templates.
```erlang
gmser_dyn:decode(EL).
[{name,<<"Ulf">>},{age,29}]
```
In order to specify something like Erlang's `[integer()]` type, we can use
the following:
```erlang
gmser_dyn:encode_typed(#{list => int}, [1,2,3,4]) ->
[<<0>>,<<1>>,[<<251>>,
[[<<248>>,<<1>>],[<<248>>,<<2>>],[<<248>>,<<3>>],[<<248>>,<<4>>]]]]
```
#### `alt`
The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try The `#{alt => [Type]}` construct also enforces dynamic encoding, and will try
to encode as each type in the list, in the specified order, until one matches. to encode as each type in the list, in the specified order, until one matches.
@ -150,6 +206,55 @@ gmser_dyn:encode_typed(anyint,-5) -> [<<0>>,<<1>>,[<<246>>,[<<247>>,<<5>>]]]
gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]] gmser_dyn:encode_typed(anyint,5) -> [<<0>>,<<1>>,[<<246>>,[<<248>>,<<5>>]]]
``` ```
#### `switch`
The `switch` type allows for encoding a 'tagged' object, where the tag determines
the type.
```erlang
E1 = gmser_dyn:encode_typed(#{switch => #{name => binary, age => int}}, #{age => 29}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<97,103,101>>],[<<248>>,<<29>>]]]]]
gmser_dyn:decode_typed(#{switch => #{name => binary, age => int}}, E1) ->
#{age => 29}
E2 = gmser_dyn:encode_typed(#{switch => #{name => binary, age => int}}, #{name => <<"Ulf">>}) ->
[<<0>>,<<1>>,[<<252>>,[[[<<255>>,<<110,97,109,101>>],[<<249>>,<<85,108,102>>]]]]]
gmser_dyn:decode_typed(#{switch => #{name => binary, age => int}}, E2) ->
#{name => <<"Ulf">>}
```
A practical use of `switch` would be in a protocol schema:
```erlang
t_msg(_) ->
#{switch => #{ call => t_call
, reply => t_reply
, notification => t_notification }}.
t_call(_) ->
#{items => [ {id, anyint}
, {req, t_req} ]}.
t_reply(_) ->
#{alt => [#{items => [ {id, anyint}
, {result, t_result} ]},
#{items => [ {id, anyint}
, {code, anyint}
, {message, binary} ]}
]}.
```
In this scenario, messages are 'tagged' as 1-element maps, e.g.:
```erlang
async_request(Msg) ->
Id = erlang:unique_integer(),
gmmp_cp:to_server(
whereis(gmmp_core_connector),
#{call => #{ id => Id
, req => Msg }}),
Id.
```
### Notes ### Notes
Note that `anyint` is a standard type. The static serializer supports only Note that `anyint` is a standard type. The static serializer supports only

View File

@ -7,4 +7,11 @@
{enacl, {enacl,
{git, {git,
"https://git.qpq.swiss/QPQ-AG/enacl.git", "https://git.qpq.swiss/QPQ-AG/enacl.git",
{ref, "4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}}}]}. {ref, "4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}}},
{eblake2, "1.0.0"}
]}.
{dialyzer,
[ {plt_apps, all_deps},
{base_plt_apps, [erts, kernel, stdlib, enacl, base58, eblake2]}
]}.

View File

@ -1,8 +1,16 @@
{"1.2.0",
[{<<"base58">>, [{<<"base58">>,
{git,"https://git.qpq.swiss/QPQ-AG/erl-base58.git", {git,"https://git.qpq.swiss/QPQ-AG/erl-base58.git",
{ref,"e6aa62eeae3d4388311401f06e4b939bf4e94b9c"}}, {ref,"e6aa62eeae3d4388311401f06e4b939bf4e94b9c"}},
0}, 0},
{<<"eblake2">>,{pkg,<<"eblake2">>,<<"1.0.0">>},0},
{<<"enacl">>, {<<"enacl">>,
{git,"https://git.qpq.swiss/QPQ-AG/enacl.git", {git,"https://git.qpq.swiss/QPQ-AG/enacl.git",
{ref,"4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}}, {ref,"4eb7ec70084ba7c87b1af8797c4c4e90c84f95a2"}},
0}]. 0}]}.
[
{pkg_hash,[
{<<"eblake2">>, <<"EC8AD20E438AAB3F2E8D5D118C366A0754219195F8A0F536587440F8F9BCF2EF">>}]},
{pkg_hash_ext,[
{<<"eblake2">>, <<"3C4D300A91845B25D501929A26AC2E6F7157480846FAB2347A4C11AE52E08A99">>}]}
].

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
-module(gmser_dyn_types). -module(gmser_dyn_types).
-export([ add_type/4 -export([ add_type/3 %% (Tag, Code, Template) -> Types1
, add_type/4 %% (Tag, Code, Template, Types) -> Types1
, from_list/2 , from_list/2
, expand/1 ]). , expand/1 ]).
-export([ next_code/1 ]). -export([ next_code/1 ]).
@ -8,6 +9,14 @@
next_code(#{codes := Codes}) -> next_code(#{codes := Codes}) ->
lists:max(maps:keys(Codes)) + 1. lists:max(maps:keys(Codes)) + 1.
%% @doc Add a type definition on top of the currently registered types.
%% Fetches the type map via gmser_dyn:registered_types/0 and delegates to
%% add_type/4 with it, returning whatever add_type/4 returns.
%% NOTE(review): nothing visible here writes the result back to the
%% registry; presumably the caller passes the returned Types on explicitly
%% (e.g. as an argument to the encoder) -- confirm.
-spec add_type(Tag, Code, Template) -> Types
when Tag :: gmser_dyn:tag()
, Code :: gmser_dyn:code()
, Template :: gmser_dyn:template()
, Types :: gmser_dyn:types().
add_type(Tag, Code, Template) ->
add_type(Tag, Code, Template, gmser_dyn:registered_types()).
add_type(Tag, Code, Template, Types) -> add_type(Tag, Code, Template, Types) ->
elem_to_type({Tag, Code, Template}, Types). elem_to_type({Tag, Code, Template}, Types).

View File

@ -17,6 +17,9 @@
, is_id/1 , is_id/1
]). ]).
-export([ t_id/1
]).
%% For aec_serialization %% For aec_serialization
-export([ encode/1 -export([ encode/1
, decode/1 , decode/1
@ -26,11 +29,18 @@
, val , val
}). }).
-type tag() :: 'account' | 'name' -type tag() :: 'account'
| 'commitment' | 'contract' | 'channel' | 'associate_chain'
| 'associate_chain' | 'entry' . | 'channel'
| 'commitment'
| 'contract'
| 'contract_source'
| 'name'
| 'native_token'
| 'entry'.
-type val() :: <<_:256>>. -type val() :: <<_:256>>.
-opaque(id() :: #id{}). -type id() :: #id{}.
-export_type([ id/0 -export_type([ id/0
, tag/0 , tag/0
@ -94,6 +104,9 @@ decode(<<Tag:?TAG_SIZE/unit:8, Val:?PUB_SIZE/binary>>) ->
#id{ tag = decode_tag(Tag) #id{ tag = decode_tag(Tag)
, val = Val}. , val = Val}.
%% @doc Check that the argument is an id and return it unchanged.
%% Only an #id{} record matches the single clause; any other term fails
%% with a function_clause error.
-spec t_id(any()) -> id().
t_id(#id{} = Id) -> Id.
%%%=================================================================== %%%===================================================================
%%% Internal functions %%% Internal functions
%%%=================================================================== %%%===================================================================