3 Commits

Author SHA1 Message Date
Ulf Wiger ea50e9e61a Improve normalization, add anchor support 2026-05-16 16:13:55 +02:00
uwiger 73944804c1 Merge pull request 'Clarify validation example in README' (#6) from uw-clarify-readme into master
Reviewed-on: #6
2026-05-14 19:10:20 +09:00
Ulf Wiger ffa189b885 Clarify validation example in README 2026-05-14 11:32:17 +02:00
3 changed files with 150 additions and 25 deletions
+21 -5
View File
@@ -115,13 +115,29 @@ given string value.
In the test schema, we can see the following definition:
```json
"Pubkey": {
"type": "string",
"x-serialization": {
"tags": ["ak", "ct"]
}
"properties": {
"from": {
"allOf": [
{ "$ref": "#/components/schemas/Pubkey" },
{ "x-serialization": {
"tags": ["ak"]
}}
]
}
}
...
"Pubkey": {
"type": "string",
"x-serialization": {
"tags": ["ak", "ct"]
}
```
Whenever the validator encounters an `x-...` property mapped to a validator fun,
this fun is called with the value and the schema part of the property. The return
value of the fun is ignored, and any normal return is treated as a validation success.
The example illustrates a common pattern in OpenAPI specs, where entity references are
used extensively. The `Pubkey` data type can have a more general `x-serialization`
definition, where multiple key types are accepted, whereas a specialized use of the
type can narrow the scope by accepting only a subset of the possible types.
+116 -18
View File
@@ -97,14 +97,57 @@ use_schema(Schema, RootSchema) ->
normalize() ->
normalize(get_schema()).
normalize(S) when is_map(S) ->
#{bin_key(K) => normalize(V) || K := V <- S};
normalize(S) when is_list(S) ->
[normalize(Sx) || Sx <- S];
normalize(S) ->
normalize(Schema) ->
Schema1 = normalize_map_keys(Schema),
normalize_values(Schema1).
%% Recursively rewrite every map key in the term to a binary (via bin_key/1).
%% Maps are rebuilt with converted keys, lists are walked element by element,
%% and any other term is returned unchanged. Values themselves are only
%% descended into here, never converted.
normalize_map_keys(Map) when is_map(Map) ->
    Pairs = [{bin_key(Key), normalize_map_keys(Val)} || Key := Val <- Map],
    %% If two keys convert to the same binary, the later pair wins.
    maps:from_list(Pairs);
normalize_map_keys(Items) when is_list(Items) ->
    [normalize_map_keys(Item) || Item <- Items];
normalize_map_keys(Other) ->
    Other.
%% Normalize the values of a term whose map keys are already binaries.
%% For a map, each value is passed through normalize_value/2 together with
%% its key, since the key decides how the value is interpreted.
%% For a list, each element is normalized in turn.
%% Any other term is returned unchanged.
normalize_values(Map) when is_map(Map) ->
    maps:map(fun normalize_value/2, Map);
normalize_values(Items) when is_list(Items) ->
    [normalize_values(Item) || Item <- Items];
normalize_values(Other) ->
    Other.
%% Normalize the value V found under the (binary) schema key K.
%%
%%  * A "type" value written as an Erlang string becomes a binary.
%%  * For keywords whose value is a list of subschemas, every element is
%%    normalized with normalize_values/1.
%%  * Any other list is passed to normalize_list_value/1, which decides
%%    whether it is an Erlang string or an array of values.
%%  * Atoms become UTF-8 binaries.
%%  * Everything else is returned unchanged.
%%
%% NOTE(review): map values fall through to the last clause and are returned
%% as-is, i.e. nested schema maps are not descended into from here - confirm
%% that this is intended.
%% NOTE(review): the atom clause also converts `true', `false' and `null';
%% confirm that the validator expects binaries for those.
normalize_value(<<"type">>, [C|_] = T) when is_integer(C) ->
    bin_key(T);
normalize_value(K, L) when is_list(L) ->
    case is_schema_list_keyword(K) of
        true ->
            [normalize_values(S) || S <- L];
        false ->
            normalize_list_value(L)
    end;
normalize_value(_, V) when is_atom(V) ->
    atom_to_binary(V, utf8);
normalize_value(_, V) ->
    V.

%% True for the keywords where the spec tells us the list holds subschemas.
is_schema_list_keyword(<<"allOf">>)       -> true;   %% 10.2.1.1
is_schema_list_keyword(<<"anyOf">>)       -> true;   %% 10.2.1.2
is_schema_list_keyword(<<"oneOf">>)       -> true;   %% 10.2.1.3
is_schema_list_keyword(<<"prefixItems">>) -> true;   %% 10.3.1.1
%% 10.3.1.3: the value of "contains" MUST be a valid schema; if a list shows
%% up here anyway, its elements are normalized like the keywords above.
is_schema_list_keyword(<<"contains">>)    -> true;
is_schema_list_keyword(_)                 -> false.

%% Decide whether a list is an Erlang string or an array of values.
%%
%% Only a non-empty, flat list of printable Unicode code points is treated as
%% a string and converted to a binary. Blindly calling
%% unicode:characters_to_binary/1 is not safe here: a list of binaries is
%% valid chardata, so e.g. [<<"string">>, <<"null">>] would be concatenated
%% into <<"stringnull">>, and [] would become <<>>. Invalid code points would
%% also yield an {error, _, _} tuple rather than an exception.
%% The empty list is kept as an (empty) array.
normalize_list_value([]) ->
    [];
normalize_list_value([_|_] = L) ->
    case io_lib:printable_unicode_list(L) of
        true ->
            unicode:characters_to_binary(L);
        false ->
            [normalize_list_element(E) || E <- L]
    end.

%% Elements of an array: nested lists get the same string-or-array treatment,
%% anything else goes through normalize_values/1.
normalize_list_element(E) when is_list(E) ->
    normalize_list_value(E);
normalize_list_element(E) ->
    normalize_values(E).
%% Convert a key given as an atom, a character list or a binary to a binary.
%% Binaries are returned unchanged; atoms are encoded as UTF-8; lists are
%% converted with unicode:characters_to_binary/1. Any other term raises
%% function_clause.
bin_key(Bin) when is_binary(Bin) ->
    Bin;
bin_key(Atom) when is_atom(Atom) ->
    atom_to_binary(Atom, utf8);
bin_key(Chars) when is_list(Chars) ->
    unicode:characters_to_binary(Chars).
clear() ->
@@ -159,10 +202,10 @@ any_schema_prop(P, S0, [S|Ss]) ->
any_schema_prop(P, S, []) ->
schema_prop_find(P, S).
schema_prop_find(P, #st{s = S, r = RS}) when is_map(S) ->
schema_prop_find(P, #st{s = S} = St) when is_map(S) ->
case maps:find(P, S) of
{ok, #{<<"$ref">> := Sub} = M} when map_size(M) == 1 ->
D = expand_ref(Sub, RS),
D = expand_ref(Sub, St),
{ok, D};
Other -> Other
end;
@@ -236,8 +279,8 @@ get_type(#st{} = St0, Value) ->
get_type(#st{} = St, Ss, Value) ->
case any_schema_prop(<<"type">>, St, Ss) of
{ok, TBin} ->
select_type(TBin, Value, St);
{ok, Type} when is_binary(Type); is_list(Type) ->
select_type(Type, Value, St);
error ->
try infer_type(Value)
catch
@@ -357,7 +400,7 @@ convert_enums(V, St0) when is_binary(V) ->
{Ss, St1} = schemas_from_dynamic_eval(V, St),
case any_schema_prop(<<"enum">>, St1, Ss) of
{ok, _} ->
binary_to_atom(V, utf8);
binary_to_atom(V, unicode);
_ ->
V
end;
@@ -775,8 +818,8 @@ any_pattern_({Pat, Schema, I}, P) ->
maybe_expand_ref(#st{s = S} = St) ->
case S of
#{<<"$ref">> := Ref} = R when map_size(R) == 1 ->
St#st{s = expand_ref(Ref, St#st.r)};
#{<<"$ref">> := Ref} ->
St#st{s = expand_ref(Ref, St)};
_ ->
St
end.
@@ -934,7 +977,7 @@ expand_schema(S) ->
%% S#{<<"definitions">> := expand_schema(D, S)}.
expand_schema(#{<<"$ref">> := Path} = V, S0) when map_size(V) == 1 ->
expand_schema(expand_ref(Path, S0), S0);
expand_schema(expand_ref(Path, use_schema(S0)), S0);
expand_schema(S, S0) when is_map(S) ->
%% https://json-schema.org/understanding-json-schema/structuring#dollarref
%% When $id is used in a subschema, it indicates an embedded schema.
@@ -959,7 +1002,7 @@ expand_schema(S, _) ->
S.
expand_schema_(K, #{<<"$ref">> := Path} = V, Acc, S0) when map_size(V) == 1 ->
D = expand_ref(Path, S0),
D = expand_ref(Path, use_schema(S0)),
Acc#{K => D};
expand_schema_(K, V, Acc, S0) ->
Acc#{K => expand_schema(V, S0)}.
@@ -967,13 +1010,13 @@ expand_schema_(K, V, Acc, S0) ->
expand_ref(R, _, #{follow_refs := false}) ->
R;
expand_ref(R, S, _) ->
expand_ref(R, S).
expand_ref(R, use_schema(S)).
expand_ref(<<"#">>, S) ->
expand_ref(<<"#">>, #st{r = R}) ->
%% The $ref keyword may be used to create recursive schemas that refer to themselves.
%% This is done by using `{"$ref" : "#"}`
S;
expand_ref(<<"#/", Path/binary>>, S) ->
R;
expand_ref(<<"#/", Path/binary>>, #st{r = S}) ->
Key = filename:split(Path),
case schema(Key, S, #{follow_refs => false}) of
{ok, #{<<"$ref">> := _}} ->
@@ -993,8 +1036,63 @@ expand_ref(<<"#/", Path/binary>>, S) ->
Def;
undefined ->
error(unknown_ref, [Path])
end;
expand_ref(<<"#", Anchor/binary>>, #st{r = S}) ->
case find_anchor(Anchor, S) of
{ok, Ss} ->
Ss;
error ->
error({unknown_anchor, Anchor})
end.
%% get_schema_by_path([T|P], #{<<"type">> := Ts} = S) when is_atom(T) ->
%% case atom_to_binary(T, utf8) of
%% Ts ->
%% get_schema_by_path(P, S);
%% Prop when is_map_key(Prop, S) ->
%% get_schema_by_path(P, maps:get(Prop, S));
%% _ ->
%% error(invalid_schema_path)
%% end;
%% get_schema_by_path([Property|P], #{<<"properties">> := Ps} = S) when is_binary(Property) ->
%% get_schema_by_path(P, maps:get(Property, Ps));
%% get_schema_by_path([], S) ->
%% S.
%% == Anchor search (unoptimized - must search whole root schema)
%% Search a schema term for the subschema whose "$anchor" equals Anchor.
%% Returns {ok, Subschema} for the first match found, or `error' if none.
%% A map that carries the anchor itself matches immediately; otherwise the
%% values of a map, or the elements of a list, are searched in turn.
%% Terms that are neither maps nor lists cannot contain an anchor.
find_anchor(Anchor, #{<<"$anchor">> := Anchor} = Schema) ->
    {ok, Schema};
find_anchor(Anchor, Schema) when is_map(Schema) ->
    First = maps:next(maps:iterator(Schema)),
    map_search_anchor(First, Anchor);
find_anchor(Anchor, Schemas) when is_list(Schemas) ->
    list_search_anchor(Schemas, Anchor);
find_anchor(_Anchor, _Other) ->
    error.
%% Walk a map iterator position (as returned by maps:next/1), searching each
%% value for Anchor with find_anchor/2. Stops at the first hit and returns it;
%% returns `error' once the iterator is exhausted.
map_search_anchor(none, _Anchor) ->
    error;
map_search_anchor({_Key, Value, Next}, Anchor) ->
    case find_anchor(Anchor, Value) of
        error ->
            map_search_anchor(maps:next(Next), Anchor);
        {ok, _} = Found ->
            Found
    end.
%% Search the elements of a list, left to right, for Anchor using
%% find_anchor/2. Returns the first {ok, Subschema} found, or `error' when
%% the list has been exhausted.
list_search_anchor([], _Anchor) ->
    error;
list_search_anchor([Candidate | Rest], Anchor) ->
    case find_anchor(Anchor, Candidate) of
        error ->
            list_search_anchor(Rest, Anchor);
        {ok, _} = Found ->
            Found
    end.
%% ==
schema(Path) ->
schema(Path, get_schema()).
+13 -2
View File
@@ -41,6 +41,7 @@ schema_spec_examples_test_() ->
?t(t_ref_loop())
, ?t(t_recursive_def())
, ?t(t_nested_refs())
, ?t(t_anchors())
]}.
array() -> #{<<"type">> => <<"array">>}.
@@ -293,7 +294,8 @@ fails(V, S, Opts, Reason) when is_atom(Reason) ->
fails(V, S, Opts, #{e => Reason});
fails(V, S, Opts, Expect) ->
try validate(V, S, Opts) of
_ ->
Other ->
?debugFmt("Expected failure, Other = ~p", [Other]),
error({expected_exception, #{v => V,
s => S,
e => Expect}})
@@ -303,6 +305,8 @@ fails(V, S, Opts, Expect) ->
end.
%% ?assertError({Reason, [], V}, valid(V, S)).
match_expected('_', _) ->
ok;
match_expected(E, R) ->
case maps:fold(
fun(K, V, Acc) ->
@@ -350,7 +354,6 @@ all_fail(Vs, S, Reason) ->
read(F) ->
FullF = filename:join(
filename:dirname(code:which(?MODULE)), F),
?debugFmt("FullF = ~s~n", [FullF]),
{ok, Bin} = file:read_file(FullF),
dec(Bin).
@@ -402,3 +405,11 @@ t_nested_refs() ->
validate(Vs, S, Opts),
fails(Vf, S, Opts, #{e => failing_schemas}),
ok.
%% Anchor test: load the schema from data/anchors.json, then check that a
%% person with age 29 validates, while the same person with age -17 is
%% rejected with reason not_in_range.
t_anchors() ->
    Schema = read("data/anchors.json"),
    Person = fun(Age) ->
                     #{<<"person">> => #{ <<"name">> => <<"Ulf">>
                                        , <<"age">> => Age }}
             end,
    validate(Person(29), Schema, #{}),
    fails(Person(-17), Schema, #{}, not_in_range),
    ok.