Improve normalization, add anchor support

This commit is contained in:
Ulf Wiger
2026-05-16 16:13:42 +02:00
parent 73944804c1
commit ea50e9e61a
2 changed files with 129 additions and 20 deletions
+116 -18
View File
@@ -97,14 +97,57 @@ use_schema(Schema, RootSchema) ->
normalize() ->
normalize(get_schema()).
normalize(S) when is_map(S) ->
#{bin_key(K) => normalize(V) || K := V <- S};
normalize(S) when is_list(S) ->
[normalize(Sx) || Sx <- S];
normalize(S) ->
normalize(Schema) ->
Schema1 = normalize_map_keys(Schema),
normalize_values(Schema1).
%% Recursively rewrite every map key of a schema as a binary (see bin_key/1).
%% Lists are walked element by element; any other term is returned untouched.
%% Only keys are converted here: leaf values are left exactly as they were.
normalize_map_keys(Map) when is_map(Map) ->
    maps:fold(
      fun(Key, Val, Acc) ->
              Acc#{bin_key(Key) => normalize_map_keys(Val)}
      end, #{}, Map);
normalize_map_keys(Items) when is_list(Items) ->
    [normalize_map_keys(Item) || Item <- Items];
normalize_map_keys(Other) ->
    Other.
%% Normalize the values of a schema whose map keys are already binaries.
%%
%% Maps are rebuilt with every value passed through normalize_value/2, lists
%% are walked element by element, and any other term is returned as is.
normalize_values(S) when is_map(S) ->
    #{K => normalize_value(K, V) || K := V <- S};
normalize_values(L) when is_list(L) ->
    [normalize_values(S) || S <- L];
normalize_values(S) ->
    S.

%% Normalize the value V stored under the binary key K.
%%
%%  * "type" given as an Erlang string or atom becomes a binary; a list of
%%    type names is normalized name by name.
%%  * Lists under keywords that the spec defines as arrays are always kept
%%    as arrays. Any other list is turned into a binary only when it is a
%%    flat list of printable Unicode characters, i.e. really an Erlang
%%    string. Lists of binaries, maps or plain numbers therefore stay lists
%%    instead of being concatenated by unicode:characters_to_binary/1.
%%    NOTE: the empty list is still treated as the empty string (<<>>),
%%    as before, unless the keyword is a known array keyword.
%%  * Nested maps (sub-schemas) are normalized recursively.
%%  * Booleans are kept as booleans; all other atoms become binaries.
%%  * Everything else (binaries, numbers, ...) is returned unchanged.
normalize_value(<<"type">>, [C | _] = T) when is_integer(C) ->
    %% A single type name written as an Erlang string
    %% (the same conversion bin_key/1 applies to lists).
    unicode:characters_to_binary(T);
normalize_value(<<"type">> = K, Ts) when is_list(Ts) ->
    %% An array of type names, e.g. [string, null] or ["string", "null"]
    [normalize_value(K, T) || T <- Ts];
normalize_value(K, L) when is_list(L) ->
    case is_schema_array_key(K) orelse not io_lib:printable_unicode_list(L) of
        true ->
            [normalize_values(S) || S <- L];
        false ->
            %% Printable characters are always valid code points, so this
            %% returns a binary and never an {error, _, _} tuple.
            unicode:characters_to_binary(L)
    end;
normalize_value(_, V) when is_map(V) ->
    %% Sub-schema (e.g. under "properties", "items", "contains")
    normalize_values(V);
normalize_value(_, V) when is_boolean(V) ->
    %% true/false are atoms, but must stay JSON booleans
    %% (e.g. "additionalProperties": false)
    V;
normalize_value(_, V) when is_atom(V) ->
    atom_to_binary(V, utf8);
normalize_value(_, V) ->
    V.

%% Keywords whose list value must be treated as an array, never as a string.
is_schema_array_key(<<"allOf">>)       -> true;  %% 10.2.1.1
is_schema_array_key(<<"anyOf">>)       -> true;  %% 10.2.1.2
is_schema_array_key(<<"oneOf">>)       -> true;  %% 10.2.1.3
is_schema_array_key(<<"prefixItems">>) -> true;  %% 10.3.1.1
is_schema_array_key(<<"contains">>)    -> true;  %% 10.3.1.3 (a list here is
                                                 %% invalid, but is kept as-is)
is_schema_array_key(_)                 -> false.
%% Convert a map key to its binary form.
%% Binaries are returned unchanged, atoms are encoded as UTF-8, and lists
%% are handed to unicode:characters_to_binary/1. Any other term is rejected
%% with a function_clause error.
bin_key(Key) when is_binary(Key) ->
    Key;
bin_key(Key) when is_atom(Key) ->
    atom_to_binary(Key, utf8);
bin_key(Key) when is_list(Key) ->
    unicode:characters_to_binary(Key).
clear() ->
@@ -159,10 +202,10 @@ any_schema_prop(P, S0, [S|Ss]) ->
any_schema_prop(P, S, []) ->
schema_prop_find(P, S).
schema_prop_find(P, #st{s = S, r = RS}) when is_map(S) ->
schema_prop_find(P, #st{s = S} = St) when is_map(S) ->
case maps:find(P, S) of
{ok, #{<<"$ref">> := Sub} = M} when map_size(M) == 1 ->
D = expand_ref(Sub, RS),
D = expand_ref(Sub, St),
{ok, D};
Other -> Other
end;
@@ -236,8 +279,8 @@ get_type(#st{} = St0, Value) ->
get_type(#st{} = St, Ss, Value) ->
case any_schema_prop(<<"type">>, St, Ss) of
{ok, TBin} ->
select_type(TBin, Value, St);
{ok, Type} when is_binary(Type); is_list(Type) ->
select_type(Type, Value, St);
error ->
try infer_type(Value)
catch
@@ -357,7 +400,7 @@ convert_enums(V, St0) when is_binary(V) ->
{Ss, St1} = schemas_from_dynamic_eval(V, St),
case any_schema_prop(<<"enum">>, St1, Ss) of
{ok, _} ->
binary_to_atom(V, utf8);
binary_to_atom(V, unicode);
_ ->
V
end;
@@ -775,8 +818,8 @@ any_pattern_({Pat, Schema, I}, P) ->
maybe_expand_ref(#st{s = S} = St) ->
case S of
#{<<"$ref">> := Ref} = R when map_size(R) == 1 ->
St#st{s = expand_ref(Ref, St#st.r)};
#{<<"$ref">> := Ref} ->
St#st{s = expand_ref(Ref, St)};
_ ->
St
end.
@@ -934,7 +977,7 @@ expand_schema(S) ->
%% S#{<<"definitions">> := expand_schema(D, S)}.
expand_schema(#{<<"$ref">> := Path} = V, S0) when map_size(V) == 1 ->
expand_schema(expand_ref(Path, S0), S0);
expand_schema(expand_ref(Path, use_schema(S0)), S0);
expand_schema(S, S0) when is_map(S) ->
%% https://json-schema.org/understanding-json-schema/structuring#dollarref
%% When $id is used in a subschema, it indicates an embedded schema.
@@ -959,7 +1002,7 @@ expand_schema(S, _) ->
S.
expand_schema_(K, #{<<"$ref">> := Path} = V, Acc, S0) when map_size(V) == 1 ->
D = expand_ref(Path, S0),
D = expand_ref(Path, use_schema(S0)),
Acc#{K => D};
expand_schema_(K, V, Acc, S0) ->
Acc#{K => expand_schema(V, S0)}.
@@ -967,13 +1010,13 @@ expand_schema_(K, V, Acc, S0) ->
expand_ref(R, _, #{follow_refs := false}) ->
R;
expand_ref(R, S, _) ->
expand_ref(R, S).
expand_ref(R, use_schema(S)).
expand_ref(<<"#">>, S) ->
expand_ref(<<"#">>, #st{r = R}) ->
%% The $ref keyword may be used to create recursive schemas that refer to themselves.
%% This is done by using `{"$ref" : "#"}`
S;
expand_ref(<<"#/", Path/binary>>, S) ->
R;
expand_ref(<<"#/", Path/binary>>, #st{r = S}) ->
Key = filename:split(Path),
case schema(Key, S, #{follow_refs => false}) of
{ok, #{<<"$ref">> := _}} ->
@@ -993,8 +1036,63 @@ expand_ref(<<"#/", Path/binary>>, S) ->
Def;
undefined ->
error(unknown_ref, [Path])
end;
expand_ref(<<"#", Anchor/binary>>, #st{r = S}) ->
case find_anchor(Anchor, S) of
{ok, Ss} ->
Ss;
error ->
error({unknown_anchor, Anchor})
end.
%% get_schema_by_path([T|P], #{<<"type">> := Ts} = S) when is_atom(T) ->
%% case atom_to_binary(T, utf8) of
%% Ts ->
%% get_schema_by_path(P, S);
%% Prop when is_map_key(Prop, S) ->
%% get_schema_by_path(P, maps:get(Prop, S));
%% _ ->
%% error(invalid_schema_path)
%% end;
%% get_schema_by_path([Property|P], #{<<"properties">> := Ps} = S) when is_binary(Property) ->
%% get_schema_by_path(P, maps:get(Property, Ps));
%% get_schema_by_path([], S) ->
%% S.
%% == Anchor search (unoptimized - must search whole root schema)
%% Depth-first search of a term for a map whose <<"$anchor">> value is
%% exactly Anchor. Maps and lists are descended into; any other term ends
%% that branch of the search.
%% Returns {ok, Map} for the first match encountered, otherwise `error`.
find_anchor(Anchor, #{<<"$anchor">> := Anchor} = Schema) ->
    {ok, Schema};
find_anchor(Anchor, Schema) when is_map(Schema) ->
    map_search_anchor(maps:next(maps:iterator(Schema)), Anchor);
find_anchor(Anchor, Items) when is_list(Items) ->
    list_search_anchor(Items, Anchor);
find_anchor(_Anchor, _Other) ->
    error.

%% Step through a map iterator result ({Key, Value, NextIter} | none),
%% searching each value in turn and stopping at the first match.
map_search_anchor(none, _Anchor) ->
    error;
map_search_anchor({_Key, Value, Next}, Anchor) ->
    case find_anchor(Anchor, Value) of
        error ->
            map_search_anchor(maps:next(Next), Anchor);
        {ok, _} = Found ->
            Found
    end.

%% Search the elements of a list from left to right, stopping at the
%% first match.
list_search_anchor([], _Anchor) ->
    error;
list_search_anchor([Head | Rest], Anchor) ->
    case find_anchor(Anchor, Head) of
        error ->
            list_search_anchor(Rest, Anchor);
        {ok, _} = Found ->
            Found
    end.
%% ==
schema(Path) ->
schema(Path, get_schema()).
+13 -2
View File
@@ -41,6 +41,7 @@ schema_spec_examples_test_() ->
?t(t_ref_loop())
, ?t(t_recursive_def())
, ?t(t_nested_refs())
, ?t(t_anchors())
]}.
array() -> #{<<"type">> => <<"array">>}.
@@ -293,7 +294,8 @@ fails(V, S, Opts, Reason) when is_atom(Reason) ->
fails(V, S, Opts, #{e => Reason});
fails(V, S, Opts, Expect) ->
try validate(V, S, Opts) of
_ ->
Other ->
?debugFmt("Expected failure, Other = ~p", [Other]),
error({expected_exception, #{v => V,
s => S,
e => Expect}})
@@ -303,6 +305,8 @@ fails(V, S, Opts, Expect) ->
end.
%% ?assertError({Reason, [], V}, valid(V, S)).
match_expected('_', _) ->
ok;
match_expected(E, R) ->
case maps:fold(
fun(K, V, Acc) ->
@@ -350,7 +354,6 @@ all_fail(Vs, S, Reason) ->
read(F) ->
FullF = filename:join(
filename:dirname(code:which(?MODULE)), F),
?debugFmt("FullF = ~s~n", [FullF]),
{ok, Bin} = file:read_file(FullF),
dec(Bin).
@@ -402,3 +405,11 @@ t_nested_refs() ->
validate(Vs, S, Opts),
fails(Vf, S, Opts, #{e => failing_schemas}),
ok.
%% Anchor test: loads the schema in data/anchors.json and checks one
%% "person" document with age 29 via validate/3, then expects a document
%% with age -17 to be rejected with reason not_in_range.
t_anchors() ->
    Schema = read("data/anchors.json"),
    Person = fun(Age) ->
                     #{<<"person">> => #{ <<"name">> => <<"Ulf">>
                                        , <<"age">> => Age }}
             end,
    validate(Person(29), Schema, #{}),
    fails(Person(-17), Schema, #{}, not_in_range),
    ok.