From ea50e9e61af82b47dd8d5bfba09c927fd9eaafa6 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Sat, 16 May 2026 16:13:42 +0200 Subject: [PATCH] Improve normalization, add anchor support --- src/gmconfig_schema_utils.erl | 134 +++++++++++++++++++++++---- test/gmconfig_schema_utils_tests.erl | 15 ++- 2 files changed, 129 insertions(+), 20 deletions(-) diff --git a/src/gmconfig_schema_utils.erl b/src/gmconfig_schema_utils.erl index e64a79a..328d6c0 100644 --- a/src/gmconfig_schema_utils.erl +++ b/src/gmconfig_schema_utils.erl @@ -97,14 +97,57 @@ use_schema(Schema, RootSchema) -> normalize() -> normalize(get_schema()). -normalize(S) when is_map(S) -> - #{bin_key(K) => normalize(V) || K := V <- S}; -normalize(S) when is_list(S) -> - [normalize(Sx) || Sx <- S]; -normalize(S) -> +normalize(Schema) -> + Schema1 = normalize_map_keys(Schema), + normalize_values(Schema1). + +normalize_map_keys(S) when is_map(S) -> + #{bin_key(K) => normalize_map_keys(V) || K := V <- S}; +normalize_map_keys(L) when is_list(L) -> + [normalize_map_keys(S) || S <- L]; +normalize_map_keys(S) -> S. +normalize_values(S) when is_map(S) -> + #{K => normalize_value(K, V) || K := V <- S}; +normalize_values(L) when is_list(L) -> + [normalize_values(S) || S <- L]; +normalize_values(S) -> + S. + +normalize_value(<<"type">>, [C|_] = T) when is_integer(C) -> + bin_key(T); +normalize_value(K, L) when is_list(L) -> + %% In some cases, the spec tells us what to do + if K == <<"allOf">>; %% 10.2.1.1 + K == <<"anyOf">>; %% 10.2.1.2 + K == <<"oneOf">>; %% 10.2.1.3 + K == <<"prefixItems">> -> %% 10.3.1.1 + %% These MUST refer to arrays + [normalize_values(S) || S <- L]; + K == <<"contains">> -> + %% 10.3.1.3 Value MUST be a valid schema + normalize_values(L); + true -> + try unicode:characters_to_binary(L) + catch + error:_ -> + [normalize_values(S) || S <- L] + end + end; +normalize_value(_, V) when is_atom(V) -> + atom_to_binary(V, utf8); +normalize_value(_, V) when is_list(V) -> + try unicode:characters_to_binary(V) + catch + error:_ -> + [normalize_values(S) || S <- V] + end; +normalize_value(_, V) -> + V. + bin_key(A) when is_atom(A) -> atom_to_binary(A, utf8); +bin_key(L) when is_list(L) -> unicode:characters_to_binary(L); bin_key(B) when is_binary(B) -> B. clear() -> @@ -159,10 +202,10 @@ any_schema_prop(P, S0, [S|Ss]) -> any_schema_prop(P, S, []) -> schema_prop_find(P, S). -schema_prop_find(P, #st{s = S, r = RS}) when is_map(S) -> +schema_prop_find(P, #st{s = S} = St) when is_map(S) -> case maps:find(P, S) of {ok, #{<<"$ref">> := Sub} = M} when map_size(M) == 1 -> - D = expand_ref(Sub, RS), + D = expand_ref(Sub, St), {ok, D}; Other -> Other end; @@ -236,8 +279,8 @@ get_type(#st{} = St0, Value) -> get_type(#st{} = St, Ss, Value) -> case any_schema_prop(<<"type">>, St, Ss) of - {ok, TBin} -> - select_type(TBin, Value, St); + {ok, Type} when is_binary(Type); is_list(Type) -> + select_type(Type, Value, St); error -> try infer_type(Value) catch @@ -357,7 +400,7 @@ convert_enums(V, St0) when is_binary(V) -> {Ss, St1} = schemas_from_dynamic_eval(V, St), case any_schema_prop(<<"enum">>, St1, Ss) of {ok, _} -> - binary_to_atom(V, utf8); + binary_to_atom(V, unicode); _ -> V end; @@ -775,8 +818,8 @@ any_pattern_({Pat, Schema, I}, P) -> maybe_expand_ref(#st{s = S} = St) -> case S of - #{<<"$ref">> := Ref} = R when map_size(R) == 1 -> - St#st{s = expand_ref(Ref, St#st.r)}; + #{<<"$ref">> := Ref} -> + St#st{s = expand_ref(Ref, St)}; _ -> St end. @@ -934,7 +977,7 @@ expand_schema(S) -> %% S#{<<"definitions">> := expand_schema(D, S)}. expand_schema(#{<<"$ref">> := Path} = V, S0) when map_size(V) == 1 -> - expand_schema(expand_ref(Path, S0), S0); + expand_schema(expand_ref(Path, use_schema(S0)), S0); expand_schema(S, S0) when is_map(S) -> %% https://json-schema.org/understanding-json-schema/structuring#dollarref %% When $id is used in a subschema, it indicates an embedded schema. @@ -959,7 +1002,7 @@ expand_schema(S, _) -> S. expand_schema_(K, #{<<"$ref">> := Path} = V, Acc, S0) when map_size(V) == 1 -> - D = expand_ref(Path, S0), + D = expand_ref(Path, use_schema(S0)), Acc#{K => D}; expand_schema_(K, V, Acc, S0) -> Acc#{K => expand_schema(V, S0)}. @@ -967,13 +1010,13 @@ expand_schema_(K, V, Acc, S0) -> expand_ref(R, _, #{follow_refs := false}) -> R; expand_ref(R, S, _) -> - expand_ref(R, S). + expand_ref(R, use_schema(S)). -expand_ref(<<"#">>, S) -> +expand_ref(<<"#">>, #st{r = R}) -> %% The $ref keyword may be used to create recursive schemas that refer to themselves. %% This done by using `{"$ref" : "#"}` - S; -expand_ref(<<"#/", Path/binary>>, S) -> + R; +expand_ref(<<"#/", Path/binary>>, #st{r = S}) -> Key = filename:split(Path), case schema(Key, S, #{follow_refs => false}) of {ok, #{<<"$ref">> := _}} -> @@ -993,8 +1036,63 @@ expand_ref(<<"#/", Path/binary>>, S) -> Def; undefined -> error(unknown_ref, [Path]) + end; +expand_ref(<<"#", Anchor/binary>>, #st{r = S}) -> + case find_anchor(Anchor, S) of + {ok, Ss} -> + Ss; + error -> + error({unknown_anchor, Anchor}) end. +%% get_schema_by_path([T|P], #{<<"type">> := Ts} = S) when is_atom(T) -> +%% case atom_to_binary(T, utf8) of +%% Ts -> +%% get_schema_by_path(P, S); +%% Prop when is_map_key(Prop, S) -> +%% get_schema_by_path(P, maps:get(Prop, S)); +%% _ -> +%% error(invalid_schema_path) +%% end; +%% get_schema_by_path([Property|P], #{<<"properties">> := Ps} = S) when is_binary(Property) -> +%% get_schema_by_path(P, maps:get(Property, Ps)); +%% get_schema_by_path([], S) -> +%% S. + +%% == Anchor search (unoptimized - must search whole root schema) + +find_anchor(Anchor, S) when map_get(<<"$anchor">>, S) =:= Anchor -> + {ok, S}; +find_anchor(Anchor, S) when is_map(S) -> + Iter = maps:iterator(S), + map_search_anchor(maps:next(Iter), Anchor); +find_anchor(Anchor, S) when is_list(S) -> + list_search_anchor(S, Anchor); +find_anchor(_, _) -> + error. + +map_search_anchor({_K, V, I}, Anchor) -> + case find_anchor(Anchor, V) of + {ok, _} = Ok -> + Ok; + error -> + map_search_anchor(maps:next(I), Anchor) + end; +map_search_anchor(none, _) -> + error. + +list_search_anchor([H | T], Anchor) -> + case find_anchor(Anchor, H) of + {ok, _} = Ok -> + Ok; + error -> + list_search_anchor(T, Anchor) + end; +list_search_anchor([], _) -> + error. + +%% == + schema(Path) -> schema(Path, get_schema()). diff --git a/test/gmconfig_schema_utils_tests.erl b/test/gmconfig_schema_utils_tests.erl index 3ebba8f..4ab27d0 100644 --- a/test/gmconfig_schema_utils_tests.erl +++ b/test/gmconfig_schema_utils_tests.erl @@ -41,6 +41,7 @@ schema_spec_examples_test_() -> ?t(t_ref_loop()) , ?t(t_recursive_def()) , ?t(t_nested_refs()) + , ?t(t_anchors()) ]}. array() -> #{<<"type">> => <<"array">>}. @@ -293,7 +294,8 @@ fails(V, S, Opts, Reason) when is_atom(Reason) -> fails(V, S, Opts, #{e => Reason}); fails(V, S, Opts, Expect) -> try validate(V, S, Opts) of - _ -> + Other -> + ?debugFmt("Expected failure, Other = ~p", [Other]), error({expected_exception, #{v => V, s => S, e => Expect}}) @@ -303,6 +305,8 @@ fails(V, S, Opts, Expect) -> end. %% ?assertError({Reason, [], V}, valid(V, S)). +match_expected('_', _) -> + ok; match_expected(E, R) -> case maps:fold( fun(K, V, Acc) -> @@ -350,7 +354,6 @@ all_fail(Vs, S, Reason) -> read(F) -> FullF = filename:join( filename:dirname(code:which(?MODULE)), F), - ?debugFmt("FullF = ~s~n", [FullF]), {ok, Bin} = file:read_file(FullF), dec(Bin). @@ -402,3 +405,11 @@ t_nested_refs() -> validate(Vs, S, Opts), fails(Vf, S, Opts, #{e => failing_schemas}), ok. + +t_anchors() -> + S = read("data/anchors.json"), + validate(#{<<"person">> => #{ <<"name">> => <<"Ulf">> + , <<"age">> => 29 }}, S, #{}), + fails(#{<<"person">> => #{ <<"name">> => <<"Ulf">> + , <<"age">> => -17 }}, S, #{}, not_in_range), + ok.