Improve normalization, add anchor support

Merge pull request 'Clarify validation example in README' (#6 ) from uw-clarify-readme into master
Reviewed-on: #6
2026-05-16 16:13:55 +02:00 · 2026-05-14 19:10:20 +09:00 · 2026-05-14 11:32:17 +02:00
3 changed files with 150 additions and 25 deletions
@@ -115,13 +115,29 @@ given string value.

 In the test schema, we can see the following definition:
 ```json
-"Pubkey": {
-    "type": "string",
-    "x-serialization": {
-        "tags": ["ak", "ct"]
-    }
+        "properties": {
+            "from": {
+                "allOf": [
+                    { "$ref": "#/components/schemas/Pubkey" },
+                    { "x-serialization": {
+                        "tags": ["ak"]
+                    }}
+                ]
+            }
+        }
+...
+    "Pubkey": {
+        "type": "string",
+        "x-serialization": {
+            "tags": ["ak", "ct"]
+        }
 ```

 Whenever the validator encounters an `x-...` property mapped to a validator fun,
 this fun is called with the value and the schema part of the property. The return
 value of the fun is ignored, and any normal return is treated as a validation success.
+
+The example illustrates a common pattern in OpenAPI specs, where entity references are
+used extensively. The `Pubkey` data type can have a more general `x-serialization`
+definition, where multiple key types are accepted, whereas a specialized use of the
+type can narrow the scope by accepting only a subset of the possible types.
@@ -97,14 +97,57 @@ use_schema(Schema, RootSchema) ->
 normalize() ->
    normalize(get_schema()).

-normalize(S) when is_map(S) ->
-    #{bin_key(K) => normalize(V) || K := V <- S};
-normalize(S) when is_list(S) ->
-    [normalize(Sx) || Sx <- S];
-normalize(S) ->
+%% Normalize a schema in two passes: map keys are first rewritten to
+%% binaries by normalize_map_keys/1, after which the values of the
+%% resulting term are processed by normalize_values/1.
+normalize(Schema) ->
+    normalize_values(normalize_map_keys(Schema)).
+
+%% Recursively rewrite every map key to a binary (via bin_key/1),
+%% descending through nested maps and lists. Terms that are neither
+%% maps nor lists are returned untouched.
+normalize_map_keys(Map) when is_map(Map) ->
+    maps:fold(
+      fun(Key, Val, Acc) ->
+              Acc#{bin_key(Key) => normalize_map_keys(Val)}
+      end, #{}, Map);
+normalize_map_keys(Items) when is_list(Items) ->
+    [normalize_map_keys(Item) || Item <- Items];
+normalize_map_keys(S) ->
    S.

+%% Walk a (key-normalized) schema term. For a map, every value is
+%% handed to normalize_value/2 together with its key; a list is
+%% traversed element by element; any other term is returned as-is.
+normalize_values(Map) when is_map(Map) ->
+    maps:map(fun normalize_value/2, Map);
+normalize_values(Items) when is_list(Items) ->
+    [normalize_values(Item) || Item <- Items];
+normalize_values(Other) ->
+    Other.
+
+%% Normalize one schema value `V' found under the (binary) key `K'.
+%%
+%% - A `type' given as an Erlang string becomes a binary.
+%% - For keywords whose value the spec defines as schemas (see
+%%   is_schema_list_keyword/1), a list value is normalized element by
+%%   element and is never interpreted as a string.
+%% - Any other list becomes a binary only if it is a flat list of
+%%   printable Unicode characters; otherwise its elements are
+%%   normalized one by one.
+%% - Atoms become UTF-8 binaries.
+%% - Everything else is returned unchanged.
+normalize_value(<<"type">>, [C|_] = T) when is_integer(C) ->
+    bin_key(T);
+normalize_value(K, L) when is_list(L) ->
+    case is_schema_list_keyword(K) of
+        true ->
+            normalize_values(L);
+        false ->
+            normalize_string_or_list(L)
+    end;
+normalize_value(_, V) when is_atom(V) ->
+    %% NOTE(review): this also rewrites the atoms `true', `false' and
+    %% `null' to binaries - confirm that is intended for boolean keywords.
+    atom_to_binary(V, utf8);
+normalize_value(_, V) ->
+    %% NOTE(review): map values (nested schemas) end up here and are not
+    %% descended into - confirm that this is intended.
+    V.
+
+%% In some cases, the spec tells us what the value must be, so no
+%% guessing between "string" and "list" is needed.
+is_schema_list_keyword(<<"allOf">>)       -> true;   %% 10.2.1.1
+is_schema_list_keyword(<<"anyOf">>)       -> true;   %% 10.2.1.2
+is_schema_list_keyword(<<"oneOf">>)       -> true;   %% 10.2.1.3
+is_schema_list_keyword(<<"prefixItems">>) -> true;   %% 10.3.1.1
+is_schema_list_keyword(<<"contains">>)    -> true;   %% 10.3.1.3 (a valid schema)
+is_schema_list_keyword(_)                 -> false.
+
+%% Decide whether a list is an Erlang string or a list of values.
+%% unicode:characters_to_binary/1 cannot be used as the test itself: it
+%% accepts any chardata, so a list of binaries such as
+%% [<<"name">>, <<"age">>] would be concatenated into one binary, and
+%% invalid code points make it return an error tuple instead of raising.
+%% NOTE(review): the empty list is still turned into <<>>, as before.
+normalize_string_or_list(L) ->
+    case io_lib:printable_unicode_list(L) of
+        true ->
+            unicode:characters_to_binary(L);
+        false ->
+            normalize_values(L)
+    end.
+
 bin_key(A) when is_atom(A)   -> atom_to_binary(A, utf8);
+bin_key(L) when is_list(L)   -> unicode:characters_to_binary(L);
 bin_key(B) when is_binary(B) -> B.

 clear() ->
@@ -159,10 +202,10 @@ any_schema_prop(P, S0, [S|Ss]) ->
 any_schema_prop(P, S, []) ->
    schema_prop_find(P, S).

-schema_prop_find(P, #st{s = S, r = RS}) when is_map(S) ->
+schema_prop_find(P, #st{s = S} = St) when is_map(S) ->
    case maps:find(P, S) of
        {ok, #{<<"$ref">> := Sub} = M} when map_size(M) == 1 ->
-            D = expand_ref(Sub, RS),
+            D = expand_ref(Sub, St),
            {ok, D};
        Other -> Other
    end;
@@ -236,8 +279,8 @@ get_type(#st{} = St0, Value) ->

 get_type(#st{} = St, Ss, Value) ->
    case any_schema_prop(<<"type">>, St, Ss) of
-        {ok, TBin} ->
-            select_type(TBin, Value, St);
+        {ok, Type} when is_binary(Type); is_list(Type) ->
+            select_type(Type, Value, St);
        error ->
            try infer_type(Value)
            catch
@@ -357,7 +400,7 @@ convert_enums(V, St0) when is_binary(V) ->
            {Ss, St1} = schemas_from_dynamic_eval(V, St),
            case any_schema_prop(<<"enum">>, St1, Ss) of
                {ok, _} ->
-                    binary_to_atom(V, utf8);
+                    binary_to_atom(V, unicode);
                _ ->
                    V
            end;
@@ -775,8 +818,8 @@ any_pattern_({Pat, Schema, I}, P) ->

 maybe_expand_ref(#st{s = S} = St) ->
    case S of
-        #{<<"$ref">> := Ref} = R when map_size(R) == 1 ->
-            St#st{s = expand_ref(Ref, St#st.r)};
+        #{<<"$ref">> := Ref} ->
+            St#st{s = expand_ref(Ref, St)};
        _ ->
            St
    end.
@@ -934,7 +977,7 @@ expand_schema(S) ->
 %%     S#{<<"definitions">> := expand_schema(D, S)}.

 expand_schema(#{<<"$ref">> := Path} = V, S0) when map_size(V) == 1 ->
-    expand_schema(expand_ref(Path, S0), S0);
+    expand_schema(expand_ref(Path, use_schema(S0)), S0);
 expand_schema(S, S0) when is_map(S) ->
    %% https://json-schema.org/understanding-json-schema/structuring#dollarref
    %% When $id is used in a subschema, it indicates an embedded schema.
@@ -959,7 +1002,7 @@ expand_schema(S, _) ->
    S.

 expand_schema_(K, #{<<"$ref">> := Path} = V, Acc, S0) when map_size(V) == 1 ->
-    D = expand_ref(Path, S0),
+    D = expand_ref(Path, use_schema(S0)),
    Acc#{K => D};
 expand_schema_(K, V, Acc, S0) ->
    Acc#{K => expand_schema(V, S0)}.
@@ -967,13 +1010,13 @@ expand_schema_(K, V, Acc, S0) ->
 expand_ref(R, _, #{follow_refs := false}) ->
    R;
 expand_ref(R, S, _) ->
-    expand_ref(R, S).
+    expand_ref(R, use_schema(S)).

-expand_ref(<<"#">>, S) ->
+expand_ref(<<"#">>, #st{r = R}) ->
    %% The $ref keyword may be used to create recursive schemas that refer to themselves.
    %% This is done by using `{"$ref" : "#"}`
-    S;
-expand_ref(<<"#/", Path/binary>>, S) ->
+    R;
+expand_ref(<<"#/", Path/binary>>, #st{r = S}) ->
    Key = filename:split(Path),
    case schema(Key, S, #{follow_refs => false}) of
        {ok, #{<<"$ref">> := _}} ->
@@ -993,8 +1036,63 @@ expand_ref(<<"#/", Path/binary>>, S) ->
            Def;
        undefined ->
            error(unknown_ref, [Path])
+    end;
+expand_ref(<<"#", Anchor/binary>>, #st{r = S}) ->
+    case find_anchor(Anchor, S) of
+        {ok, Ss} ->
+            Ss;
+        error ->
+            error({unknown_anchor, Anchor})
    end.

+%% get_schema_by_path([T|P], #{<<"type">> := Ts} = S) when is_atom(T) ->
+%%     case atom_to_binary(T, utf8) of
+%%         Ts ->
+%%             get_schema_by_path(P, S);
+%%         Prop when is_map_key(Prop, S) ->
+%%             get_schema_by_path(P, maps:get(Prop, S));
+%%         _ ->
+%%             error(invalid_schema_path)
+%%     end;
+%% get_schema_by_path([Property|P], #{<<"properties">> := Ps} = S) when is_binary(Property) ->
+%%     get_schema_by_path(P, maps:get(Property, Ps));
+%% get_schema_by_path([], S) ->
+%%     S.
+
+%% == Anchor search (unoptimized - must search whole root schema)
+
+%% Depth-first search for a map whose `$anchor' entry equals `Anchor'.
+%% Returns `{ok, Map}' for the first match found, or `error' when the
+%% term contains no such map. Maps are searched through their values,
+%% lists through their elements; other terms never match.
+find_anchor(Anchor, #{<<"$anchor">> := Anchor} = Found) ->
+    {ok, Found};
+find_anchor(Anchor, Map) when is_map(Map) ->
+    map_search_anchor(maps:next(maps:iterator(Map)), Anchor);
+find_anchor(Anchor, List) when is_list(List) ->
+    list_search_anchor(List, Anchor);
+find_anchor(_Anchor, _Other) ->
+    error.
+
+%% Step through a map iterator (the first argument is the result of
+%% maps:next/1), running find_anchor/2 on each value until one
+%% succeeds. Returns `error' once the iterator is exhausted.
+map_search_anchor(none, _Anchor) ->
+    error;
+map_search_anchor({_Key, Value, Iter}, Anchor) ->
+    case find_anchor(Anchor, Value) of
+        error ->
+            map_search_anchor(maps:next(Iter), Anchor);
+        {ok, _} = Found ->
+            Found
+    end.
+
+%% Run find_anchor/2 on each list element in order and return the
+%% first `{ok, _}' result, or `error' if no element yields a match.
+list_search_anchor([], _Anchor) ->
+    error;
+list_search_anchor([Elem | Rest], Anchor) ->
+    case find_anchor(Anchor, Elem) of
+        error ->
+            list_search_anchor(Rest, Anchor);
+        {ok, _} = Found ->
+            Found
+    end.
+
+%% ==
+
 schema(Path) ->
    schema(Path, get_schema()).

@@ -41,6 +41,7 @@ schema_spec_examples_test_() ->
       ?t(t_ref_loop())
     , ?t(t_recursive_def())
     , ?t(t_nested_refs())
+     , ?t(t_anchors())
     ]}.

 array() -> #{<<"type">> => <<"array">>}.
@@ -293,7 +294,8 @@ fails(V, S, Opts, Reason) when is_atom(Reason) ->
    fails(V, S, Opts, #{e => Reason});
 fails(V, S, Opts, Expect) ->
    try validate(V, S, Opts) of
-        _ ->
+        Other ->
+            ?debugFmt("Expected failure, Other = ~p", [Other]),
            error({expected_exception, #{v => V,
                                         s => S,
                                         e => Expect}})
@@ -303,6 +305,8 @@ fails(V, S, Opts, Expect) ->
    end.
    %% ?assertError({Reason, [], V}, valid(V, S)).

+match_expected('_', _) ->
+    ok;
 match_expected(E, R) ->
    case maps:fold(
           fun(K, V, Acc) ->
@@ -350,7 +354,6 @@ all_fail(Vs, S, Reason) ->
 read(F) ->
    FullF = filename:join(
              filename:dirname(code:which(?MODULE)), F),
-    ?debugFmt("FullF = ~s~n", [FullF]),
    {ok, Bin} = file:read_file(FullF),
    dec(Bin).

@@ -402,3 +405,11 @@ t_nested_refs() ->
    validate(Vs, S, Opts),
    fails(Vf, S, Opts, #{e => failing_schemas}),
    ok.
+
+%% Validate against the schema read from data/anchors.json: a person
+%% aged 29 must be accepted, while an age of -17 must be rejected with
+%% reason `not_in_range'.
+t_anchors() ->
+    Schema = read("data/anchors.json"),
+    Person = fun(Age) ->
+                     #{<<"person">> => #{ <<"name">> => <<"Ulf">>
+                                        , <<"age">> => Age }}
+             end,
+    validate(Person(29), Schema, #{}),
+    fails(Person(-17), Schema, #{}, not_in_range),
+    ok.
Author	SHA1	Message	Date
Ulf Wiger	ea50e9e61a	Improve normalization, add anchor support	2026-05-16 16:13:55 +02:00
uwiger	73944804c1	Merge pull request 'Clarify validation example in README' (#6 ) from uw-clarify-readme into master Reviewed-on: #6	2026-05-14 19:10:20 +09:00
Ulf Wiger	ffa189b885	Clarify validation example in README	2026-05-14 11:32:17 +02:00