File Coverage

blib/lib/JSON/Schema/Modern/Utilities.pm
Criterion Covered Total %
statement 482 503 95.8
branch 235 276 85.1
condition 154 185 83.2
subroutine 73 77 94.8
pod 21 36 58.3
total 965 1077 89.6


line stmt bran cond sub pod time code
1 45     45   265 use strict;
  45         81  
  45         1606  
2 45     45   232 use warnings;
  45         76  
  45         3356  
3             package JSON::Schema::Modern::Utilities;
4             # vim: set ts=8 sts=2 sw=2 tw=100 et :
5             # ABSTRACT: Internal utilities for JSON::Schema::Modern
6              
7             our $VERSION = '0.638';
8              
9 45     45   694 use 5.020;
  45         134  
10 45     45   174 use strictures 2;
  45         266  
  45         1420  
11 45     45   15932 use stable 0.031 'postderef';
  45         577  
  45         217  
12 45     45   6172 use experimental 'signatures';
  45         85  
  45         131  
13 45     45   1988 no autovivification warn => qw(fetch store exists delete);
  45         70  
  45         297  
14 45     45   3295 use if "$]" >= 5.022, experimental => 're_strict';
  45         73  
  45         801  
15 45     45   2704 use if "$]" < 5.025002, experimental => 'lexical_subs';
  45         73  
  45         1373  
16 45     45   167 no if "$]" >= 5.031009, feature => 'indirect';
  45         63  
  45         2005  
17 45     45   191 no if "$]" >= 5.033001, feature => 'multidimensional';
  45         70  
  45         1845  
18 45     45   195 no if "$]" >= 5.033006, feature => 'bareword_filehandles';
  45         69  
  45         1714  
19 45     45   164 no if "$]" >= 5.041009, feature => 'smartmatch';
  45         66  
  45         1394  
20 45     45   162 no feature 'switch';
  45         64  
  45         1060  
21 45     45   215 use B;
  45         64  
  45         1017  
22 45     45   149 use Carp qw(carp croak);
  45         59  
  45         2535  
23 45     45   256 use builtin::compat qw(blessed created_as_number);
  45         74  
  45         400  
24 45     45   6882 use Scalar::Util 'looks_like_number';
  45         74  
  45         2794  
25 45     45   203 use if "$]" < 5.041010, 'List::Util' => 'any';
  45         64  
  45         1385  
26 45     45   157 use if "$]" >= 5.041010, experimental => 'keyword_any';
  45         82  
  45         524  
27 45     45   2818 use Storable 'dclone';
  45         81  
  45         1996  
28 45     45   210 use Feature::Compat::Try;
  45         66  
  45         413  
29 45     45   1957 use Mojo::JSON ();
  45         91  
  45         565  
30 45     45   171 use Mojo::JSON::Pointer ();
  45         69  
  45         585  
31 45     45   147 use JSON::PP ();
  45         91  
  45         803  
32 45     45   148 use Types::Standard qw(Str InstanceOf Enum);
  45         76  
  45         323  
33 45     45   111168 use Mojo::File 'path';
  45         131  
  45         2321  
34 45     45   239 use namespace::clean;
  45         83  
  45         209  
35              
36 45     45   12300 use Exporter 'import';
  45         78  
  45         4223  
37              
38             our @EXPORT_OK = qw(
39             is_type
40             get_type
41             is_bool
42             is_schema
43             is_bignum
44             is_equal
45             is_elements_unique
46             jsonp
47             unjsonp
48             jsonp_get
49             jsonp_elements
50             jsonp_set
51             local_annotations
52             canonical_uri
53             E
54             A
55             abort
56             assert_keyword_exists
57             assert_keyword_type
58             assert_pattern
59             assert_uri_reference
60             assert_uri
61             annotate_self
62             sprintf_num
63             true
64             false
65             json_pointer_type
66             canonical_uri_type
67             core_types_type
68             core_formats_type
69             register_schema
70             load_cached_document
71             add_media_type
72             delete_media_type
73             decode_media_type
74             encode_media_type
75             match_media_type
76             );
77              
78 45     45   270 use constant HAVE_BUILTIN => "$]" >= 5.035010;
  45         82  
  45         3256  
79 45     45   211 use if HAVE_BUILTIN, experimental => 'builtin';
  45         73  
  45         1576  
80              
81 45     45   2758 use constant _BUILTIN_BOOLS => 0;
  45         202  
  45         17222  
82             use constant {
83             _BUILTIN_BOOLS && HAVE_BUILTIN && eval { +require Storable; Storable->VERSION(3.27); 1 }
84 45         343 && Mojo::JSON::JSON_XS && eval { Cpanel::JSON::XS->VERSION(4.38); 1 }
85             ? (true => builtin::true, false => builtin::false)
86             : (true => JSON::PP::true, false => JSON::PP::false)
87 45     45   254 };
  45         67  
88              
89             # Mojo::JSON::JSON_XS is false when the environment variable $MOJO_NO_JSON_XS is set
90             # and also checks if Cpanel::JSON::XS is installed.
91             # Mojo::JSON falls back to its own pure-perl encoder/decoder but does not support all the options
92             # that we require here.
93             use constant _JSON_BACKEND =>
94 45         570 Mojo::JSON::JSON_XS && eval { Cpanel::JSON::XS->VERSION('4.38'); 1 } ? 'Cpanel::JSON::XS'
  45         14755  
95 45 0       71 : eval { JSON::PP->VERSION('4.11'); 1 } ? 'JSON::PP'
  0 50       0  
  0         0  
96 45     45   5819 : die 'Cpanel::JSON::XS 4.38 or JSON::PP 4.11 is required';
  45         76  
97              
98             # supports the six core types, plus integer (which is also a number)
99             # we do NOT check stringy_numbers here -- you must do that in the caller
100             # note that sometimes a value may return true for more than one type, e.g. integer+number,
101             # or number+string, depending on its internal flags.
102             # pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
103             # behaviour is consistent with get_type() (where integers are also numbers).
104 87648     87648 1 1032333 sub is_type ($type, $value, $config = {}) {
  87648         106765  
  87648         116203  
  87648         115910  
  87648         104594  
105 87648 100       151457 if ($type eq 'null') {
106 113         436 return !(defined $value);
107             }
108 87535 100       140281 if ($type eq 'boolean') {
109 11231         23964 return is_bool($value);
110             }
111 76304 100       124796 if ($type eq 'object') {
112 19901         85932 return ref $value eq 'HASH';
113             }
114 56403 100       95880 if ($type eq 'array') {
115 14093         62104 return ref $value eq 'ARRAY';
116             }
117              
118 42310 100 100     121679 if ($type eq 'string' or $type eq 'number' or $type eq 'integer') {
      100        
119 42294 100       69584 return 0 if not defined $value;
120 42274         204160 my $flags = B::svref_2object(\$value)->FLAGS;
121              
122             # dualvars with the same string and (stringified) numeric value could be either a string or a
123             # number, and before 5.36 we can't tell the difference, so we will answer yes for both.
124             # in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
125             # numified strings do have POK set, so we can tell which one came first.
126              
127 42274 100       96276 if ($type eq 'string') {
128             # like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
129             return !length ref($value)
130             && !(HAVE_BUILTIN && builtin::is_bool($value))
131             && $flags & B::SVf_POK
132             && (!($flags & (B::SVf_IOK | B::SVf_NOK))
133 45   100 45   287 || do { no warnings 'numeric'; 0+$value eq $value });
  45         84  
  45         24892  
  30307         283261  
134             }
135              
136 11967 100       28239 if ($type eq 'number') {
137             # floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
138 8048   100     16669 return is_bignum($value) || created_as_number($value);
139             }
140              
141 3919 50       9857 if ($type eq 'integer') {
142 3919 100       10225 if ($config->{legacy_ints}) {
143             # in draft4, an integer is "A JSON number without a fraction or exponent part.",
144             # therefore 2.0 is NOT an integer
145 17   100     121 return ref($value) eq 'Math::BigInt'
146             || ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) && created_as_number($value);
147             }
148             else {
149             # note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
150             # therefore they will fail this check -- which is why use of Math::BigInt is recommended
151             # if the exact type is important, or loss of any accuracy is unacceptable
152 3902   100     9767 return is_bignum($value) && $value->is_int
153             # if dualvar, PV and stringified NV/IV must be identical
154             || created_as_number($value) && int($value) == $value;
155             }
156             }
157             }
158              
159 16 100       77 if ($type =~ /^reference to (.+)\z/) {
160 11   33     76 return !blessed($value) && ref($value) eq $1;
161             }
162              
163 5         23 return ref($value) eq $type;
164             }
165              
166             # returns one of the six core types, plus integer
167             # we do NOT check stringy_numbers here -- you must do that in the caller
168             # pass { legacy_ints => 1 } in $config to use draft4 integer behaviour
169             # behaviour is consistent with is_type().
170 123414     123414 1 824266 sub get_type ($value, $config = {}) {
  123414         155226  
  123414         153629  
  123414         140646  
171 123414 100       340892 return 'object' if ref $value eq 'HASH';
172 48460 100       76402 return 'boolean' if is_bool($value);
173 37060 100       75922 return 'null' if not defined $value;
174 36416 100       60931 return 'array' if ref $value eq 'ARRAY';
175              
176             # floats in json will always be parsed into Math::BigFloat, when allow_bignum is enabled
177 30395 100       52277 if (length(my $ref = ref $value)) {
178             return $ref eq 'Math::BigInt' ? 'integer'
179 880 100 100     5527 : $ref eq 'Math::BigFloat' ? (!$config->{legacy_ints} && $value->is_int ? 'integer' : 'number')
    100          
    100          
    100          
180             : (defined blessed($value) ? '' : 'reference to ').$ref;
181             }
182              
183 29515         90645 my $flags = B::svref_2object(\$value)->FLAGS;
184              
185             # dualvars with the same string and (stringified) numeric value could be either a string or a
186             # number, and before 5.36 we can't tell the difference, so we choose number because it has been
187             # evaluated as a number already.
188             # in 5.36+, stringified numbers still get a PV but don't have POK set, whereas
189             # numified strings do have POK set, so we can tell which one came first.
190              
191             # like created_as_string, but rejects dualvars with stringwise-unequal string and numeric parts
192             return 'string'
193             if $flags & B::SVf_POK
194             && (!($flags & (B::SVf_IOK | B::SVf_NOK))
195 45 100 100 45   286 || do { no warnings 'numeric'; 0+$value eq $value });
  45   100     91  
  45         102107  
  29515         106520  
196              
197 6940 100       14662 if ($config->{legacy_ints}) {
198             # in draft4, an integer is "A JSON number without a fraction or exponent part.",
199             # therefore 2.0 is NOT an integer
200 316 100 66     2117 return ($flags & B::SVf_IOK) && !($flags & B::SVf_NOK) ? 'integer' : 'number'
    50          
201             if created_as_number($value);
202             }
203             else {
204             # note: values that are larger than $Config{ivsize} will be represented as an NV, not IV,
205             # therefore they will fail this check -- which is why use of Math::BigInt is recommended
206             # if the exact type is important, or loss of any accuracy is unacceptable
207 6624 100       32398 return int($value) == $value ? 'integer' : 'number' if created_as_number($value);
    100          
208             }
209              
210             # this might be a scalar with POK|IOK or POK|NOK set
211 15         49 return 'ambiguous type';
212             }
213              
214             # lifted from JSON::MaybeXS
215             # note: unlike builtin::compat::is_bool on older perls, we do not accept
216             # dualvar(0,"") or dualvar(1,"1") because JSON::PP and Cpanel::JSON::XS
217             # do not encode these as booleans.
218 60100     60100 1 69207 sub is_bool ($value) {
  60100         70867  
  60100         64302  
219 60100 100 66     316697 HAVE_BUILTIN and builtin::is_bool($value)
      66        
220             or
221             !!blessed($value)
222             and ($value->isa('JSON::PP::Boolean')
223             or $value->isa('Cpanel::JSON::XS::Boolean')
224             or $value->isa('JSON::XS::Boolean'));
225             }
226              
227 34783     34783 1 44052 sub is_schema ($value) {
  34783         48511  
  34783         42790  
228 34783 100       128351 ref $value eq 'HASH' || is_bool($value);
229             }
230              
231 13509     13509 0 17678 sub is_bignum ($value) {
  13509         19314  
  13509         16383  
232 13509         117664 ref($value) =~ /^Math::Big(?:Int|Float)\z/;
233             }
234              
235             # compares two arbitrary data payloads for equality, as per
236             # https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.4.2.2
237             # $state hashref supports the following fields:
238             # - scalarref_booleans (input): treats \0 and \1 as boolean values
239             # - stringy_numbers (input): strings will also be compared numerically
240             # - path (output): location of the first difference
241             # - error (output): description of the first difference
242 17308     17308 1 162707 sub is_equal ($x, $y, $state = {}) {
  17308         22022  
  17308         20695  
  17308         20691  
  17308         18632  
243 17308   100     45914 $state->{path} //= '';
244              
245 17308         29204 my @types = map get_type($_), $x, $y;
246              
247 17308 100       41922 $state->{error} = 'ambiguous type encountered', return 0
248             if grep $types[$_] eq 'ambiguous type', 0..1;
249              
250 17305 100       33218 if ($state->{scalarref_booleans}) {
251 88 100       171 ($x, $types[0]) = (0+!!$$x, 'boolean') if $types[0] eq 'reference to SCALAR';
252 88 100       140 ($y, $types[1]) = (0+!!$$y, 'boolean') if $types[1] eq 'reference to SCALAR';
253             }
254              
255 17305 100       30649 if ($state->{stringy_numbers}) {
256 16 100 100     104 ($x, $types[0]) = (0+$x, int(0+$x) == $x ? 'integer' : 'number')
    100          
257             if $types[0] eq 'string' and looks_like_number($x);
258              
259 16 100 100     62 ($y, $types[1]) = (0+$y, int(0+$y) == $y ? 'integer' : 'number')
    100          
260             if $types[1] eq 'string' and looks_like_number($y);
261             }
262              
263 17305 100       34310 $state->{error} = "wrong type: $types[0] vs $types[1]", return 0 if $types[0] ne $types[1];
264 15900 100       23767 return 1 if $types[0] eq 'null';
265 15864 100 100     47912 ($x eq $y and return 1), $state->{error} = 'strings not equal', return 0
266             if $types[0] eq 'string';
267 6369 100 100     21613 ($x == $y and return 1), $state->{error} = "$types[0]s not equal", return 0
268             if grep $types[0] eq $_, qw(boolean number integer);
269              
270 4263         6811 my $path = $state->{path};
271 4263 100       7491 if ($types[0] eq 'object') {
272 1827 100       5106 $state->{error} = 'property count differs: '.keys(%$x).' vs '.keys(%$y), return 0
273             if keys %$x != keys %$y;
274              
275 1726 100       10451 if (not is_equal(my $arr_x = [ sort keys %$x ], my $arr_y = [ sort keys %$y ], my $s={})) {
276 12         39 my $pos = substr($s->{path}, 1);
277 12         44 $state->{error} = 'property names differ starting at position '.$pos.' ("'.$arr_x->[$pos].'" vs "'.$arr_y->[$pos].'")';
278 12         85 return 0;
279             }
280              
281 1714         5811 foreach my $property (sort keys %$x) {
282 4020         6304 $state->{path} = jsonp($path, $property);
283 4020 100       8230 return 0 if not is_equal($x->{$property}, $y->{$property}, $state);
284             }
285              
286 1527         9242 return 1;
287             }
288              
289 2436 50       4378 if ($types[0] eq 'array') {
290 2436 100       4646 $state->{error} = 'element count differs: '.@$x.' vs '.@$y, return 0 if @$x != @$y;
291 2425         5735 foreach my $idx (0 .. $x->$#*) {
292 5661         10446 $state->{path} = $path.'/'.$idx;
293 5661 100       11299 return 0 if not is_equal($x->[$idx], $y->[$idx], $state);
294             }
295 2226         14004 return 1;
296             }
297              
298 0         0 $state->{error} = 'got surprising type: '.$types[0], return 0; # should never get here
299             }
300              
301             # checks array elements for uniqueness. short-circuits on first pair of matching elements
302             # $state hashref supports the following fields:
303             # - scalarref_booleans (input): treats \0 and \1 as boolean values
304             # - stringy_numbers (input): strings will also be compared numerically
305             # - path (output): location of the first difference
306             # - error (output): description of the first difference
307             # - equal_indices (output): the indices of identical items
308 3609     3609 1 5727 sub is_elements_unique ($array, $state = {}) {
  3609         5210  
  3609         5175  
  3609         5001  
309 3609         11630 foreach my $idx0 (0 .. $array->$#*-1) {
310 1467         4129 foreach my $idx1 ($idx0+1 .. $array->$#*) {
311 2123 100       6693 if (is_equal($array->[$idx0], $array->[$idx1], $state)) {
312 280 50       4972 push $state->{equal_indices}->@*, $idx0, $idx1 if exists $state->{equal_indices};
313 280         1203 return 0;
314             }
315             }
316             }
317 3329         12210 return 1;
318             }
319              
320             # shorthand for creating and appending json pointers
321             # the first argument is an already-encoded json pointer; remaining arguments are path segments to be
322             # encoded and appended
323             sub jsonp {
324 51662 50 66 51662 1 224950 carp q{first argument to jsonp should be '' or start with '/'} if length($_[0]) and substr($_[0],0,1) ne '/';
325 51662         419020 return join('/', shift, map s!~!~0!gr =~ s!/!~1!gr, grep defined, @_);
326             }
327              
328             # splits a json pointer apart into its path segments
329             sub unjsonp {
330 0 0 0 0 1 0 carp q{argument to unjsonp should be '' or start with '/'} if length($_[0]) and substr($_[0],0,1) ne '/';
331 0         0 return map s!~0!~!gr =~ s!~1!/!gr, split m!/!, $_[0];
332             }
333              
334 0     0 1 0 sub jsonp_get ($data, $pointer) {
  0         0  
  0         0  
  0         0  
335 0         0 Mojo::JSON::Pointer->new($data)->get($pointer);
336             }
337              
338             # flatten the data structure into a hashref of { pointer => value, ... }
339             # (essentially the reverse of jsonp_set($data, $foo->%{$_}) foreach keys $foo)
340 21     21 1 5364 sub jsonp_elements ($data, $prefix = '') {
  21         23  
  21         25  
  21         21  
341             # recursively walk the structure..
342             my $hash = +{
343             ref $data eq '' ? ($prefix => $data)
344 21 50       143 : ref $data eq 'HASH' ? map jsonp_elements($data->{$_}, $prefix.'/'.$_)->%*, keys %$data
    100          
    100          
345             : ref $data eq 'ARRAY' ? map jsonp_elements($data->[$_], $prefix.'/'.$_)->%*, 0..$data->$#*
346             : die 'unrecognized type: '. ref $data
347             };
348             }
349              
350             # assigns a value to a data structure at a specific json pointer location
351             # operates destructively, in place, unless the root data or type is being modified
352 17470     17470 1 46421 sub jsonp_set ($data, $pointer, $value) {
  17470         29229  
  17470         27812  
  17470         26900  
  17470         23349  
353 17470 100 100     113688 croak 'cannot write into a non-reference in void context'
354             if not grep ref $data eq $_, qw(HASH ARRAY) and not defined wantarray;
355              
356             # assigning to the root overwrites existing data
357 17468 100       44327 if (not length $pointer) {
358 17412 100 100     49934 if (not ref $data or ref $data ne ref $value) {
359 17410 100       88808 return $value if defined wantarray;
360 2         221 croak 'cannot write into a reference of a different type in void context';
361             }
362              
363 2 100       5 if (ref $value eq 'HASH') {
364 1 50       3 $data = {} if not ref $data;
365 1         4 $data->%* = $value->%*;
366             }
367 2 100       6 if (ref $value eq 'ARRAY') {
368 1 50       3 $data = [] if not ref $data;
369 1         3 $data->@* = $value->@*;
370             }
371              
372 2         5 return $data;
373             }
374              
375 56 50       324 my @keys = map +(s!~0!~!gr =~ s!~1!/!gr),
376             (length $pointer ? (split /\//, $pointer, -1) : ($pointer));
377              
378 56 100 66     486 croak 'cannot write a hashref into a reference to an array in void context'
      100        
      66        
379             if @keys >= 2 and $keys[1] !~ /^(?:\d+|-)\z/a and ref $data eq 'ARRAY' and not defined wantarray;
380              
381 55         87 shift @keys; # always '', indicating the root
382 55         81 my $curp = \$data;
383              
384 55         78 foreach my $key (@keys) {
385             # if needed, first remove the existing data so we can replace with a new hash key or array index
386 113 100 100     337 undef $curp->$*
      100        
387             if not ref $curp->$*
388             or ref $curp->$* eq 'ARRAY' and $key !~ /^(?:\d+|-)\z/a;
389              
390             # use this existing hash key or array index location, or create new position
391 45     45   373 use autovivification 'store';
  45         74  
  45         278  
392             $curp = \(
393             ref $curp->$* eq 'HASH' || $key !~ /^(?:\d+|-)\z/a
394 113 100 100     420 ? $curp->$*->{$key}
    100          
395             : $key =~ /^\d+\z/a
396             ? $curp->$*->[$key]
397             : $curp->$*->[$curp->$*->$#* + 1]);
398             }
399              
400 55         82 $curp->$* = $value;
401 55         297 return $data;
402             }
403              
404             # returns a reusable Types::Standard type for json pointers
405             # TODO: move this off into its own distribution, see JSON::Schema::Types
406 360     360 1 106743 sub json_pointer_type () { Str->where('!length || m{^/} && !m{~(?![01])}'); }
  360         1106  
  360         1833  
407              
408             # a URI without a fragment, or with a json pointer fragment
409 90     90 1 275748 sub canonical_uri_type () {
  90         187  
410 90         536 (InstanceOf['Mojo::URL'])->where(q{!defined($_->fragment) || $_->fragment =~ m{^/} && $_->fragment !~ m{~(?![01])}});
411             }
412              
413             # Validation §7.1-2: "Note that the "type" keyword in this specification defines an "integer" type
414             # which is not part of the data model. Therefore a format attribute can be limited to numbers, but
415             # not specifically to integers."
416 90     90 1 132687 sub core_types_type () {
  90         148  
417 90         454 Enum[qw(null object array boolean string number)];
418             }
419              
420 4     4 1 7 sub core_formats_type () {
  4         6  
421 4         29 Enum[qw(date-time date time duration email idn-email hostname idn-hostname ipv4 ipv6 uri uri-reference iri iri-reference uuid uri-template json-pointer relative-json-pointer regex)];
422             }
423              
424             # simple runtime-wide cache of $ids to schema document objects that are sourced from disk
425             {
426             my $document_cache = {};
427              
428             # Fetches a document from the cache (reading it from disk and creating the document if necessary),
429             # and add it to the evaluator.
430             # Normally this will just be a cache of schemas that are bundled with this distribution or a related
431             # distribution (such as OpenAPI-Modern), as duplicate identifiers are not checked for, unlike for
432             # normal schema additions.
433             # Only JSON-encoded files are supported at this time.
434 114     114 1 250 sub load_cached_document ($evaluator, $uri) {
  114         228  
  114         193  
  114         178  
435 114         301 $uri =~ s/#\z//; # older draft $ids use an empty fragment
436              
437             # see if it already exists as a document in the cache
438 114         22128 my $document = $document_cache->{$uri};
439              
440             # otherwise, load it from disk using our filename cache and create the document
441 114 100 100     21984 if (not $document and my $filename = get_schema_filename($uri)) {
442 68         15031 my $file = path($filename);
443 68 50       2085 die "uri $uri maps to file $file which does not exist" if not -f $file;
444 68         5054 my $schema = $evaluator->_json_decoder->decode($file->slurp);
445              
446             # avoid calling add_schema, which checksums the file to look for duplicates
447 68         15603 $document = JSON::Schema::Modern::Document->new(
448             schema => $schema,
449             evaluator => $evaluator,
450             skip_ref_checks => 1,
451             );
452              
453             # avoid calling add_document, which checks for duplicate identifiers (and would result in an
454             # infinite loop)
455 68 50       336 die JSON::Schema::Modern::Result->new(
456             output_format => $evaluator->output_format,
457             valid => 0,
458             errors => [ $document->errors ],
459             exception => 1,
460             ) if $document->has_errors;
461              
462 68         504 $document_cache->{$uri} = $document;
463             }
464              
465 114 100       22519 return if not $document;
466              
467             # bypass the normal collision checks, to avoid an infinite loop: these documents are presumed safe
468 103         460 $evaluator->_add_resources_unsafe(
469             map +($_->[0] => +{ $_->[1]->%*, document => $document }),
470             $document->resource_pairs
471             );
472              
473 103         52124 return $document;
474             }
475             }
476              
477             ###### media-type mayhem below!
478              
479             {
480             # a hashref that indexes a media-type or media-range string to a decoder, encoder, and
481             # denormalized representation of the string.
482             # prepopulated list must match the list of definitions in _predefined_media_types
483             my $MEDIA_TYPES = {
484             map +(join('/', @$_) => { type => $_->[0], subtype => $_->[1] }), (
485             [ qw(application json) ],
486             [ qw(application octet-stream) ],
487             [ qw(text *) ],
488             [ qw(application x-www-form-urlencoded) ],
489             [ qw(application x-ndjson) ],
490             )
491             };
492              
493             # see RFC9110 §8.3.1 for ABNF
494             my $OWS = q{[\x09\x20]*};
495             my $TOKEN = q{[a-zA-Z0-9!#$%&'*+.^_`|~-]+};
496             my $QUOTED_STRING = q{"((?:[\x09\20\x21\x23-\x5B\x5D-\x7E\x80-\xFF]|\x5C[\x09\x20-\x7E\x80-\xFF])*)"};
497              
498             # parses into hashref: { type => .., subtype => .., params => { .. } }
499 209     209   223 my sub _parse_media_type ($media_type_string) {
  209         246  
  209         203  
500 209         1020 my ($type_subtype, @params) = split /$OWS;$OWS/, $media_type_string;
501 209   50     1126 my ($type, $subtype) = ($type_subtype//'') =~ m{^($TOKEN)/($TOKEN)\z};
502 209 100 66     551 return if not defined $type or not defined $subtype;
503              
504             # RFC9110 §5.6.4: "The backslash octet ("\") can be used as a single-octet quoting mechanism
505             # within quoted-string and comment constructs. Recipients that process the value of a
506             # quoted-string MUST handle a quoted-pair as if it were replaced by the octet following the
507             # backslash."
508 207 100       830 my $params = {
    50          
509             map +(m{^($TOKEN)=($TOKEN|$QUOTED_STRING)\z}
510             ? (fc($1) => fc(defined $3 ? ($3 =~ s/\x5C(.)/$1/gr) : $2))
511             : ()),
512             @params
513             };
514              
515 207 50       418 croak 'cannot parse more than 64 parameters' if keys $params->%* > 64;
516             +{
517 207 100       793 type => fc($type),
518             subtype => fc($subtype),
519             keys %$params ? (parameters => $params) : (),
520             };
521             }
522              
523             # wrapped in a sub so we don't define them until needed
524 21     21   28 my sub _predefined_media_types ($media_type_string) {
  21         35  
  21         23  
525             return +{
526             type => 'application',
527             subtype => 'json',
528             # UTF-8 decoding and encoding is always done, as per the JSON spec.
529             # other charsets are not supported: see RFC8259 §11
530             decode => sub ($content_ref, @) {
531 22         397 \ _JSON_BACKEND->new->allow_nonref(1)->utf8(1)->decode($content_ref->$*);
532             },
533             encode => sub ($content_ref, @) {
534 3         90 \ _JSON_BACKEND->new->allow_nonref(1)->utf8(1)->allow_blessed(1)->convert_blessed(1)->encode($content_ref->$*);
535             },
536 21 100       74 caller_addr => 1,
537             }
538             if $media_type_string eq 'application/json';
539              
540             return +{
541             type => 'application',
542             subtype => 'octet-stream',
543 18 50       35 (map +($_ => sub ($content_ref, @) { $content_ref }), qw(decode encode)),
  0         0  
544             caller_addr => 1,
545             }
546             if $media_type_string eq 'application/octet-stream';
547              
548             # identity function, with charset support
549             return +{
550             type => 'text', subtype => '*',
551             decode => sub ($content_ref, $parameters = {}, @) {
552             # RFC2046 §4.1.2: charset is case-insensitive
553             return $parameters->{charset} ?
554 5 100       38 \ Encode::decode($parameters->{charset}, $content_ref->$*, Encode::DIE_ON_ERR | Encode::LEAVE_SRC)
555             : $content_ref;
556             },
557             encode => sub ($content_ref, $parameters = {}, @) {
558             return $parameters->{charset} ?
559 2 50       15 \ Encode::encode($parameters->{charset}, $content_ref->$*, Encode::DIE_ON_ERR | Encode::LEAVE_SRC)
560             : $content_ref;
561             },
562 18 100       36 caller_addr => 1,
563             }
564             if $media_type_string eq 'text/*';
565              
566             return +{
567             type => 'application',
568             subtype => 'x-www-form-urlencoded',
569             decode => sub ($content_ref, @) {
570 2         20 \ Mojo::Parameters->new->charset('UTF-8')->parse($content_ref->$*)->to_hash;
571             },
572             encode => sub ($content_ref, @) {
573 1         6 \ Mojo::Parameters->new->charset('UTF-8')->pairs([ $content_ref->$*->%* ])->to_string;
574             },
575 17 100       36 caller_addr => 1,
576             }
577             if $media_type_string eq 'application/x-www-form-urlencoded';
578              
579             return +{
580             type => 'application',
581             subtype => 'x-ndjson',
582             decode => sub ($content_ref, @) {
583 3         24 my $decoder = _JSON_BACKEND->new->allow_nonref(1)->utf8(1);
584 3         6 my $line = 0; # line numbers start at 1
585             \[ map {
586 3         21 do {
  7         7  
587 7         11 try { ++$line; $decoder->decode($_) }
  7         9  
  7         54  
588 1         9 catch ($e) { die 'parse error at line '.$line.': '.$e }
589             }
590             }
591             split(/\r?\n/, $content_ref->$*)
592             ];
593             },
594             encode => sub ($content_ref, @) {
595 1         10 my $encoder = _JSON_BACKEND->new->allow_nonref(1)->utf8(1)->allow_blessed(1)->convert_blessed(1);
596 1         15 \ join "\n", map $encoder->encode($_), $content_ref->$*->@*;
597             },
598 16 100       37 caller_addr => 1,
599             }
600             if $media_type_string eq 'application/x-ndjson';
601             }
602              
603             # for internal use only by JSON::Schema::Modern! may be removed without notice!
604 24     24   30 sub _get_media_type_decoder ($media_type_string) {
  24         38  
  24         37  
605 24         49 my $matched_string = match_media_type($media_type_string);
606 24 100       57 return undef if not defined $matched_string;
607              
608 20         36 my $definition = $MEDIA_TYPES->{$matched_string};
609             $definition = $MEDIA_TYPES->{$matched_string} = _predefined_media_types($matched_string)
610 20 100       54 if not exists $definition->{decode};
611              
612 20         103 return $definition->{decode};
613             }
614              
615 16     16 1 890 sub add_media_type ($media_type_string, $decoder_sub = undef, $encoder_sub = undef, $caller_addr = 1) {
  16         24  
  16         17  
  16         22  
  16         19  
  16         19  
616 16 50 66     44 croak 'decoder is not a subref' if defined $decoder_sub and ref $decoder_sub ne 'CODE';
617 16 50 33     29 croak 'encoder is not a subref' if defined $encoder_sub and ref $encoder_sub ne 'CODE';
618              
619 16         27 my $type = _parse_media_type($media_type_string);
620 16 100       194 croak "bad media-type string \"$media_type_string\"" if not $type;
621              
622             # populate the cache if it's a bundled type that hasn't been defined yet
623 15 50       43 _predefined_media_types($media_type_string) if not exists $MEDIA_TYPES->{$media_type_string};
624              
625 15 100       35 if (any { is_equal($type, { $_->%{qw(type subtype parameters)} }) } values %$MEDIA_TYPES) {
  134         347  
626 1 50       105 croak 'duplicate media-type found' if $caller_addr == 1;
627              
628             # track evaluator object that used the deprecated add_media_type interface
629 0         0 push $MEDIA_TYPES->{$media_type_string}{caller_addr}->@*, $caller_addr;
630 0         0 return;
631             }
632              
633 14         69 $MEDIA_TYPES->{$media_type_string} = {
634             decode => $decoder_sub,
635             encode => $encoder_sub,
636             %$type,
637             caller_addr => [ $caller_addr ], # refaddr of the evaluator object that added us
638             };
639              
640 14         41 return;
641             }
642              
643 7     7 1 1106 sub delete_media_type ($media_type_string, $caller_addr = 1) {
  7         10  
  7         9  
  7         7  
644 7 100       35 return if not exists $MEDIA_TYPES->{$media_type_string};
645              
646             delete $MEDIA_TYPES->{$media_type_string}
647             if $caller_addr == 1
648             or $MEDIA_TYPES->{$media_type_string}{caller_addr} = [
649             grep +($_ != 1 && $_ != $caller_addr), $MEDIA_TYPES->{$media_type_string}{caller_addr}->@*
650 5 50 33     63 ];
      100        
651             }
652              
653             # wildcards, parameters supported
654             # always returns a reference to the decoded data, or undef if no decoder is found
655 29     29 1 3576 sub decode_media_type ($media_type_string, $content_ref) {
  29         59  
  29         35  
  29         38  
656 29 50       76 die 'decoder payload must be a reference to a string' if ref $content_ref ne 'SCALAR';
657              
658 29         71 my $matched_string = match_media_type($media_type_string);
659 29 100       61 return if not $matched_string;
660              
661 27         48 my $definition = $MEDIA_TYPES->{$matched_string};
662             $definition = $MEDIA_TYPES->{$matched_string} = _predefined_media_types($matched_string)
663 27 100       65 if not exists $definition->{decode};
664              
665 27 100       63 return if not $definition->{decode};
666              
667 26         66 my $type = _parse_media_type($media_type_string);
668 26   66     119 $definition->{decode}->($content_ref, $type->{parameters}//());
669             }
670              
671             # wildcards, parameters supported
672 8     8 1 1311 sub encode_media_type ($media_type_string, $content_ref) {
  8         11  
  8         8  
  8         10  
673 8 50 66     28 die 'encoder payload must be a reference' if ref $content_ref ne 'REF' and ref $content_ref ne 'SCALAR';
674              
675 8         16 my $matched_string = match_media_type($media_type_string);
676 8 50       17 return if not $matched_string;
677              
678 8         9 my $definition = $MEDIA_TYPES->{$matched_string};
679             $definition = $MEDIA_TYPES->{$matched_string} = _predefined_media_types($matched_string)
680 8 50       18 if not exists $definition->{encode};
681              
682 8 100       19 return if not $definition->{encode};
683              
684 7         13 my $type = _parse_media_type($media_type_string);
685 7   66     33 $definition->{encode}->($content_ref, $type->{parameters}//());
686             }
687              
688             # finds best match for a media-type against a list of media-types. if parameter(s) are included in
689             # the media-type to be matched, all parameters must be present in the match value.
690 88     88 1 5385 sub match_media_type ($media_type_string, $media_types = []) {
  88         128  
  88         124  
  88         156  
691             # return immediately if exact match exists
692             return $media_type_string
693 115         173 if @$media_types and any { $_ eq $media_type_string } @$media_types
694 88 100 100     546 or not @$media_types and exists $MEDIA_TYPES->{$media_type_string};
      100        
      100        
695              
696 50         111 my $mt = _parse_media_type($media_type_string);
697 50 100       101 return if not $mt;
698              
699 49 100       80 my $types = @$media_types ? +{ map +($_ => _parse_media_type($_)), @$media_types } : $MEDIA_TYPES;
700              
701 49         68 my @matches; # [ rank, candidate ]
702              
703             # iterate through each provided MT and compare it to the string for matchability..
704             CANDIDATE:
705 49         165 foreach my $candidate (keys %$types) {
706             # if candidate has parameters, all parameters must match; missing parameters ok.
707             # the more parameters match the higher the score.
708 523         528 my $params_matched = 0;
709 523   100     1157 foreach my $param (keys(($types->{$candidate}{parameters}//{})->%*)) {
710             next CANDIDATE if not exists(($mt->{parameters}//{})->{$param})
711 122 100 100     410 or $types->{$candidate}{parameters}{$param} ne $mt->{parameters}{$param};
      100        
712              
713 18         23 ++$params_matched;
714             }
715              
716 419 100       591 push(@matches, [ 0+$params_matched, $candidate ]), next if $candidate eq '*/*';
717             push(@matches, [ 2**8 + $params_matched, $candidate ]), next
718             if $types->{$candidate}{subtype} eq '*'
719 391 100 100     860 and $types->{$candidate}{type} eq $mt->{type};
720              
721             # exact type + subtype match: best overall
722 356 100       645 if ($types->{$candidate}{type} eq $mt->{type}) {
723             push(@matches, [ 2**10 + $params_matched, $candidate ]), next
724 80 100       171 if $types->{$candidate}{subtype} eq $mt->{subtype};
725              
726             # text/foo+plain matches text/plain but not text/bar+plain
727             push(@matches, [ 2**9 + $params_matched, $candidate ]), next
728 56 100 100     150 if $mt->{subtype} =~ m{^.+\+(.+)\z} and $types->{$candidate}{subtype} eq $1;
729             }
730             }
731              
732 49 100       113 return if not @matches;
733 43         114 my @sorted = sort { $b->[0] <=> $a->[0] } @matches;
  68         118  
734 43         227 return $sorted[0]->[1];
735             }
736             }
737              
738             ######## NO PUBLIC INTERFACES FOLLOW THIS POINT ########
739              
740             # get all annotations produced for the current instance data location (that are visible to this
741             # schema location) - remember these are hashrefs, not Annotation objects
742 1395     1395 0 1970 sub local_annotations ($state) {
  1395         2026  
  1395         1747  
743 1395         7039 grep $_->{instance_location} eq $state->{data_path}, $state->{annotations}->@*;
744             }
745              
746             # shorthand for finding the current uri of the present schema location
747             # ensure that this code is kept consistent with the absolute_keyword_location builder in ResultNode
748             # Note that this may not be canonical if keyword_path has not yet been reset via the processing of a
749             # local identifier keyword (e.g. '$id').
750 34245     34245 0 49507 sub canonical_uri ($state, @extra_path) {
  34245         43612  
  34245         43710  
  34245         39737  
751 34245 100 66     168483 return $state->{initial_schema_uri} if not @extra_path and not length($state->{keyword_path});
752 12799         50139 my $uri = $state->{initial_schema_uri}->clone;
753 12799 50 100     881224 my $fragment = ($uri->fragment//'').(@extra_path ? jsonp($state->{keyword_path}, @extra_path) : $state->{keyword_path});
754 12799 50       95807 undef $fragment if not length($fragment);
755 12799         29700 $uri->fragment($fragment);
756 12799         68359 $uri;
757             }
758              
759             # shorthand for creating error objects
760             # uses these keys from $state:
761             # - initial_schema_uri
762             # - keyword (optional)
763             # - data_path
764             # - traversed_keyword_path
765             # - keyword_path
766             # - _keyword_path_suffix (optional)
767             # - errors
768             # - exception (optional; set by abort())
769             # - recommended_response (optional)
770             # - depth
771             # - traverse (boolean, used for mode)
772             # returns defined-false, so callers can use 'return;' to differentiate between
773             # failed-with-no-error from failed-with-error.
774 13954     13954 0 52684 sub E ($state, $error_string, @args) {
  13954         20786  
  13954         21534  
  13954         25494  
  13954         17813  
775 13954 50       30218 croak 'E called in void context' if not defined wantarray;
776              
777             # sometimes the keyword shouldn't be at the very end of the schema path
778 13954         35837 my $sps = delete $state->{_keyword_path_suffix};
779 13954 100 100     64870 my @keyword_path_suffix = defined $sps && ref $sps eq 'ARRAY' ? $sps->@* : $sps//();
      100        
780              
781             # we store the absolute uri in unresolved form until needed,
782             # and perform the rest of the calculations later.
783 13954   100     63851 my $uri = [ $state->@{qw(initial_schema_uri keyword_path)}, $state->{keyword}//(), @keyword_path_suffix ];
784              
785             my $keyword_location = $state->{traversed_keyword_path}
786 13954         49565 .jsonp($state->@{qw(keyword_path keyword)}, @keyword_path_suffix);
787              
788 13954         91075 require JSON::Schema::Modern::Error;
789             push $state->{errors}->@*, JSON::Schema::Modern::Error->new(
790             depth => $state->{depth} // 0,
791             keyword => $state->{keyword},
792             $state->{traverse} ? () : (instance_location => $state->{data_path}),
793             keyword_location => $keyword_location,
794             # we calculate absolute_keyword_location when instantiating the Error object for Result
795             _uri => $uri,
796             error => @args ? sprintf($error_string, @args) : $error_string,
797             exception => $state->{exception},
798             ($state->%{recommended_response})x!!$state->{recommended_response},
799 13954 100 50     465952 mode => $state->{traverse} ? 'traverse' : 'evaluate',
    100          
    100          
800             );
801              
802 13954         85777 return 0;
803             }
804              
805             # shorthand for creating annotations
806             # uses these keys from $state:
807             # - initial_schema_uri
808             # - keyword (mandatory)
809             # - data_path
810             # - traversed_keyword_path
811             # - keyword_path
812             # - annotations
813             # - collect_annotations
814             # - _unknown (boolean)
815             # - depth
816 12028     12028 0 16506 sub A ($state, $annotation) {
  12028         17158  
  12028         16618  
  12028         15120  
817             # even if the user requested annotations, we only collect them for later drafts
818             # ..but we always collect them if the lowest bit is set, indicating the presence of unevaluated*
819             # keywords necessary for accurate validation
820             return 1 if not ($state->{collect_annotations}
821 12028 100       72906 & ($state->{specification_version} =~ /^draft[467]\z/ ? ~(1<<8) : ~0));
    100          
822              
823             # we store the absolute uri in unresolved form until needed,
824             # and perform the rest of the calculations later.
825 2638         9814 my $uri = [ $state->@{qw(initial_schema_uri keyword_path keyword)} ];
826              
827 2638         9082 my $keyword_location = $state->{traversed_keyword_path}.jsonp($state->@{qw(keyword_path keyword)});
828              
829             push $state->{annotations}->@*, {
830             depth => $state->{depth} // 0,
831             keyword => $state->{keyword},
832             instance_location => $state->{data_path},
833             keyword_location => $keyword_location,
834             # we calculate absolute_keyword_location when instantiating the Annotation object for Result
835             _uri => $uri,
836             annotation => $annotation,
837 2638 100 50     24590 $state->{_unknown} ? (unknown => 1) : (),
838             };
839              
840 2638         5741 return 1;
841             }
842              
843             # creates an error object, but also aborts evaluation immediately
844             # only this error is returned, because other errors on the stack might not actually be "real"
845             # errors (consider if we were in the middle of evaluating a "not" or "if").
846             # Therefore this is only appropriate during the evaluation phase, not the traverse phase.
847 34     34 0 282 sub abort ($state, $error_string, @args) {
  34         60  
  34         60  
  34         62  
  34         44  
848 34         373 ()= E({ %$state, exception => 1 }, $error_string, @args);
849 34 50       230 croak 'abort() called during traverse' if $state->{traverse};
850 34         442 die pop $state->{errors}->@*;
851             }
852              
853 0     0 0 0 sub assert_keyword_exists ($state, $schema) {
  0         0  
  0         0  
  0         0  
854 0 0       0 croak 'assert_keyword_exists called in void context' if not defined wantarray;
855 0 0       0 return E($state, '%s keyword is required', $state->{keyword}) if not exists $schema->{$state->{keyword}};
856 0         0 return 1;
857             }
858              
859 42348     42348 0 53382 sub assert_keyword_type ($state, $schema, $type) {
  42348         50927  
  42348         49786  
  42348         58971  
  42348         46831  
860 42348 50       77344 croak 'assert_keyword_type called in void context' if not defined wantarray;
861 42348 100       119051 return 1 if is_type($type, $schema->{$state->{keyword}});
862 17 100       92 E($state, '%s value is not a%s %s', $state->{keyword}, ($type =~ /^[aeiou]/ ? 'n' : ''), $type);
863             }
864              
865 2875     2875 0 5054 sub assert_pattern ($state, $pattern) {
  2875         4224  
  2875         4861  
  2875         4038  
866 2875 50       6857 croak 'assert_pattern called in void context' if not defined wantarray;
867 2875         5011 try {
868 2875     0   30257 local $SIG{__WARN__} = sub { die @_ };
  0         0  
869 2875         40074 qr/$pattern/;
870             }
871 3         11 catch ($e) { return E($state, $e); };
872 2872         19534 return 1;
873             }
874              
875             # this is only suitable for checking URIs within schemas themselves
876             # note that we cannot use $state->{specification_version} to more tightly constrain the plain-name
877             # fragment syntax, as we could be checking a $ref to a schema using a different version
878 5886     5886 0 8141 sub assert_uri_reference ($state, $schema) {
  5886         7454  
  5886         6984  
  5886         7181  
879 5886 50       10774 croak 'assert_uri_reference called in void context' if not defined wantarray;
880              
881 5886         14062 my $string = $schema->{$state->{keyword}};
882             return E($state, '%s value is not a valid URI-reference', $state->{keyword})
883             # see also uri-reference format sub
884 5886 100 66     22559 if fc(Mojo::URL->new($string)->to_unsafe_string) ne fc($string)
      100        
      100        
      100        
      100        
885             or $string =~ /[^[:ascii:]]/ # ascii characters only
886             or $string =~ /#/ # no fragment, except...
887             and $string !~ m{#\z} # allow empty fragment
888             and $string !~ m{#[A-Za-z_][A-Za-z0-9_:.-]*\z} # allow plain-name fragment, superset of all drafts
889             and $string !~ m{#/(?:[^~]|~[01])*\z}; # allow json pointer fragment
890              
891 5852         1884808 return 1;
892             }
893              
894             # this is only suitable for checking URIs within schemas themselves,
895             # which have fragments consisting of plain names (anchors) or json pointers
896 6199     6199 0 9211 sub assert_uri ($state, $schema, $override = undef) {
  6199         8569  
  6199         8183  
  6199         9265  
  6199         8227  
897 6199 50       14051 croak 'assert_uri called in void context' if not defined wantarray;
898              
899 6199   33     14479 my $string = $override // $schema->{$state->{keyword}};
900 6199         26454 my $uri = Mojo::URL->new($string);
901              
902 6199 100 66     491329 return E($state, '"%s" is not a valid URI', $string)
      100        
      100        
      66        
      66        
      66        
903             # see also uri format sub
904             if fc($uri->to_unsafe_string) ne fc($string)
905             or $string =~ /[^[:ascii:]]/ # ascii characters only
906             or not $uri->is_abs # must have a scheme
907             or $string =~ /#/ # no fragment, except...
908             and $string !~ m{#\z} # empty fragment
909             and $string !~ m{#[A-Za-z][A-Za-z0-9_:.-]*\z} # plain-name fragment
910             and $string !~ m{#/(?:[^~]|~[01])*\z}; # json pointer fragment
911              
912 6183         1181575 return 1;
913             }
914              
915             # produces an annotation whose value is the same as that of the current schema keyword
916             # makes a copy as this is passed back to the user, who cannot be trusted to not mutate it
917 1138     1138 0 1464 sub annotate_self ($state, $schema) {
  1138         1358  
  1138         1387  
  1138         1222  
918             A($state, ref $schema->{$state->{keyword}} ? dclone($schema->{$state->{keyword}})
919 1138 100       11999 : $schema->{$state->{keyword}});
920             }
921              
922             # use original value as stored in the NV, without losing precision
923 1305     1305 0 2300 sub sprintf_num ($value) {
  1305         2156  
  1305         1925  
924 1305 100       2476 is_bignum($value) ? $value->bstr : sprintf('%s', $value);
925             }
926              
927             {
928             # simple runtime-wide cache of $ids to filenames that are sourced from disk
929             my $schema_filename_cache = {};
930              
931             # adds a mapping from a URI to an absolute filename in the global runtime
932             # (available to all instances of the evaluator running in the same process).
933 945     945 0 1142 sub register_schema ($uri, $filename) {
  945         1095  
  945         1015  
  945         1001  
934 945         5299 $schema_filename_cache->{$uri} = $filename;
935             }
936              
937 2000     2000 0 2806 sub get_schema_filename ($uri) {
  2000         2932  
  2000         2518  
938 2000         6843 $schema_filename_cache->{$uri};
939             }
940             }
941              
942             1;
943              
944             __END__