File Coverage

blib/lib/JSON/Schema/Modern.pm

Criterion	Covered	Total	%
statement	351	354	99.1
branch	127	150	84.6
condition	78	90	86.6
subroutine	51	51	100.0
pod	7	9	77.7
total	614	654	93.8

line	stmt	bran	cond	sub	pod	time	code
1	34			34		10406726	use strict;
	34					465
	34					1081
2	34			34		231	use warnings;
	34					144
	34					1855
3							package JSON::Schema::Modern; # git description: v0.569-9-g357987f3
4							# vim: set ts=8 sts=2 sw=2 tw=100 et :
5							# ABSTRACT: Validate data against a schema
6							# KEYWORDS: JSON Schema validator data validation structure specification
7
8							our $VERSION = '0.570';
9
10	34			34		955	use 5.020; # for fc, unicode_strings features
	34					172
11	34			34		13408	use Moo;
	34					173990
	34					244
12	34			34		41721	use strictures 2;
	34					464
	34					1680
13	34			34		7704	use stable 0.031 'postderef';
	34					697
	34					319
14	34			34		6698	use experimental 'signatures';
	34					93
	34					174
15	34			34		2798	use if "$]" >= 5.022, experimental => 're_strict';
	34					127
	34					400
16	34			34		3649	no if "$]" >= 5.031009, feature => 'indirect';
	34					119
	34					343
17	34			34		1759	no if "$]" >= 5.033001, feature => 'multidimensional';
	34					170
	34					236
18	34			34		1680	no if "$]" >= 5.033006, feature => 'bareword_filehandles';
	34					95
	34					270
19	34			34		13320	use JSON::MaybeXS;
	34					139368
	34					2335
20	34			34		259	use Carp qw(croak carp);
	34					74
	34					2088
21	34			34		223	use List::Util 1.55 qw(pairs first uniqint pairmap uniq any);
	34					741
	34					3367
22	34			34		12301	use Ref::Util 0.100 qw(is_ref is_plain_hashref);
	34					41052
	34					2350
23	34			34		285	use Scalar::Util 'refaddr';
	34					77
	34					1592
24	34			34		17231	use Mojo::URL;
	34					6268419
	34					268
25	34			34		13363	use Safe::Isa;
	34					11355
	34					4737
26	34			34		22127	use Path::Tiny;
	34					316125
	34					2119
27	34			34		23204	use Storable 'dclone';
	34					113386
	34					2401
28	34			34		13145	use File::ShareDir 'dist_dir';
	34					674487
	34					2144
29	34			34		12059	use Module::Runtime qw(use_module require_module);
	34					43265
	34					1002
30	34			34		12393	use MooX::TypeTiny 0.002002;
	34					8832
	34					235
31	34			34		201754	use MooX::HandlesVia;
	34					26186
	34					232
32	34			34		20336	use Types::Standard 1.016003 qw(Bool Int Str HasMethods Enum InstanceOf HashRef Dict CodeRef Optional Slurpy ArrayRef Undef ClassName Tuple Map);
	34					2802044
	34					532
33	34			34		181594	use Feature::Compat::Try;
	34					8987
	34					313
34	34			34		89076	use JSON::Schema::Modern::Error;
	34					152
	34					1609
35	34			34		18740	use JSON::Schema::Modern::Result;
	34					151
	34					1490
36	34			34		17421	use JSON::Schema::Modern::Document;
	34					159
	34					477
37	34			34		20655	use JSON::Schema::Modern::Utilities qw(get_type canonical_uri E abort annotate_self);
	34					119
	34					3284
38	34			34		278	use namespace::clean;
	34					93
	34					204
39
40							our @CARP_NOT = qw(
41							JSON::Schema::Modern::Document
42							JSON::Schema::Modern::Vocabulary
43							JSON::Schema::Modern::Vocabulary::Applicator
44							OpenAPI::Modern
45							);
46
47	34			34		23597	use constant SPECIFICATION_VERSION_DEFAULT => 'draft2020-12';
	34					104
	34					2179
48	34			34		273	use constant SPECIFICATION_VERSIONS_SUPPORTED => [qw(draft7 draft2019-09 draft2020-12)];
	34					78
	34					219292
49
50							has specification_version => (
51							is => 'ro',
52							isa => Enum(SPECIFICATION_VERSIONS_SUPPORTED),
53							coerce => sub {
54							return $_[0] if any { $_[0] eq $_ } SPECIFICATION_VERSIONS_SUPPORTED->@*;
55							my $real = 'draft'.($_[0]//'');
56							(any { $real eq $_ } SPECIFICATION_VERSIONS_SUPPORTED->@*) ? $real : $_[0];
57							},
58							);
59
60							has output_format => (
61							is => 'ro',
62							isa => Enum(JSON::Schema::Modern::Result->OUTPUT_FORMATS),
63							default => 'basic',
64							);
65
66							has short_circuit => (
67							is => 'ro',
68							isa => Bool,
69							lazy => 1,
70							default => sub { $_[0]->output_format eq 'flag' && !$_[0]->collect_annotations },
71							);
72
73							has max_traversal_depth => (
74							is => 'ro',
75							isa => Int,
76							default => 50,
77							);
78
79							has validate_formats => (
80							is => 'ro',
81							isa => Bool,
82							default => 0, # as specified by https://json-schema.org/draft/<version>/schema#/$vocabulary
83							);
84
85							has validate_content_schemas => (
86							is => 'ro',
87							isa => Bool,
88							lazy => 1,
89							# defaults to false in latest versions, as specified by
90							# https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.8.2
91							default => sub { ($_[0]->specification_version//'') eq 'draft7' },
92							);
93
94							has [qw(collect_annotations scalarref_booleans stringy_numbers strict)] => (
95							is => 'ro',
96							isa => Bool,
97							);
98
99							has _format_validations => (
100							is => 'bare',
101							isa => my $format_type = Dict[
102							(map +($_ => Optional[CodeRef]), qw(date-time date time duration email idn-email hostname idn-hostname ipv4 ipv6 uri uri-reference iri iri-reference uuid uri-template json-pointer relative-json-pointer regex)),
103							Slurpy[HashRef[Dict[type => Enum[qw(null object array boolean string number integer)], sub => CodeRef]]],
104							],
105							init_arg => 'format_validations',
106							handles_via => 'Hash',
107							handles => {
108							_get_format_validation => 'get',
109							add_format_validation => 'set',
110							},
111							lazy => 1,
112							default => sub { {} },
113							);
114
115							before add_format_validation => sub ($self, @kvs) { $format_type->({ @$_ }) foreach pairs @kvs };
116
117							around BUILDARGS => sub ($orig, $class, @args) {
118							my $args = $class->$orig(@args);
119							croak 'output_format: strict_basic can only be used with specification_version: draft2019-09'
120							if ($args->{output_format}//'') eq 'strict_basic'
121							and ($args->{specification_version}//'') ne 'draft2019-09';
122
123							return $args;
124							};
125
126							sub add_schema {
127	13548	50		13548	1	313883	croak 'insufficient arguments' if @_ < 2;
128	13548					22400	my $self = shift;
129
130							# TODO: resolve $uri against $self->base_uri
131	13548	50				46064	my $uri = !is_ref($_[0]) ? Mojo::URL->new(shift)
		100
132							: $_[0]->$_isa('Mojo::URL') ? shift : Mojo::URL->new;
133
134	13548	100				2162650	croak 'cannot add a schema with a uri with a fragment' if defined $uri->fragment;
135
136	13547	100				80545	if (not @_) {
137	2					18	my $schema_info = $self->_fetch_from_uri($uri);
138	2	100	66			30	return if not $schema_info or not defined wantarray;
139	1					12	return $schema_info->{document};
140							}
141
142							# document BUILD will trigger $self->traverse($schema)
143	13545	50				37264	my $document = $_[0]->$_isa('JSON::Schema::Modern::Document') ? shift
		100
144							: JSON::Schema::Modern::Document->new(
145							schema => shift,
146							$uri ? (canonical_uri => $uri) : (),
147							evaluator => $self, # used mainly for traversal during document construction
148							);
149
150	13545	100				694910	if ($document->has_errors) {
151	117					7867	my $result = JSON::Schema::Modern::Result->new(
152							output_format => $self->output_format,
153							valid => 0,
154							errors => [ $document->errors ],
155							exception => 1,
156							);
157	117					9756	die $result;
158							}
159
160	13428	100				680601	if (not grep refaddr($_->{document}) == refaddr($document), $self->_canonical_resources) {
161	13423		66			1519436	my $schema_content = $document->_serialized_schema
162							// $document->_serialized_schema($self->_json_decoder->encode($document->schema));
163
164	13423	100				1077770	if (my $existing_doc = first {
165	405963		66	405963		7545653	my $existing_content = $_->_serialized_schema
166							// $_->_serialized_schema($self->_json_decoder->encode($_->schema));
167	405963					2532246	$existing_content eq $schema_content
168							} uniqint map $_->{document}, $self->_canonical_resources) {
169							# we already have this schema content in another document object.
170	9417					75045	$document = $existing_doc;
171							}
172							else {
173	4006					76641	$self->_add_resources(map +($_->[0] => +{ $_->[1]->%*, document => $document }),
174							$document->resource_pairs);
175							}
176							}
177
178	13422	100				723421	if ("$uri") {
179	358					72081	my $resource = $document->_get_resource($document->canonical_uri);
180							$self->_add_resources($uri => {
181							path => '',
182							canonical_uri => $document->canonical_uri,
183							specification_version => $resource->{specification_version},
184							vocabularies => $resource->{vocabularies}, # reference, not copy
185							document => $document,
186							configs => $resource->{configs},
187	358					104712	});
188							}
189
190	13422					1701112	return $document;
191							}
192
193	3			3	1	13497	sub evaluate_json_string ($self, $json_data, $schema, $config_override = {}) {
	3					9
	3					6
	3					9
	3					9
	3					7
194	3	50				14	croak 'evaluate_json_string called in void context' if not defined wantarray;
195
196	3					8	my $data;
197							try {
198							$data = $self->_json_decoder->decode($json_data)
199							}
200	3					16	catch ($e) {
201							return JSON::Schema::Modern::Result->new(
202							output_format => $self->output_format,
203							valid => 0,
204							exception => 1,
205							errors => [
206							JSON::Schema::Modern::Error->new(
207							keyword => undef,
208							instance_location => '',
209							keyword_location => '',
210							error => $e,
211							)
212							],
213							);
214							}
215
216	1					18	return $self->evaluate($data, $schema, $config_override);
217							}
218
219							# this is called whenever we need to walk a document for something.
220							# for now it is just called when a ::Document object is created, to verify the integrity of the
221							# schema structure, to identify the metaschema (via the $schema keyword), and to extract all
222							# embedded resources via $id and $anchor keywords within.
223							# Returns the internal $state object accumulated during the traversal.
224	13645			13645	1	663226	sub traverse ($self, $schema_reference, $config_override = {}) {
	13645					22083
	13645					20019
	13645					21955
	13645					19348
225							# Note: the starting position is not guaranteed to be at the root of the $document.
226	13645		100			43001	my $initial_uri = Mojo::URL->new($config_override->{initial_schema_uri} // '');
227	13645		100			3956206	my $initial_path = $config_override->{traversed_schema_path} // '';
228	13645		100			56578	my $spec_version = $self->specification_version//SPECIFICATION_VERSION_DEFAULT;
229
230							my $state = {
231							depth => 0,
232							data_path => '', # this never changes since we don't have an instance yet
233							initial_schema_uri => $initial_uri, # the canonical URI as of the start of this method, or last $id
234							traversed_schema_path => $initial_path, # the accumulated traversal path as of the start, or last $id
235							schema_path => '', # the rest of the path, since the start of this method, or last $id
236							effective_base_uri => Mojo::URL->new(''),
237							errors => [],
238							identifiers => [],
239							configs => {},
240							callbacks => $config_override->{callbacks} // {},
241	13645		100			40303	evaluator => $self,
242							traverse => 1,
243							};
244
245							try {
246							my $for_canonical_uri = Mojo::URL->new(
247							(is_plain_hashref($schema_reference) && exists $schema_reference->{'$id'}
248							? Mojo::URL->new($schema_reference->{'$id'}) : undef)
249							// $state->{initial_schema_uri});
250							$for_canonical_uri->fragment(undef) if not length $for_canonical_uri->fragment;
251
252							# a subsequent "$schema" keyword can still change these values
253							$state->@{qw(spec_version vocabularies)} = $self->_get_metaschema_info(
254							$config_override->{metaschema_uri} // $self->METASCHEMA_URIS->{$spec_version},
255							$for_canonical_uri,
256							);
257							}
258	13645					2257570	catch ($e) {
259							if ($e->$_isa('JSON::Schema::Modern::Result')) {
260							push $state->{errors}->@*, $e->errors;
261							}
262							elsif ($e->$_isa('JSON::Schema::Modern::Error')) {
263							push $state->{errors}->@*, $e;
264							}
265							else {
266							()= E({ %$state, exception => 1 }, 'EXCEPTION: '.$e);
267							}
268
269							return $state;
270							}
271
272							try {
273							$self->_traverse_subschema($schema_reference, $state);
274							}
275	13642					36742	catch ($e) {
276							if ($e->$_isa('JSON::Schema::Modern::Error')) {
277							# note: we should never be here, since traversal subs are no longer fatal
278							push $state->{errors}->@*, $e;
279							}
280							else {
281							E({ %$state, exception => 1 }, 'EXCEPTION: '.$e);
282							}
283							}
284
285	13642					27294	delete $state->{traverse};
286	13642					46063	return $state;
287							}
288
289							# the actual runtime evaluation of the schema against input data.
290	13197			13197	1	18666383	sub evaluate ($self, $data, $schema_reference, $config_override = {}) {
	13197					24212
	13197					20129
	13197					20278
	13197					24020
	13197					19679
291	13197	50				34160	croak 'evaluate called in void context' if not defined wantarray;
292
293	13197		100			50774	my $initial_path = $config_override->{traversed_schema_path} // '';
294	13197		100			70270	my $effective_base_uri = Mojo::URL->new($config_override->{effective_base_uri}//'');
295
296							my $state = {
297	13197		100			2431760	data_path => $config_override->{data_path} // '',
298							traversed_schema_path => $initial_path, # the accumulated path as of the start of evaluation, or last $id or $ref
299							initial_schema_uri => Mojo::URL->new, # the canonical URI as of the start of evaluation, or last $id or $ref
300							schema_path => '', # the rest of the path, since the start of evaluation, or last $id or $ref
301							effective_base_uri => $effective_base_uri, # resolve locations against this for errors and annotations
302							errors => [],
303							};
304
305							exists $config_override->{$_} and die $_.' not supported as a config override'
306	13197		50			178724	foreach qw(output_format specification_version);
307
308	13197					22422	my $valid;
309							try {
310							my $schema_info;
311
312							if (not is_ref($schema_reference) or $schema_reference->$_isa('Mojo::URL')) {
313							$schema_info = $self->_fetch_from_uri($schema_reference);
314							$state->{initial_schema_uri} = Mojo::URL->new($config_override->{initial_schema_uri} // '');
315							}
316							else {
317							# traverse is called via add_schema -> ::Document->new -> ::Document->BUILD
318							my $document = $self->add_schema('', $schema_reference);
319							my $base_resource = $document->_get_resource($document->canonical_uri)
320							|| croak "couldn't get resource: document parse error";
321
322							$schema_info = {
323							schema => $document->schema,
324							document => $document,
325							document_path => '',
326							$base_resource->%{qw(canonical_uri specification_version vocabularies configs)},
327							};
328							}
329
330							abort($state, 'EXCEPTION: unable to find resource %s', $schema_reference)
331							if not $schema_info;
332
333							$state = +{
334							%$state,
335							depth => 0,
336							initial_schema_uri => $schema_info->{canonical_uri}, # the canonical URI as of the start of evaluation, or last $id or $ref
337							document => $schema_info->{document}, # the ::Document object containing this schema
338							document_path => $schema_info->{document_path}, # the path within the document of this schema, as of the start of evaluation, or last $id or $ref
339							dynamic_scope => [ $schema_info->{canonical_uri} ],
340							annotations => [],
341							seen => {},
342							spec_version => $schema_info->{specification_version},
343							vocabularies => $schema_info->{vocabularies},
344							callbacks => $config_override->{callbacks} // {},
345							evaluator => $self,
346							$schema_info->{configs}->%*,
347							(map {
348							my $val = $config_override->{$_} // $self->$_;
349							defined $val ? ( $_ => $val ) : ()
350							} qw(validate_formats validate_content_schemas short_circuit collect_annotations scalarref_booleans stringy_numbers strict)),
351							};
352
353							if ($state->{validate_formats}) {
354							$state->{vocabularies} = [
355							map s/^JSON::Schema::Modern::Vocabulary::Format\KAnnotation$/Assertion/r, $state->{vocabularies}->@*
356							];
357							require JSON::Schema::Modern::Vocabulary::FormatAssertion;
358							}
359
360							# we're going to set collect_annotations during evaluation when we see an unevaluated* keyword,
361							# but after we pass to a new data scope we'll clear it again.. unless we've got the config set
362							# globally for the entire evaluation, so we store that value in a high bit.
363							$state->{collect_annotations} = ($state->{collect_annotations}//0) << 8;
364
365							$valid = $self->_eval_subschema($data, $schema_info->{schema}, $state);
366							warn 'result is false but there are no errors' if not $valid and not $state->{errors}->@*;
367							}
368	13197					30733	catch ($e) {
369							if ($e->$_isa('JSON::Schema::Modern::Result')) {
370							return $e;
371							}
372							elsif ($e->$_isa('JSON::Schema::Modern::Error')) {
373							push $state->{errors}->@*, $e;
374							}
375							else {
376							$valid = E({ %$state, exception => 1 }, 'EXCEPTION: '.$e);
377							}
378							}
379
380	13080	50	50			58749	die 'evaluate validity inconsistent with error count' if $valid xor !$state->{errors}->@*;
381
382							return JSON::Schema::Modern::Result->new(
383							output_format => $self->output_format,
384							valid => $valid,
385							$valid
386							# strip annotations from result if user didn't explicitly ask for them
387							? ($config_override->{collect_annotations} // $self->collect_annotations
388							? (annotations => $state->{annotations}) : ())
389	13080	100	100			354960	: (errors => $state->{errors}),
		100
390							);
391							}
392
393	3			3	1	24047	sub validate_schema ($self, $schema, $config_override = {}) {
	3					8
	3					7
	3					7
	3					6
394	3	50				16	croak 'validate_schema called in void context' if not defined wantarray;
395
396							my $metaschema_uri = is_plain_hashref($schema) && $schema->{'$schema'} ? $schema->{'$schema'}
397	3	100	66			44	: $self->METASCHEMA_URIS->{$self->specification_version // $self->SPECIFICATION_VERSION_DEFAULT};
			33
398
399	3					16	return $self->evaluate($schema, $metaschema_uri, $config_override);
400							}
401
402	10			10	1	77498	sub get ($self, $uri) {
	10					24
	10					34
	10					21
403	10					39	my $schema_info = $self->_fetch_from_uri($uri);
404	10	100				897	return if not $schema_info;
405	8	100				1028	my $subschema = is_ref($schema_info->{schema}) ? dclone($schema_info->{schema}) : $schema_info->{schema};
406	8	100				196	return wantarray ? ($subschema, $schema_info->{canonical_uri}) : $subschema;
407							}
408
409							# defined lower down:
410							# sub add_vocabulary { ... }
411							# sub add_encoding { ... }
412							# sub add_media_type { ... }
413
414							######## NO PUBLIC INTERFACES FOLLOW THIS POINT ########
415
416							# current spec version => { keyword => undef, or arrayref of alternatives }
417							my %removed_keywords = (
418							'draft7' => {
419							id => [ '$id' ],
420							},
421							'draft2019-09' => {
422							id => [ '$id' ],
423							definitions => [ '$defs' ],
424							dependencies => [ qw(dependentSchemas dependentRequired) ],
425							},
426							'draft2020-12' => {
427							id => [ '$id' ],
428							definitions => [ '$defs' ],
429							dependencies => [ qw(dependentSchemas dependentRequired) ],
430							'$recursiveAnchor' => [ '$dynamicAnchor' ],
431							'$recursiveRef' => [ '$dynamicRef' ],
432							additionalItems => [ 'items' ],
433							},
434							);
435
436							# {
437							# $spec_version => {
438							# $vocabulary_class => {
439							# traverse => [ [ $keyword => $subref ], [ ... ] ],
440							# evaluate => [ [ $keyword => $subref ], [ ... ] ],
441							# }
442							# }
443							# }
444							# If we could serialize coderefs, this could be an object attribute;
445							# otherwise, we might as well persist this for the lifetime of the process.
446							our $vocabulary_cache = {};
447
448	32147			32147		54564	sub _traverse_subschema ($self, $schema, $state) {
	32147					48063
	32147					47162
	32147					45858
	32147					44970
449	32147					57708	delete $state->{keyword};
450
451							return E($state, 'EXCEPTION: maximum traversal depth exceeded')
452	32147	50				102719	if $state->{depth}++ > $self->max_traversal_depth;
453
454	32147					90080	my $schema_type = get_type($schema);
455	32147	100				162690	return 1 if $schema_type eq 'boolean';
456
457	25365	100				53905	return E($state, 'invalid schema type: %s', $schema_type) if $schema_type ne 'object';
458
459	25355	100				64635	return 1 if not keys %$schema;
460
461	24916					38372	my $valid = 1;
462	24916					120017	my %unknown_keywords = map +($_ => undef), keys %$schema;
463							# we must check the array length on every iteration because some keywords can change it!
464	24916					88669	for (my $idx = 0; $idx <= $state->{vocabularies}->$#*; ++$idx) {
465	159169					253895	my $vocabulary = $state->{vocabularies}[$idx];
466
467							# [ [ $keyword => $subref ], [ ... ] ]
468							my $keyword_list = $vocabulary_cache->{$state->{spec_version}}{$vocabulary}{traverse} //= [
469							map [ $_ => $vocabulary->can('_traverse_keyword_'.($_ =~ s/^\$//r)) ],
470							$vocabulary->keywords($state->{spec_version})
471	159169		100			429704	];
472
473	159169					261227	foreach my $keyword_tuple ($keyword_list->@*) {
474	1348782					2031911	my ($keyword, $sub) = $keyword_tuple->@*;
475	1348782	100				2652208	next if not exists $schema->{$keyword};
476
477							# keywords adjacent to $ref are not evaluated before draft2019-09
478	44850	100	100			174631	next if $keyword ne '$ref' and exists $schema->{'$ref'} and $state->{spec_version} eq 'draft7';
			100
479
480	44824					86564	delete $unknown_keywords{$keyword};
481	44824					80832	$state->{keyword} = $keyword;
482
483	44824	100				139194	if (not $sub->($vocabulary, $schema, $state)) {
484	179	50				604	die 'traverse result is false but there are no errors (keyword: '.$keyword.')' if not $state->{errors}->@*;
485	179					308	$valid = 0;
486	179					489	next;
487							}
488
489	44645	100				140853	if (my $callback = $state->{callbacks}{$keyword}) {
490	4	50				16	if (not $callback->($schema, $state)) {
491	0	0				0	die 'callback result is false but there are no errors (keyword: '.$keyword.')' if not $state->{errors}->@*;
492	0					0	$valid = 0;
493	0					0	next;
494							}
495							}
496							}
497							}
498
499	24916					47987	delete $state->{keyword};
500
501	24916	100	100			77554	if ($self->strict and keys %unknown_keywords) {
502	1	50				14	()= E($state, 'unknown keyword%s found: %s', keys %unknown_keywords > 1 ? 's' : '',
503							join(', ', sort keys %unknown_keywords));
504							}
505
506							# check for previously-supported but now removed keywords
507	24916					117031	foreach my $keyword (sort keys $removed_keywords{$state->{spec_version}}->%*) {
508	93121	100				195347	next if not exists $schema->{$keyword};
509	238					1085	my $message ='no-longer-supported "'.$keyword.'" keyword present (at location "'
510							.canonical_uri($state).'")';
511	238	50				30648	if (my $alternates = $removed_keywords{$state->{spec_version}}->{$keyword}) {
512	238					1233	my @list = map '"'.$_.'"', @$alternates;
513	238	50				659	@list = ((map $_.',', @list[0..$#list-1]), $list[-1]) if @list > 2;
514	238	100				747	splice(@list, -1, 0, 'or') if @list > 1;
515	238					853	$message .= ': this should be rewritten as '.join(' ', @list);
516							}
517	238					49579	carp $message;
518							}
519
520	24916					125761	return $valid;
521							}
522
523	27540			27540		47285	sub _eval_subschema ($self, $data, $schema, $state) {
	27540					42193
	27540					43441
	27540					41028
	27540					39372
	27540					37483
524	27540	50				61593	croak '_eval_subschema called in void context' if not defined wantarray;
525
526							# callers created a new $state for us, so we do not propagate upwards changes to depth, traversed
527							# paths; but annotations, errors are arrayrefs so their contents will be shared
528	27540		100			87007	$state->{dynamic_scope} = [ ($state->{dynamic_scope}//[])->@* ];
529	27540					235114	delete $state->@{'keyword', grep /^_/, keys %$state};
530
531							abort($state, 'EXCEPTION: maximum evaluation depth exceeded')
532	27540	100				122299	if $state->{depth}++ > $self->max_traversal_depth;
533
534	27537					83952	my $schema_type = get_type($schema);
535	27537	100	66			76784	return $schema || E($state, 'subschema is false') if $schema_type eq 'boolean';
536
537							# this should never happen, due to checks in traverse
538	26769	50				55696	abort($state, 'invalid schema type: %s', $schema_type) if $schema_type ne 'object';
539
540	26769	100				63959	return 1 if not keys %$schema;
541
542							# find all schema locations in effect at this data path + canonical_uri combination
543							# if any of them are absolute prefix of this schema location, we are in a loop.
544	26521					63176	my $canonical_uri = canonical_uri($state);
545	26521					68884	my $schema_location = $state->{traversed_schema_path}.$state->{schema_path};
546							abort($state, 'EXCEPTION: infinite loop detected (same location evaluated twice)')
547							if grep substr($schema_location, 0, length) eq $_,
548	26521	100				117244	keys $state->{seen}{$state->{data_path}}{$canonical_uri}->%*;
549	26520					4045706	$state->{seen}{$state->{data_path}}{$canonical_uri}{$schema_location}++;
550
551	26520					3616705	my $valid = 1;
552	26520					144720	my %unknown_keywords = map +($_ => undef), keys %$schema;
553
554							# set aside annotations collected so far; they are not used in the current scope's evaluation
555	26520					63322	my $parent_annotations = $state->{annotations};
556	26520					52483	$state->{annotations} = [];
557
558							# in order to collect annotations from applicator keywords only when needed, we twiddle the low
559							# bit if we see a local unevaluated* keyword, and clear it again as we move on to a new data path.
560	26520		100			116223	$state->{collect_annotations} |= 0+(exists $schema->{unevaluatedItems} || exists $schema->{unevaluatedProperties});
561
562							# in order to collect annotations for unevaluated* keywords, we sometimes need to ignore the
563							# suggestion to short_circuit evaluation at this scope (but lower scopes are still fine)
564							$state->{short_circuit} = ($state->{short_circuit} || delete($state->{short_circuit_suggested}))
565	26520		100			134819	&& !exists($schema->{unevaluatedItems}) && !exists($schema->{unevaluatedProperties});
566
567							ALL_KEYWORDS:
568	26520					70347	foreach my $vocabulary ($state->{vocabularies}->@*) {
569							# [ [ $keyword => $subref|undef ], [ ... ] ]
570							my $keyword_list = $vocabulary_cache->{$state->{spec_version}}{$vocabulary}{evaluate} //= [
571							map [ $_ => $vocabulary->can('_eval_keyword_'.($_ =~ s/^\$//r)) ],
572							$vocabulary->keywords($state->{spec_version})
573	154247		100			436562	];
574
575	154247					242114	foreach my $keyword_tuple ($keyword_list->@*) {
576	1318748					1998762	my ($keyword, $sub) = $keyword_tuple->@*;
577	1318748	100				2503299	next if not exists $schema->{$keyword};
578
579							# keywords adjacent to $ref are not evaluated before draft2019-09
580	54650	100	100			194899	next if $keyword ne '$ref' and exists $schema->{'$ref'} and $state->{spec_version} eq 'draft7';
			100
581
582	54639					106487	delete $unknown_keywords{$keyword};
583	54639					100757	$state->{keyword} = $keyword;
584
585	54639	100				106089	if ($sub) {
586	42743					72256	my $error_count = $state->{errors}->@*;
587
588	42743	100				143693	if (not $sub->($vocabulary, $data, $schema, $state)) {
589							warn 'evaluation result is false but there are no errors (keyword: '.$keyword.')'
590	10438	50				32738	if $error_count == $state->{errors}->@*;
591	10438					17768	$valid = 0;
592
593	10438	100				31872	last ALL_KEYWORDS if $state->{short_circuit};
594	6380					18283	next;
595							}
596							}
597
598	44134	100				202065	if (my $callback = $state->{callbacks}{$keyword}) {
599	19					33	my $error_count = $state->{errors}->@*;
600
601	19	100				60	if (not $callback->($data, $schema, $state)) {
602							warn 'callback result is false but there are no errors (keyword: '.$keyword.')'
603	2	50				9	if $error_count == $state->{errors}->@*;
604	2					3	$valid = 0;
605
606	2	100				13	last ALL_KEYWORDS if $state->{short_circuit};
607	1					3	next;
608							}
609							}
610							}
611							}
612
613	26453					52214	delete $state->{keyword};
614
615	26453	100	66			62838	if ($state->{strict} and keys %unknown_keywords) {
616	2	50				23	abort($state, 'unknown keyword%s found: %s', keys %unknown_keywords > 1 ? 's' : '',
617							join(', ', sort keys %unknown_keywords));
618							}
619
620	26451	100	100			95329	if ($valid and $state->{collect_annotations} and $state->{spec_version} !~ qr/^draft(7|2019-09)$/) {
			100
621							annotate_self(+{ %$state, keyword => $_, _unknown => 1 }, $schema)
622	814					2756	foreach sort keys %unknown_keywords;
623							}
624
625							# only keep new annotations if schema is valid
626	26451	100				59234	push $parent_annotations->@*, $state->{annotations}->@* if $valid;
627
628	26451					188106	return $valid;
629							}
630
631							has _resource_index => (
632							is => 'bare',
633							isa => HashRef[my $resource_type = Dict[
634							canonical_uri => InstanceOf['Mojo::URL'],
635							path => Str,
636							specification_version => my $spec_version_type = Enum(SPECIFICATION_VERSIONS_SUPPORTED),
637							document => InstanceOf['JSON::Schema::Modern::Document'],
638							# the vocabularies used when evaluating instance data against schema
639							vocabularies => ArrayRef[my $vocabulary_class_type = ClassName->where(q{$_->DOES('JSON::Schema::Modern::Vocabulary')})],
640							configs => HashRef,
641							Slurpy[HashRef[Undef]], # no other fields allowed
642							]],
643							handles_via => 'Hash',
644							handles => {
645							_add_resources => 'set',
646							_get_resource => 'get',
647							_remove_resource => 'delete',
648							_resource_index => 'elements',
649							_resource_keys => 'keys',
650							_add_resources_unsafe => 'set',
651							_canonical_resources => 'values',
652							_resource_exists => 'exists',
653							},
654							lazy => 1,
655							default => sub { {} },
656							);
657
658							around _add_resources => sub {
659							my ($orig, $self) = (shift, shift);
660
661							my @resources;
662							foreach my $pair (sort { $a->[0] cmp $b->[0] } pairs @_) {
663							my ($key, $value) = @$pair;
664
665							$resource_type->($value); # check type of hash value against Dict
666
667							if (my $existing = $self->_get_resource($key)) {
668							# we allow overwriting canonical_uri = '' to allow for ad hoc evaluation of schemas that
669							# lack all identifiers altogether, but preserve other resources from the original document
670							if ($key ne '') {
671							next if $existing->{path} eq $value->{path}
672							and $existing->{canonical_uri} eq $value->{canonical_uri}
673							and $existing->{specification_version} eq $value->{specification_version}
674							and refaddr($existing->{document}) == refaddr($value->{document});
675							croak 'uri "'.$key.'" conflicts with an existing schema resource';
676							}
677							}
678							elsif ($self->CACHED_METASCHEMAS->{$key}) {
679							croak 'uri "'.$key.'" conflicts with an existing meta-schema resource';
680							}
681
682							my $fragment = $value->{canonical_uri}->fragment;
683							croak sprintf('canonical_uri cannot contain an empty fragment (%s)', $value->{canonical_uri})
684							if defined $fragment and $fragment eq '';
685
686							croak sprintf('canonical_uri cannot contain a plain-name fragment (%s)', $value->{canonical_uri})
687							if ($fragment // '') =~ m{^[^/]};
688
689							$self->$orig($key, $value);
690							}
691							};
692
693							# $vocabulary uri (not its $id!) => [ spec_version, class ]
694							has _vocabulary_classes => (
695							is => 'bare',
696							isa => HashRef[
697							Tuple[
698							$spec_version_type,
699							$vocabulary_class_type,
700							]
701							],
702							handles_via => 'Hash',
703							handles => {
704							_get_vocabulary_class => 'get',
705							_set_vocabulary_class => 'set',
706							_get_vocabulary_values => 'values',
707							},
708							lazy => 1,
709							default => sub {
710							+{
711							map { my $class = $_; pairmap { $a => [ $b, $class ] } $class->vocabulary }
712							map use_module('JSON::Schema::Modern::Vocabulary::'.$_),
713							qw(Core Applicator Validation FormatAssertion FormatAnnotation Content MetaData Unevaluated)
714							}
715							},
716							);
717
718	8			8	1	26969	sub add_vocabulary ($self, $classname) {
	8					16
	8					15
	8					10
719	8	50				203	return if grep $_->[1] eq $classname, $self->_get_vocabulary_values;
720
721	8					1083	$vocabulary_class_type->(use_module($classname));
722
723							# uri => version, uri => version
724	5					9106	foreach my $pair (pairs $classname->vocabulary) {
725	5					78	my ($uri_string, $spec_version) = @$pair;
726	5					25	Str->where(q{my $uri = Mojo::URL->new($_); $uri->is_abs && !defined $uri->fragment})->($uri_string);
727	4					5682	$spec_version_type->($spec_version);
728	2					342	$self->_set_vocabulary_class($uri_string => [ $spec_version, $classname ])
729							}
730							}
731
732							# $schema uri => [ spec_version, [ vocab classes ] ].
733							has _metaschema_vocabulary_classes => (
734							is => 'bare',
735							isa => HashRef[
736							Tuple[
737							$spec_version_type,
738							ArrayRef[$vocabulary_class_type],
739							]
740							],
741							handles_via => 'Hash',
742							handles => {
743							_get_metaschema_vocabulary_classes => 'get',
744							_set_metaschema_vocabulary_classes => 'set',
745							__all_metaschema_vocabulary_classes => 'values',
746							},
747							lazy => 1,
748							default => sub {
749							my @modules = map use_module('JSON::Schema::Modern::Vocabulary::'.$_),
750							qw(Core Applicator Validation FormatAnnotation Content MetaData Unevaluated);
751							+{
752							'https://json-schema.org/draft/2020-12/schema' => [ 'draft2020-12', [ @modules ] ],
753							do { pop @modules; () },
754							'https://json-schema.org/draft/2019-09/schema' => [ 'draft2019-09', \@modules ],
755							'http://json-schema.org/draft-07/schema#' => [ 'draft7', \@modules ],
756							},
757							},
758							);
759
760							# retrieves metaschema info either from cache or by parsing the schema for vocabularies
761							# throws a JSON::Schema::Modern::Result on error
762	13645			13645		24223	sub _get_metaschema_info ($self, $metaschema_uri, $for_canonical_uri) {
	13645					21108
	13645					21317
	13645					19830
	13645					18898
763							# check the cache
764	13645					304326	my $metaschema_info = $self->_get_metaschema_vocabulary_classes($metaschema_uri);
765	13645	100				1533154	return @$metaschema_info if $metaschema_info;
766
767							# otherwise, fetch the metaschema and parse its $vocabulary keyword.
768							# we do this by traversing a baby schema with just the $schema keyword.
769	5					46	my $state = $self->traverse({ '$schema' => $metaschema_uri.'' });
770							die JSON::Schema::Modern::Result->new(
771							output_format => $self->output_format,
772							valid => JSON::PP::false,
773							errors => [
774							map {
775	9					424	my $e = $_;
776							# absolute location is undef iff the location = '/$schema'
777	9		66			48	my $absolute_location = $e->absolute_keyword_location // $for_canonical_uri;
778	9	100	100			71	JSON::Schema::Modern::Error->new(
		100
779							keyword => $e->keyword eq '$schema' ? '' : $e->keyword,
780							instance_location => $e->instance_location,
781							keyword_location => ($for_canonical_uri->fragment//'').($e->keyword_location =~ s{^/\$schema\b}{}r),
782							length $absolute_location ? ( absolute_keyword_location => $absolute_location ) : (),
783							error => $e->error,
784							)
785							}
786							$state->{errors}->@* ],
787							exception => 1,
788	5	100				38	) if $state->{errors}->@*;
789	2					40	return ($state->{spec_version}, $state->{vocabularies});
790							}
791
792							# used for determining a default '$schema' keyword where there is none
793	34					4797	use constant METASCHEMA_URIS => {
794							'draft2020-12' => 'https://json-schema.org/draft/2020-12/schema',
795							'draft2019-09' => 'https://json-schema.org/draft/2019-09/schema',
796							'draft7' => 'http://json-schema.org/draft-07/schema#',
797	34			34		370	};
	34					86
798
799	34					76781	use constant CACHED_METASCHEMAS => {
800							'https://json-schema.org/draft/2020-12/meta/applicator' => 'draft2020-12/meta/applicator.json',
801							'https://json-schema.org/draft/2020-12/meta/content' => 'draft2020-12/meta/content.json',
802							'https://json-schema.org/draft/2020-12/meta/core' => 'draft2020-12/meta/core.json',
803							'https://json-schema.org/draft/2020-12/meta/format-annotation' => 'draft2020-12/meta/format-annotation.json',
804							'https://json-schema.org/draft/2020-12/meta/format-assertion' => 'draft2020-12/meta/format-assertion.json',
805							'https://json-schema.org/draft/2020-12/meta/meta-data' => 'draft2020-12/meta/meta-data.json',
806							'https://json-schema.org/draft/2020-12/meta/unevaluated' => 'draft2020-12/meta/unevaluated.json',
807							'https://json-schema.org/draft/2020-12/meta/validation' => 'draft2020-12/meta/validation.json',
808							'https://json-schema.org/draft/2020-12/output/schema' => 'draft2020-12/output/schema.json',
809							'https://json-schema.org/draft/2020-12/schema' => 'draft2020-12/schema.json',
810
811							'https://json-schema.org/draft/2019-09/meta/applicator' => 'draft2019-09/meta/applicator.json',
812							'https://json-schema.org/draft/2019-09/meta/content' => 'draft2019-09/meta/content.json',
813							'https://json-schema.org/draft/2019-09/meta/core' => 'draft2019-09/meta/core.json',
814							'https://json-schema.org/draft/2019-09/meta/format' => 'draft2019-09/meta/format.json',
815							'https://json-schema.org/draft/2019-09/meta/meta-data' => 'draft2019-09/meta/meta-data.json',
816							'https://json-schema.org/draft/2019-09/meta/validation' => 'draft2019-09/meta/validation.json',
817							'https://json-schema.org/draft/2019-09/output/schema' => 'draft2019-09/output/schema.json',
818							'https://json-schema.org/draft/2019-09/schema' => 'draft2019-09/schema.json',
819
820							# trailing # is omitted because we always cache documents by their canonical (fragmentless) URI
821							'http://json-schema.org/draft-07/schema' => 'draft7/schema.json',
822	34			34		271	};
	34					111
823
824							# returns the same as _get_resource
825	3701			3701		6925	sub _get_or_load_resource ($self, $uri) {
	3701					6062
	3701					5559
	3701					5246
826	3701					85534	my $resource = $self->_get_resource($uri);
827	3701	100				1193601	return $resource if $resource;
828
829	78	100				506	if (my $local_filename = $self->CACHED_METASCHEMAS->{$uri}) {
830	72					19104	my $file = path(dist_dir('JSON-Schema-Modern'), $local_filename);
831	72					13990	my $schema = $self->_json_decoder->decode($file->slurp_raw);
832	72					22738	my $document = JSON::Schema::Modern::Document->new(schema => $schema, evaluator => $self);
833
834							# this should be caught by the try/catch in evaluate()
835	72	50				43247	die JSON::Schema::Modern::Result->new(
836							output_format => $self->output_format,
837							valid => 0,
838							errors => [ $document->errors ],
839							exception => 1,
840							) if $document->has_errors;
841
842							# we have already performed the appropriate collision checks, so we bypass them here
843	72					4244	$self->_add_resources_unsafe(
844							map +($_->[0] => +{ $_->[1]->%*, document => $document }),
845							$document->resource_pairs
846							);
847
848	72					15009	return $self->_get_resource($uri);
849							}
850
851							# TODO:
852							# - load from network or disk
853
854	6					1297	return;
855							};
856
857							# returns information necessary to use a schema found at a particular URI:
858							# - a schema (which may not be at a document root)
859							# - the canonical uri for that schema,
860							# - the JSON::Schema::Modern::Document object that holds that schema
861							# - the path relative to the document root for this schema
862							# - the specification version that applies to this schema
863							# - the vocabularies to use when considering schema keywords
864							# - the config overrides to set when considering schema keywords
865							# creates a Document and adds it to the resource index, if not already present.
866	4474			4474		7300	sub _fetch_from_uri ($self, $uri) {
	4474					7056
	4474					6530
	4474					6199
867	4474	100				12232	$uri = Mojo::URL->new($uri) if not is_ref($uri);
868	4474					23994	my $fragment = $uri->fragment;
869
870	4474	100	100			35091	if (not length($fragment) or $fragment =~ m{^/}) {
871	3700					10666	my $base = $uri->clone->fragment(undef);
872	3700	100				452732	if (my $resource = $self->_get_or_load_resource($base)) {
873	3694		100			51260	my $subschema = $resource->{document}->get(my $document_path = $resource->{path}.($fragment//''));
874	3694	100				69056	return if not defined $subschema;
875	3688					6708	my $document = $resource->{document};
876							my $closest_resource = first { !length($_->[1]{path}) # document root
877	3882	100	100	3882		374557	|| length($document_path)
878							&& $document_path =~ m{^\Q$_->[1]{path}\E(?:/|\z)} } # path is above present location
879	1361					86068	sort { length($b->[1]{path}) <=> length($a->[1]{path}) } # sort by length, descending
880	3688					89491	grep { not length Mojo::URL->new($_->[0])->fragment } # omit anchors
	6049					498330
881							$document->resource_pairs;
882
883							my $canonical_uri = $closest_resource->[1]{canonical_uri}->clone
884	3688					23584	->fragment(substr($document_path, length($closest_resource->[1]{path})));
885	3688	100				322183	$canonical_uri->fragment(undef) if not length($canonical_uri->fragment);
886							return {
887							schema => $subschema,
888							canonical_uri => $canonical_uri,
889							document => $document,
890							document_path => $document_path,
891	3688					72807	$resource->%{qw(specification_version vocabularies configs)}, # reference, not copy
892							};
893							}
894							}
895							else { # we are following a URI with a plain-name fragment
896	774	100				17432	if (my $resource = $self->_get_resource($uri)) {
897	684					223600	my $subschema = $resource->{document}->get($resource->{path});
898	684	50				12960	return if not defined $subschema;
899							return {
900							schema => $subschema,
901							canonical_uri => $resource->{canonical_uri}->clone, # this is not the anchor-containing URI
902							document => $resource->{document},
903							document_path => $resource->{path},
904	684					2473	$resource->%{qw(specification_version vocabularies configs)}, # reference, not copy
905							};
906							}
907							}
908							}
909
910							# used for internal encoding as well (when caching serialized schemas)
911							has _json_decoder => (
912							is => 'ro',
913							isa => HasMethods[qw(encode decode)],
914							lazy => 1,
915							default => sub { JSON::MaybeXS->new(allow_nonref => 1, canonical => 1, utf8 => 1, allow_bignum => 1, convert_blessed => 1) },
916							);
917
918							# since media types are case-insensitive, all type names must be foldcased on insertion.
919							has _media_type => (
920							is => 'bare',
921							isa => my $media_type_type = Map[Str->where(q{$_ eq CORE::fc($_)}), CodeRef],
922							handles_via => 'Hash',
923							handles => {
924							get_media_type => 'get',
925							add_media_type => 'set',
926							_media_types => 'keys',
927							},
928							lazy => 1,
929							default => sub ($self) {
930							my $_json_media_type = sub ($content_ref) {
931							# utf-8 decoding is always done, as per the JSON spec.
932							# other charsets are not supported: see RFC8259 §11
933							\ JSON::MaybeXS->new(allow_nonref => 1, utf8 => 1)->decode($content_ref->$*);
934							};
935							+{
936							(map +($_ => $_json_media_type),
937							qw(application/json application/schema+json application/schema-instance+json)),
938							(map +($_ => sub ($content_ref) { $content_ref }),
939							qw(text/* application/octet-stream)),
940							'application/x-www-form-urlencoded' => sub ($content_ref) {
941							\ Mojo::Parameters->new->charset('UTF-8')->parse($content_ref->$*)->to_hash;
942							},
943							'application/x-ndjson' => sub ($content_ref) {
944							my $decoder = JSON::MaybeXS->new(allow_nonref => 1, utf8 => 1);
945							my $line = 0; # line numbers start at 1
946							\[ map {
947							do {
948							try { ++$line; $decoder->decode($_) }
949							catch ($e) { die 'parse error at line '.$line.': '.$e }
950							}
951							}
952							split(/\r?\n/, $content_ref->$*)
953							];
954							},
955							};
956							},
957							);
958
959							# get_media_type('TExT/bloop') will fall through to matching an entry for 'text/*' or '*/*'
960							around get_media_type => sub ($orig, $self, $type) {
961							my $mt = $self->$orig(fc $type);
962							return $mt if $mt;
963
964							return $self->$orig((first { m{([^/]+)/\*$} && fc($type) =~ m{^\Q$1\E/[^/]+$} } $self->_media_types)
965							// '*/*');
966							};
967
968							before add_media_type => sub ($self, $type, $sub) { $media_type_type->({ $type => $sub }) };
969
970							has _encoding => (
971							is => 'bare',
972							isa => HashRef[CodeRef],
973							handles_via => 'Hash',
974							handles => {
975							get_encoding => 'get',
976							add_encoding => 'set',
977							},
978							lazy => 1,
979							default => sub ($self) {
980							+{
981							identity => sub ($content_ref) { $content_ref },
982							base64 => sub ($content_ref) {
983							die "invalid characters\n"
984							if $content_ref->$* =~ m{[^A-Za-z0-9+/=]} or $content_ref->$* =~ m{=(?=[^=])};
985							require MIME::Base64; \ MIME::Base64::decode_base64($content_ref->$*);
986							},
987							base64url => sub ($content_ref) {
988							die "invalid characters\n"
989							if $content_ref->$* =~ m{[^A-Za-z0-9=_-]} or $content_ref->$* =~ m{=(?=[^=])};
990							require MIME::Base64; \ MIME::Base64::decode_base64url($content_ref->$*);
991							},
992							};
993							},
994							);
995
996							# callback hook for Sereal::Encoder
997	1			1	0	3	sub FREEZE ($self, $serializer) {
	1					4
	1					3
	1					4
998	1					15	my $data = +{ %$self };
999							# Cpanel::JSON::XS doesn't serialize: https://github.com/Sereal/Sereal/issues/266
1000							# coderefs can't serialize cleanly and must be re-added by the user.
1001	1					8	delete $data->@{qw(_json_decoder _format_validations _media_type _encoding)};
1002	1					3	return $data;
1003							}
1004
1005							# callback hook for Sereal::Decoder
1006	1			1	0	10191	sub THAW ($class, $serializer, $data) {
	1					3
	1					2
	1					2
	1					2
1007	1					3	my $self = bless($data, $class);
1008
1009							# load all vocabulary classes
1010	1					26	require_module($_) foreach uniq map $_->{vocabularies}->@*, $self->_canonical_resources;
1011
1012	1					222	return $self;
1013							}
1014
1015							1;
1016
1017							__END__
1018
1019							=pod
1020
1021							=encoding UTF-8
1022
1023							=for stopwords schema subschema metaschema validator evaluator listref
1024
1025							=head1 NAME
1026
1027							JSON::Schema::Modern - Validate data against a schema
1028
1029							=head1 VERSION
1030
1031							version 0.570
1032
1033							=head1 SYNOPSIS
1034
1035							use JSON::Schema::Modern;
1036
1037							$js = JSON::Schema::Modern->new(
1038							specification_version => 'draft2020-12',
1039							output_format => 'flag',
1040							... # other options
1041							);
1042							$result = $js->evaluate($instance_data, $schema_data);
1043
1044							=head1 DESCRIPTION
1045
1046							This module aims to be a fully-compliant L<JSON Schema|https://json-schema.org/> evaluator and
1047							validator, targeting the currently-latest
1048							L<Draft 2020-12|https://json-schema.org/specification-links.html#2020-12>
1049							version of the specification.
1050
1051							=head1 CONFIGURATION OPTIONS
1052
1053							These values are all passed as arguments to the constructor.
1054
1055							=head2 specification_version
1056
1057							Indicates which version of the JSON Schema specification is used during evaluation. When not set,
1058							this value is derived from the C<$schema> keyword in the schema used in evaluation, or defaults to
1059							the latest version (currently C<draft2020-12>).
1060
1061							The use of this option is I<HIGHLY> encouraged to ensure continued correct operation of your schema.
1062							The current default value will not stay the same over time.
1063
1064							May be one of:
1065
1066							=over 4
1067
1068							=item *
1069
1070							L<C<draft2020-12> or C<2020-12>|https://json-schema.org/specification-links.html#2020-12>, corresponding to metaschema C<https://json-schema.org/draft/2020-12/schema>
1071
1072							=item *
1073
1074							L<C<draft2019-09> or C<2019-09>|https://json-schema.org/specification-links.html#2019-09-formerly-known-as-draft-8>, corresponding to metaschema C<https://json-schema.org/draft/2019-09/schema>
1075
1076							=item *
1077
1078							L<C<draft7> or C<7>|https://json-schema.org/specification-links.html#draft-7>, corresponding to metaschema C<http://json-schema.org/draft-07/schema#>
1079
1080							=back
1081
1082							Note that you can also use a C<$schema> keyword in the schema itself, to specify a different metaschema or
1083							specification version.
1084
1085							=head2 output_format
1086
1087							One of: C<flag>, C<basic>, C<strict_basic>, C<detailed>, C<verbose>, C<terse>. Defaults to C<basic>.
1088							C<strict_basic> can only be used with C<specification_version = draft2019-09>.
1089							Passed to L<JSON::Schema::Modern::Result/output_format>.
1090
1091							=head2 short_circuit
1092
1093							When true, evaluation will return early in any execution path as soon as the outcome can be
1094							determined, rather than continuing to find all errors or annotations.
1095							This option is safe to use in all circumstances, even in the presence of
1096							C<unevaluatedItems> and C<unevaluatedProperties> keywords: the validation result will not change;
1097							only some errors will be omitted from the result.
1098
1099							Defaults to true when C<output_format> is C<flag>, and false otherwise.
1100
1101							=head2 max_traversal_depth
1102
1103							The maximum number of levels deep a schema traversal may go, before evaluation is halted. This is to
1104							protect against accidental infinite recursion, such as from two subschemas that each reference each
1105							other, or badly-written schemas that could be optimized. Defaults to 50.
1106
1107							=head2 validate_formats
1108
1109							When true, the C<format> keyword will be treated as an assertion, not merely an annotation. Defaults
1110							to false.
1111
1112							=head2 format_validations
1113
1114							=for stopwords subref
1115
1116							An optional hashref that allows overriding the validation method for formats, or adding new ones.
1117							Overrides to existing formats (see L</Format Validation>)
1118							must be specified in the form of C<< { $format_name => $format_sub } >>, where
1119							the format sub is a subref that takes one argument and returns a boolean result. New formats must
1120							be specified in the form of C<< { $format_name => { type => $type, sub => $format_sub } } >>,
1121							where the type indicates which of the core JSON Schema types (null, object, array, boolean, string,
1122							number, or integer) the instance value must be for the format validation to be considered.
1123
1124							=head2 validate_content_schemas
1125
1126							When true, the C<contentMediaType> and C<contentSchema> keywords are not treated as pure annotations:
1127							C<contentEncoding> (when present) is used to decode the applied data payload and then
1128							C<contentMediaType> will be used as the media-type for decoding to produce the data payload which is
1129							then applied to the schema in C<contentSchema> for validation. (Note that treating these keywords as
1130							anything beyond simple annotations is contrary to the specification, therefore this option defaults
1131							to false.)
1132
1133							See L</add_media_type> and L</add_encoding> for adding additional type support.
1134
1135							=for stopwords shhh
1136
1137							Technically only draft7 allows this and drafts 2019-09 and 2020-12 prohibit ever returning the
1138							subschema evaluation results together with their parent schema's results, so shhh. I'm trying to get this
1139							fixed for the next draft.
1140
1141							=head2 collect_annotations
1142
1143							When true, annotations are collected from keywords that produce them, when validation succeeds.
1144							These annotations are available in the returned result (see L<JSON::Schema::Modern::Result>).
1145							Defaults to false.
1146
1147							=head2 scalarref_booleans
1148
1149							When true, any value that is expected to be a boolean B<in the instance data> may also be expressed
1150							as the scalar references C<\0> or C<\1> (which are serialized as booleans by JSON backends).
1151
1152							Defaults to false.
1153
1154							=head2 stringy_numbers
1155
1156							When true, any value that is expected to be a number or integer B<in the instance data> may also be
1157							expressed as a string. This does B<not> apply to the C<const> or C<enum> keywords, but only
1158							the following keywords:
1159
1160							=over 4
1161
1162							=item *
1163
1164							C<type> (where both C<string> and C<number> (and possibly C<integer>) are considered valid)
1165
1166							=item *
1167
1168							C<multipleOf>
1169
1170							=item *
1171
1172							C<maximum>
1173
1174							=item *
1175
1176							C<exclusiveMaximum>
1177
1178							=item *
1179
1180							C<minimum>
1181
1182							=item *
1183
1184							C<exclusiveMinimum>
1185
1186							=back
1187
1188							This allows you to write a schema like this (which validates a string representing an integer):
1189
1190							type: string
1191							pattern: ^[0-9]+$
1192							multipleOf: 4
1193							minimum: 16
1194							maximum: 256
1195
1196							Such keywords are only applied if the value looks like a number, and do not generate a failure
1197							otherwise. Values are determined to be numbers via L<perlapi/looks_like_number>.
1198							This option is only intended to be used for evaluating data from sources that can only be strings,
1199							such as the extracted value of an HTTP header or query parameter.
1200
1201							Defaults to false.
1202
1203							=head2 strict
1204
1205							When true, unrecognized keywords are disallowed in schemas (they will cause an immediate abort
1206							in L</traverse> or L</evaluate>).
1207
1208							=head1 METHODS
1209
1210							=for Pod::Coverage BUILDARGS FREEZE THAW
1211
1212							=head2 evaluate_json_string
1213
1214							$result = $js->evaluate_json_string($data_as_json_string, $schema);
1215							$result = $js->evaluate_json_string($data_as_json_string, $schema, { collect_annotations => 1});
1216
1217							Evaluates the provided instance data against the known schema document.
1218
1219							The data is in the form of a JSON-encoded string (in accordance with
1220							L<RFC8259|https://datatracker.ietf.org/doc/html/rfc8259>). B<The string is expected to be UTF-8 encoded.>
1221
1222							The schema must be in one of these forms:
1223
1224							=over 4
1225
1226							=item *
1227
1228							a Perl data structure, such as what is returned from a JSON decode operation,
1229
1230							=item *
1231
1232							a L<JSON::Schema::Modern::Document> object,
1233
1234							=item *
1235
1236							or a URI string indicating the location where such a schema is located.
1237
1238							=back
1239
1240							Optionally, a hashref can be passed as a third parameter which allows changing the values of the
1241							L</short_circuit>, L</collect_annotations>, L</scalarref_booleans>,
1242							L</stringy_numbers>, L</strict>, L</validate_formats>, and/or L</validate_content_schemas>
1243							settings for just this evaluation call.
1244
1245							You can also use these keys to alter behaviour (these are generally only used by custom validation
1246							applications that contain embedded JSON Schemas):
1247
1248							=over 4
1249
1250							=item *
1251
1252							C<data_path>: adjusts the effective path of the data instance as of the start of evaluation
1253
1254							=item *
1255
1256							C<traversed_schema_path>: adjusts the accumulated path as of the start of evaluation (or last C<$id> or C<$ref>)
1257
1258							=item *
1259
1260							C<initial_schema_uri>: adjusts the recorded absolute keyword location as of the start of evaluation
1261
1262							=item *
1263
1264							C<effective_base_uri>: locations in errors and annotations are resolved against this URI
1265
1266							=back
1267
1268							The return value is a L<JSON::Schema::Modern::Result> object, which can also be used as a boolean.
1269
1270							=head2 evaluate
1271
1272							$result = $js->evaluate($instance_data, $schema);
1273							$result = $js->evaluate($instance_data, $schema, { short_circuit => 0 });
1274
1275							Evaluates the provided instance data against the known schema document.
1276
1277							The data is in the form of an unblessed nested Perl data structure representing any type that JSON
1278							allows: null, boolean, string, number, object, array. (See L</TYPES> below.)
1279
1280							The schema must be in one of these forms:
1281
1282							=over 4
1283
1284							=item *
1285
1286							a Perl data structure, such as what is returned from a JSON decode operation,
1287
1288							=item *
1289
1290							a L<JSON::Schema::Modern::Document> object,
1291
1292							=item *
1293
1294							or a URI string indicating the location where such a schema is located.
1295
1296							=back
1297
1298							Optionally, a hashref can be passed as a third parameter which allows changing the values of the
1299							L</short_circuit>, L</collect_annotations>, L</scalarref_booleans>,
1300							L</stringy_numbers>, L</strict>, L</validate_formats>, and/or L</validate_content_schemas>
1301							settings for just this evaluation call.
1302
1303							You can also use these keys to alter behaviour (these are generally only used by custom validation
1304							applications that contain embedded JSON Schemas):
1305
1306							=over 4
1307
1308							=item *
1309
1310							C<data_path>: adjusts the effective path of the data instance as of the start of evaluation
1311
1312							=item *
1313
1314							C<traversed_schema_path>: adjusts the accumulated path as of the start of evaluation (or last C<$id> or C<$ref>)
1315
1316							=item *
1317
1318							C<initial_schema_uri>: adjusts the recorded absolute keyword location as of the start of evaluation
1319
1320							=item *
1321
1322							C<effective_base_uri>: locations in errors and annotations are resolved against this URI
1323
1324							=back
1325
1326							You can pass a series of callback subs to this method corresponding to keywords, which is useful for
1327							identifying various data that are not exposed by annotations.
1328							This feature is highly experimental and may change in the future.
1329
1330							For example, to find the locations where all C<$ref> keywords are applied B<successfully>:
1331
1332							my @used_ref_at;
1333							$js->evaluate($data, $schema_or_uri, {
1334							callbacks => {
1335							'$ref' => sub ($data, $schema, $state) {
1336							push @used_ref_at, $state->{data_path};
1337							}
1338							},
1339							});
1340
1341							The return value is a L<JSON::Schema::Modern::Result> object, which can also be used as a boolean.
1342							Callbacks are not compatible with L</short_circuit> mode.
1343
1344							=head2 validate_schema
1345
1346							$result = $js->validate_schema($schema);
1347
1348							Evaluates the provided schema as instance data against its metaschema. Accepts C<$schema> and
1349							C<$config_override> parameters in the same form as L</evaluate>.
1350
1351							=head2 traverse
1352
1353							$result = $js->traverse($schema);
1354							$result = $js->traverse($schema, { initial_schema_uri => 'http://example.com' });
1355
1356							Traverses the provided schema without evaluating it against any instance data. Returns the
1357							internal state object accumulated during the traversal, including any identifiers found therein, and
1358							any errors found during parsing. For internal purposes only.
1359
1360							Optionally, a hashref can be passed as a second parameter which alters some
1361							behaviour (these are generally only used by custom validation
1362							applications that contain embedded JSON Schemas):
1363
1364							=over 4
1365
1366							=item *
1367
1368							C<traversed_schema_path>: adjusts the accumulated path as of the start of evaluation (or last C<$id> or C<$ref>)
1369
1370							=item *
1371
1372							C<initial_schema_uri>: adjusts the recorded absolute keyword location as of the start of evaluation
1373
1374							=item *
1375
1376							C<metaschema_uri>: use the indicated URI as the metaschema
1377
1378							=back
1379
1380							You can pass a series of callback subs to this method corresponding to keywords, which is useful for
1381							extracting data from within schemas and skipping properties that may look like keywords but actually
1382							are not (for example C<{"const":{"$ref": "this is not actually a $ref"}}>). This feature is highly
1383							experimental and is highly likely to change in the future.
1384
1385							For example, to find the resolved targets of all C<$ref> keywords in a schema document:
1386
1387							my @refs;
1388							JSON::Schema::Modern->new->traverse($schema, {
1389							callbacks => {
1390							'$ref' => sub ($schema, $state) {
1391							push @refs, Mojo::URL->new($schema->{'$ref'})
1392							->to_abs(JSON::Schema::Modern::Utilities::canonical_uri($state));
1393							}
1394							},
1395							});
1396
1397							=head2 add_schema
1398
1399							$js->add_schema($uri => $schema);
1400							$js->add_schema($uri => $document);
1401							$js->add_schema($schema);
1402							$js->add_schema($document);
1403
1404							Introduces the (unblessed, nested) Perl data structure or L<JSON::Schema::Modern::Document>
1405							object, representing a JSON Schema, to the implementation, registering it under the indicated URI if
1406							provided (and if not, C<''> will be used if no other identifier can be found within).
1407
1408							You B<MUST> call C<add_schema> for any external resources that a schema may reference via C<$ref>
1409							before calling L</evaluate>, other than the standard metaschemas which are loaded from a local cache
1410							as needed.
1411
1412							Returns C<undef> if the resource could not be found;
1413							if there were errors in the document, will die with these errors;
1414							otherwise returns the L<JSON::Schema::Modern::Document> that contains the added schema.
1415
1416							=head2 add_format_validation
1417
1418							=for comment we are the nine Eleven Deniers
1419
1420							$js->add_format_validation(no_nines => { type => 'number', sub => sub ($value) { $value =~ m/^[0-8]+$/ } });
1421
1422							Adds support for a custom format. The data type that this format applies to must be supplied; all
1423							values of any other type will automatically be deemed to be valid, and will not be passed to the
1424							subref.
1425
1426							=head2 add_vocabulary
1427
1428							$js->add_vocabulary('My::Custom::Vocabulary::Class');
1429
1430							Makes a custom vocabulary class available to metaschemas that make use of this vocabulary,
1431							as described in the specification at
1432							L<"Meta-Schemas and Vocabularies"|https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.8.1>.
1433
1434							The class must compose the L<JSON::Schema::Modern::Vocabulary> role and implement the
1435							L<vocabulary|JSON::Schema::Modern::Vocabulary/vocabulary> and
1436							L<keywords|JSON::Schema::Modern::Vocabulary/keywords> methods.
1437
1438							=head2 add_media_type
1439
1440							$js->add_media_type('application/furble' => sub ($content_ref) {
1441							return ...; # data representing the deserialized text for Content-Type: application/furble
1442							});
1443
1444							Takes a media-type name and a subref which takes a single scalar reference, which is expected to be
1445							a reference to a string, which might contain wide characters (i.e. not octets), especially when used
1446							in conjunction with L</get_encoding> below. Must return B<a reference to a value of any type> (which is
1447							then dereferenced for the C<contentSchema> keyword).
1448
1449							These media types are already known:
1450
1451							=over 4
1452
1453							=item *
1454
1455							C<application/json> - see L<RFC 4627|https://datatracker.ietf.org/doc/html/rfc4627>
1456
1457							=item *
1458
1459							C<application/schema+json> - see L<proposed definition|https://json-schema.org/draft/2020-12/json-schema-core.html#name-application-schemajson>
1460
1461							=item *
1462
1463							C<application/schema-instance+json> - see L<proposed definition|https://json-schema.org/draft/2020-12/json-schema-core.html#name-application-schema-instance>
1464
1465							=item *
1466
1467							C<application/octet-stream> - passes strings through unchanged
1468
1469							=item *
1470
1471							C<application/x-www-form-urlencoded>
1472
1473							=item *
1474
1475							C<application/x-ndjson> - see L<https://github.com/ndjson/ndjson-spec>
1476
1477							=item *
1478
1479							C<text/*> - passes strings through unchanged
1480
1481							=back
1482
1483							=head2 get_media_type
1484
1485							Fetches a decoder sub for the indicated media type. Lookups are performed B<without case sensitivity>.
1486
1487							=for stopwords thusly
1488
1489							You can use it thusly:
1490
1491							$js->add_media_type('application/furble' => sub { ... }); # as above
1492							my $decoder = $js->get_media_type('application/furble') or die 'cannot find media type decoder';
1493							my $content_ref = $decoder->(\$content_string);
1494
1495							=head2 add_encoding
1496
1497							$js->add_encoding('bloop' => sub ($content_ref) {
1498							return \ ...; # data representing the deserialized content for Content-Transfer-Encoding: bloop
1499							});
1500
1501							Takes an encoding name and a subref which takes a single scalar reference, which is expected to be
1502							a reference to a string, which SHOULD be a 7-bit or 8-bit string. Result values MUST be a scalar-reference
1503							to a string (which is then dereferenced for the C<contentMediaType> keyword).
1504
1505							=for stopwords natively
1506
1507							Encodings handled natively are:
1508
1509							=over 4
1510
1511							=item *
1512
1513							C<identity> - passes strings through unchanged
1514
1515							=item *
1516
1517							C<base64> - see L<RFC 4648 §4|https://datatracker.ietf.org/doc/html/rfc4648#section-4>
1518
1519							=item *
1520
1521							C<base64url> - see L<RFC 4648 §5|https://datatracker.ietf.org/doc/html/rfc4648#section-5>
1522
1523							=back
1524
1525							See also L<HTTP::Message/encode>.
1526
1527							=head2 get_encoding
1528
1529							Fetches a decoder sub for the indicated encoding. Incoming values MUST be a reference to an octet
1530							string. Result values will be a scalar-reference to a string, which might be passed to a media_type
1531							decoder (see above).
1532
1533							You can use it thusly:
1534
1535							my $decoder = $js->get_encoding('base64') or die 'cannot find encoding decoder';
1536							my $content_ref = $decoder->(\$content_string);
1537
1538							=head2 get
1539
1540							my $schema = $js->get($uri);
1541							my ($schema, $canonical_uri) = $js->get($uri);
1542
1543							Fetches the Perl data structure representing the JSON Schema at the indicated URI. When called in
1544							list context, the canonical URI of that location is also returned, as a L<Mojo::URL>. Returns
1545							C<undef> if the schema with that URI has not been loaded (or cached).
1546
1547							=head1 LIMITATIONS
1548
1549							=head2 Types
1550
1551							Perl is a more loosely-typed language than JSON. This module delves into a value's internal
1552							representation in an attempt to derive the true "intended" type of the value. However, if a value is
1553							used in another context (for example, a numeric value is concatenated into a string, or a numeric
1554							string is used in an arithmetic operation), additional flags can be added onto the variable causing
1555							it to resemble the other type. This should not be an issue if data validation is occurring
1556							immediately after decoding a JSON payload, or if the JSON string itself is passed to this module.
1557							If you are still having difficulties, make sure you are using Perl's fastest and most trusted and
1558							reliable JSON decoder, L<Cpanel::JSON::XS> (or its proxy, useful for fatpacking, L<JSON::MaybeXS>).
1559							Other JSON decoders are known to produce data with incorrect data types.
1560
1561							For more information, see L<Cpanel::JSON::XS/MAPPING>.
1562
1563							=head2 Format Validation
1564
1565							By default (and unless you specify a custom metaschema with the C<$schema> keyword or
1566							L<JSON::Schema::Modern::Document/metaschema>),
1567							formats are treated only as annotations, not assertions. When L</validate_formats> is
1568							true, strings are also checked against the format as specified in the schema. At present the
1569							following formats are supported (use of any other formats than these will always evaluate as true,
1570							but remember you can always supply custom format handlers; see L</format_validations> above):
1571
1572							=over 4
1573
1574							=item *
1575
1576							C<date-time>
1577
1578							=item *
1579
1580							C<date>
1581
1582							=item *
1583
1584							C<time>
1585
1586							=item *
1587
1588							C<duration>
1589
1590							=item *
1591
1592							C<email>
1593
1594							=item *
1595
1596							C<idn-email>
1597
1598							=item *
1599
1600							C<hostname>
1601
1602							=item *
1603
1604							C<idn-hostname>
1605
1606							=item *
1607
1608							C<ipv4>
1609
1610							=item *
1611
1612							C<ipv6>
1613
1614							=item *
1615
1616							C<uri>
1617
1618							=item *
1619
1620							C<uri-reference>
1621
1622							=item *
1623
1624							C<iri>
1625
1626							=item *
1627
1628							C<uuid>
1629
1630							=item *
1631
1632							C<json-pointer>
1633
1634							=item *
1635
1636							C<relative-json-pointer>
1637
1638							=item *
1639
1640							C<regex>
1641
1642							=back
1643
1644							A few optional prerequisites are needed for some of these (if the prerequisite is missing,
1645							validation will always succeed):
1646
1647							=over 4
1648
1649							=item *
1650
1651							C<date-time>, C<date>, and C<time> require L<Time::Moment> and L<DateTime::Format::RFC3339>
1652
1653							=item *
1654
1655							C<email> and C<idn-email> require L<Email::Address::XS> version 1.04 (or higher)
1656
1657							=item *
1658
1659							C<hostname> and C<idn-hostname> require L<Data::Validate::Domain>
1660
1661							=item *
1662
1663							C<idn-hostname> requires L<Net::IDN::Encode>
1664
1665							=back
1666
1667							=head2 Specification Compliance
1668
1669							This implementation is now fully specification-compliant (for versions draft7, draft2019-09,
1670							draft2020-12), but until version 1.000 is released, it is
1671							still deemed to be missing some optional but quite useful features, such as:
1672
1673							=over 4
1674
1675							=item *
1676
1677							loading schema documents from disk
1678
1679							=item *
1680
1681							loading schema documents from the network
1682
1683							=item *
1684
1685							loading schema documents from a local web application (e.g. L<Mojolicious>)
1686
1687							=item *
1688
1689							additional output formats beyond C<flag>, C<basic>, and C<terse> (L<https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.12>)
1690
1691							=back
1692
1693							=head1 SECURITY CONSIDERATIONS
1694
1695							The C<pattern> and C<patternProperties> keywords evaluate regular expressions from the schema,
1696							the C<regex> format validator evaluates regular expressions from the data, and some keywords
1697							in the Validation vocabulary perform floating point operations on potentially very large numbers.
1698							No effort is made (at this time) to sanitize the regular expressions for embedded code or
1699							detect potentially pathological constructs that may pose a security risk, either via denial of
1700							service or by allowing exposure to the internals of your application. B<DO NOT USE SCHEMAS FROM
1701							UNTRUSTED SOURCES.>
1702
1703							=head1 SEE ALSO
1704
1705							=over 4
1706
1707							=item *
1708
1709							L<json-schema-eval>
1710
1711							=item *
1712
1713							L<https://json-schema.org>
1714
1715							=item *
1716
1717							L<RFC8259: The JavaScript Object Notation (JSON) Data Interchange Format|https://datatracker.ietf.org/doc/html/rfc8259>
1718
1719							=item *
1720
1721							L<RFC3986: Uniform Resource Identifier (URI): Generic Syntax|https://datatracker.ietf.org/doc/html/rfc3986>
1722
1723							=item *
1724
1725							L<Test::JSON::Schema::Acceptance>: contains the official JSON Schema test suite
1726
1727							=item *
1728
1729							L<JSON::Schema::Tiny>: a more stripped-down implementation of the specification, with fewer dependencies and faster evaluation
1730
1731							=item *
1732
1733							L<https://json-schema.org/draft/2020-12/release-notes.html>
1734
1735							=item *
1736
1737							L<https://json-schema.org/draft/2019-09/release-notes.html>
1738
1739							=item *
1740
1741							L<https://json-schema.org/draft-07/json-schema-release-notes.html>
1742
1743							=item *
1744
1745							L<Understanding JSON Schema|https://json-schema.org/understanding-json-schema>: tutorial-focused documentation
1746
1747							=back
1748
1749							=for stopwords OpenAPI
1750
1751							=head1 SUPPORT
1752
1753							Bugs may be submitted through L<https://github.com/karenetheridge/JSON-Schema-Modern/issues>.
1754
1755							I am also usually active on IRC, as 'ether' at C<irc.perl.org> and C<irc.libera.chat>.
1756
1757							You can also find me on the L<JSON Schema Slack server|https://json-schema.slack.com> and L<OpenAPI Slack
1758							server|https://open-api.slack.com>, which are also great resources for finding help.
1759
1760							=head1 AUTHOR
1761
1762							Karen Etheridge <ether@cpan.org>
1763
1764							=head1 COPYRIGHT AND LICENCE
1765
1766							This software is copyright (c) 2020 by Karen Etheridge.
1767
1768							This is free software; you can redistribute it and/or modify it under
1769							the same terms as the Perl 5 programming language system itself.
1770
1771							=cut