File Coverage

blib/lib/Whelk/Schema.pm
Criterion Covered Total %
statement 35 36 97.2
branch 12 16 75.0
condition 2 6 33.3
subroutine 9 9 100.0
pod 5 5 100.0
total 63 72 87.5


line stmt bran cond sub pod time code
1             package Whelk::Schema;
2             $Whelk::Schema::VERSION = '1.04';
3 25     25   1077434 use Kelp::Base -strict;
  25         52  
  25         182  
4 25     25   19980 use Whelk::Schema::Definition;
  25         237  
  25         281  
5 25     25   537 use Carp;
  25         65  
  25         3268  
6              
7             our @CARP_NOT = qw(Whelk::Endpoint); # Carp: do not blame Whelk::Endpoint frames when this package croaks
8              
              # Registry of named schemas, keyed by schema name. Written by build()
              # and read by get_or_build(), get_by_name() and all_schemas().
9             my %registered;
10              
              # Constant holding a code reference that returns undef. The POD below
              # documents it as the marker for "explicitly no default value".
11 25     25   196 use constant NO_DEFAULT => sub { undef };
  25         46  
  25         18045  
  0         0  
12              
13             sub build_if_defined
14             {
               # Convenience wrapper around build() for optional schema arguments.
               #
               # Parameters: $args - schema definition, or undef.
               # Returns:    undef when $args is undefined; otherwise the result of
               #             $class->build($args), i.e. the single-argument
               #             (unnamed) form of build().
15 118     118 1 213377 my ($class, $args) = @_;
16              
17 118 100       561 return undef unless defined $args;
18 11         45 return $class->build($args);
19             }
20              
21             sub build
22             {
               # Builds a schema definition and, when a name is given, registers it.
               #
               # Accepted call forms:
               #   build($args)          - one argument, which must be a reference
               #   build($name => $args) - a non-reference name and a reference
               # Any other argument shape croaks with a usage message.
               #
               # Returns the object produced by Whelk::Schema::Definition->create.
               # Croaks if the name is already present in %registered.
23 302     302 1 935476 my ($class, @input) = @_;
24              
25 302 100       907 if (@input == 1) {
26 250 50       657 croak 'usage: build($args)'
27             unless ref $input[0];
28              
               # normalize to the two-element (name, args) form with no name
29 250         579 unshift @input, undef;
30             }
31             else {
32 52 50 33     803 croak 'usage: build(name => $args)'
      33        
33             unless @input == 2 && !ref $input[0] && ref $input[1];
34             }
35              
36 302         766 my ($name, $args) = @input;
37 302         1265 my $self = Whelk::Schema::Definition->create($args);
38              
               # NOTE: this is a truthiness test, so a name of '' or '0' is treated
               # the same as no name and the schema is not registered.
39 298 100       893 if ($name) {
40 48         358 $self->name($name);
41              
               # the key is read back through the accessor rather than from $name
42             croak "trying to reuse schema name " . $self->name
43 48 50       316 if $registered{$self->name};
44              
45 48         423 $registered{$self->name} = $self;
46             }
47              
48 298         1457 return $self;
49             }
50              
51             sub get_or_build
52             {
               # Returns the schema already registered under $name if there is one;
               # otherwise delegates to build($name, $args), which creates and
               # registers it. $args is ignored when the name is already registered.
53 115     115 1 362 my ($class, $name, $args) = @_;
54              
55             return $registered{$name}
56 115 100       623 if $registered{$name};
57              
58 19         85 return $class->build($name, $args);
59             }
60              
61             sub get_by_name
62             {
               # Looks up a named schema in %registered and returns it.
               # Croaks with "no such referenced schema '<name>'" when nothing is
               # registered under $name.
63 48     48 1 3823 my ($class, $name) = @_;
64              
65             croak "no such referenced schema '$name'"
66 48 50       204 unless $registered{$name};
67              
68 48         267 return $registered{$name};
69             }
70              
71             sub all_schemas
72             {
               # Returns a fresh array reference holding every schema currently in
               # %registered. The order is whatever `values` yields for the hash,
               # so callers should not depend on it.
73 5     5 1 96 my ($class) = @_;
74              
75 5         35 return [values %registered];
76             }
77              
78             1;
79              
80             __END__
81              
82             =pod
83              
84             =head1 NAME
85              
86             Whelk::Schema - Whelk validation language
87              
88             =head1 SYNOPSIS
89              
90             # build from scratch
91             Whelk::Schema->build(
92             name => {
93             type => 'string',
94             }
95             );
96              
97             # build by extending
98             Whelk::Schema->build(
99             new_name => [
100             \'name_to_extend',
101             %more_args
102             ],
103             );
104              
105             =head1 DESCRIPTION
106              
107             Whelk schema is an easy validation language for defining validations similar to
108             JSON Schema. It's designed to be a bit more concise and crafted specifically
109             for Whelk needs.
110              
111             Whelk schema is used everywhere in Whelk: not only in C<< Whelk::Schema->build
112             >> calls but also in C<request>, C<response> and C<parameters> keys in
113             endpoints. Only L</build> allows defining named schemas.
114              
115             A named schema is global and should have a unique name. The module will not
116             allow overriding a named schema. All named schemas will be put into the OpenAPI
117             document, in C<components/schemas> object, using their defined names.
118              
119             =head2 Defining a schema
120              
121             There are a couple of ways to define a schema, listed below. All of them can be
122             used at every nesting level, so for example you can use a reference to a schema
123             inside C<properties> of an C<object> schema created with hash.
124              
125             =head3 New schema using hash reference
126              
127             { # new schema, level 0
128             type => 'array',
129             items => { # new schema, level 1
130             type => 'object',
131             properties => { # reused schema, level 2
132             some_field => \'named_schema'
133             },
134             },
135             }
136              
137             By passing a C<HASH> reference you are creating a completely new schema.
138             C<type> field is required and must be one of the available types, in lowercase.
139              
140             Schema declared this way will be put into the OpenAPI document as-is, without
141             referencing any other schema.
142              
143             =head3 Reusing schemas with scalar reference
144              
145             # reusing a named schema
146             \'name'
147              
148             By passing a C<SCALAR> reference you are reusing a named schema. The name must
149             exist beforehand or else an exception will be raised.
150              
151             Schema declared this way will be put into the OpenAPI document as a reference
152             to a schema inside C<components/schemas> object.
153              
154             =head3 Extending schemas with array reference
155              
156             # extending a named schema
157             [
158             \'name',
159             required => !!0,
160             ]
161              
162             By passing an C<ARRAY> reference you are extending a named schema. The first
163             argument must be a C<SCALAR> reference with the name of the schema to extend.
164             Rest of the arguments are configuration which should be replaced in the
165             extended schema. C<type> cannot be replaced.
166              
167             Schema declared this way will be put into the OpenAPI document as-is, without
168             referencing any other schema.
169              
170             =head3 Reusable schemas without OpenAPI trace
171              
172             All methods above will leave a trace in your OpenAPI output, which may not be
173             what you want. If you for example just want to use a list of properties across
174             a couple of objects, you may want to use a regular hash instead:
175              
176             my %common_fields = (
177             name => {
178             type => 'string',
179             },
180             age => {
181             type => 'integer',
182             },
183             );
184              
185             Whelk::Schema->build(
186             person => {
187             type => 'object',
188             properties => {
189             %common_fields,
190             id => {
191             type => 'integer',
192             nullable => !!1,
193             },
194             },
195             }
196             );
197              
198             This should work well as presented, but since Whelk does not usually deep-clone
199             its input before using it, some nested parts of C<%common_fields> may get
200             changed or blessed. Don't rely on its contents being exactly as you defined it,
201             or deep-clone it yourself before passing it to Whelk.
202              
203             =head2 Where to define the schemas?
204              
205             It is not important where your schemas are defined, as long as they are defined
206             before they are used. Whelk provides C<schemas> method as syntax sugar, which
207             will be called just once for each controller. That does not mean schemas must
208             be defined there, they may as well be called at the package level (during
209             package compilation) or anywhere else.
210              
211             You can use it to your advantage when creating schemas which should be used for
212             the entire application, not just for one controller. It can safely be put in a
213             separate package, or even in the C<app.psgi> itself (even though it's surely
214             not a good place to keep them).
215              
216             =head2 Available types
217              
218             Each new schema must have a C<type> defined. All types share these common configuration values:
219              
220             =over
221              
222             =item * required
223              
224             Boolean - whether the value is required to be present. C<true> by default.
225              
226             =item * nullable
227              
228             Boolean - whether the value can be null (but present). C<false> by default.
229              
230             =item * description
231              
232             String - an optional description used for the schema in the OpenAPI document.
233              
234             =item * rules
235              
236             An array reference of hashes. See L</Extra rules>.
237              
238             =back
239              
240             =head3 null
241              
242             A forced C<undef> value.
243              
244             No special configuration.
245              
246             =head3 empty
247              
248             This is a special type used to implement C<204 No Content> responses. It is
249             only valid at the root of C<response> and should not be used in any other
250             context.
251              
252             No special configuration.
253              
254             =head3 string
255              
256             A string type. The value must not be a reference and the output will be coerced
257             to a string value. Unlike JSON schema, this also accepts numbers.
258              
259             Extra configuration fields:
260              
261             =over
262              
263             =item * default
264              
265             A default value to be used when there is no value. Also assumes C<< required => !!0 >>.
266              
267             CAUTION: Whelk does not differentiate null value and no value. If you specify
268             default, a received null value will get replaced with that default. To
269             explicitly say that there is no default, use C<Whelk::Schema::NO_DEFAULT>.
270              
271             =item * example
272              
273             An optional example used for the schema in the OpenAPI document.
274              
275             =back
276              
277             =head3 boolean
278              
279             A boolean type. Will coerce the output value to JSON::PP::true and
280             JSON::PP::false objects.
281              
282             Same extra configuration as in L</string>.
283              
284             =head3 number
285              
286             A numeric type. Will coerce the output value to a number. Unlike JSON schema,
287             this also accepts strings as long as they contain something which looks like a
288             number.
289              
290             Same extra configuration as in L</string>.
291              
292             =head3 integer
293              
294             Same as L</number>, but will not accept numbers with fractions.
295              
296             =head3 array
297              
298             This is an array type, which will only accept array references.
299              
300             Extra configuration fields:
301              
302             =over
303              
304             =item * items
305              
306             An optional type to use for each of the array elements. This is a nested
307             schema, and all ways to define a schema discussed in L</Defining a schema> will
308             work.
309              
310             =item * lax
311              
312             This is a special boolean flag used to accept array C<parameters> of type
313             C<query> and C<header>. If present and true, the type will also accept a
314             non-array input and turn it into an array with one element. You should probably
315             only use it within the C<parameters> structure of the endpoint.
316              
317             =back
318              
319             =head3 object
320              
321             This is a hash type, which will only accept hash references. Unlike JSON
322             schema, its C<required> is not an array of required elements - instead the
323             required elements will be taken from C<required> flag of its C<properties>.
324              
325             Extra configuration fields:
326              
327             =over
328              
329             =item * properties
330              
331             An optional dictionary to use for the keys in the object. If it's not
332             specified, the object can contain anything. This is a nested schema, and all
333             ways to define a schema discussed in L</Defining a schema> will work.
334              
335             =item * strict
336              
337             This is a special boolean flag which makes any hash containing keys other than
338             those specified in C<properties> invalid. By default, the hash can
339             contain any number of extra keys and will be considered correct. Note that the
340             schema will still only copy the keys which were defined, so this is usually not
341             required.
342              
343             =back
344              
345             =head2 Extra rules
346              
347             Whelk does not define a full JSONSchema spec with all its rules. To allow
348             configuration, you can specify extra rules when needed which will be used
349             during validation and may optionally add some keys to the OpenAPI spec of that
350             field. While all field types allow defining extra rules, it makes little sense
351             to use them for types C<boolean>, C<null> and C<empty> - rules will do nothing
352             for them.
353              
354             An example of adding some rules is showcased below:
355              
356             {
357             type => 'integer',
358             rules => [
359             {
360             openapi => {
361             minimum => '5',
362             },
363             hint => '(>=5)',
364             code => sub {
365             my $value = shift;
366              
367             return $value >= 5;
368             },
369             },
370             ],
371             }
372              
373             As shown, a C<rules> array reference may be defined, containing hash
374             references. Each rule (represented by a hash reference) must contain C<hint> (a
375             very short error message notifying the end user what's wrong), C<code> (a sub
376             reference, which will be passed the value and must return C<true> if the value
377             is valid) and optionally C<openapi> (a hash reference, containing keys which
378             will be added to OpenAPI document).
379              
380             There may be multiple rules in each field, and each rule can contain multiple
381             C<openapi> keys (but only a single C<code> and C<hint>). This system is very
382             bare-bones and a bit verbose, but it makes it very easy to write your own
383             library of validations, implementing the parts of JSONSchema you need (or even
384             the full schema - please publish to CPAN if you do!). Just write a function
385             which will return a given hash reference and it becomes quite powerful:
386              
387             sub greater_or_equal
388             {
389             my ($arg) = @_;
390              
391             return {
392             openapi => {
393             minimum => $arg,
394             },
395             hint => "(>=$arg)",
396             code => sub { shift() >= $arg },
397             };
398             }
399              
400             ... then
401             {
402             type => 'integer',
403             rules => [
404             greater_or_equal(5),
405             ],
406             }
407              
408             =head1 METHODS
409              
410             This is a list of factory methods implemented by C<Whelk::Schema>.
411              
412             =head2 build
413              
414             Builds a schema and returns L<Whelk::Schema::Definition>.
415              
416             =head2 build_if_defined
417              
418             Same as L</build>, but will not throw an exception if an undef is passed.
419             Instead, returns undef.
420              
421             =head2 get_by_name
422              
423             Gets a named schema by name and returns L<Whelk::Schema::Definition>.
424              
425             =head2 get_or_build
426              
427             A mix of L</build> and L</get_by_name>. Tries to get a schema by name, and
428             builds it if it was not defined yet.
429              
430             =head2 all_schemas
431              
432             Returns all named schemas defined thus far.
433              
434             =head1 SEE ALSO
435              
436             L<Whelk::Manual>
437