File Coverage

blib/lib/JsonSQL/Validator.pm
Criterion Covered Total %
statement 100 103 97.0
branch 27 30 90.0
condition 8 11 72.7
subroutine 15 15 100.0
pod 4 4 100.0
total 154 163 94.4


line stmt bran cond sub pod time code
1             # ABSTRACT: JSON schema validation module. Returns a JsonSQL::Validator object for validating a JSON string against a pre-defined schema.
2              
3              
4 3     3   329281 use strict;
  3         10  
  3         83  
5 3     3   15 use warnings;
  3         7  
  3         82  
6 3     3   53 use 5.014;
  3         13  
7              
8             package JsonSQL::Validator;
9              
10             our $VERSION = '0.4'; # VERSION
11              
12 3     3   1170 use JSON::Validator;
  3         574970  
  3         184  
13 3     3   1074 use JSON::Parse qw( assert_valid_json parse_json );
  3         2608  
  3         174  
14 3     3   20 use List::Util qw( any );
  3         7  
  3         144  
15              
16 3     3   805 use JsonSQL::Schemas::Schema;
  3         8  
  3         101  
17 3     3   22 use JsonSQL::Error;
  3         7  
  3         2282  
18             #use Data::Dumper;
19              
20              
21              
22             sub new {
23 14     14 1 3049 my ( $class, $jsonSchema, $ruleSets ) = @_;
24            
25 14         59 my $self = {};
26            
27             # Load the specified JSON schema.
28 14         94 my $schema = JsonSQL::Schemas::Schema->load_schema($jsonSchema);
29 14 50       29 if ( eval { $schema->is_error } ) {
  14         182  
30 0         0 return JsonSQL::Error->new("validate", "Error loading JSON schema object '$jsonSchema': $schema->{message}.");
31             } else {
32 14         582 my $validator = JSON::Validator->new;
33 14         158 $validator->schema($schema);
34 14         186043 $self->{jsonValidator} = $validator;
35             }
36            
37 14 50 33     150 if (defined $ruleSets && 'ARRAY' eq ref ($ruleSets) ) {
38 14         46 $self->{ruleSets} = $ruleSets;
39             } else {
40 0         0 $self->{ruleSets} = [];
41             }
42            
43 14         46 bless $self, $class;
44 14         62 return $self;
45             }
46              
47              
48             sub validate_schema {
49 16     16 1 3818 my ( $self, $json ) = @_;
50              
51             # Parse and validate the JSON string.
52 16         31 eval {
53             # Check if string is valid JSON before continuing.
54 16         159 assert_valid_json($json);
55             };
56            
57 16 100       91 if ( $@ ) {
58 1         16 return JsonSQL::Error->new("validate", "Input is invalid JSON at: $@");
59             } else {
60 15         187 my $perldata = parse_json($json);
61 15         88 my @errors = $self->{jsonValidator}->validate($perldata);
62            
63 15 100       25220 if ( @errors ) {
64 2         8 my $err = "JSON failed schema validation at: \n";
65 2         10 for my $error ( @errors ) {
66 2         18 $err .= "\t $error->{message} at $error->{path} \n";
67             }
68 2         22 return JsonSQL::Error->new("validate", $err);
69             } else {
70 13         57 return $perldata;
71             }
72             }
73             }
74              
75              
76              
77             sub _getRuleSets {
78 24     24   57 my ( $self, $schemaString ) = @_;
79            
80 24   100     77 my $schema = $schemaString || '';
81 24         40 my @matchingRuleSets;
82            
83 24         37 for my $ruleSet ( @{ $self->{ruleSets} } ) {
  24         67  
84 24 100 100     117 if ( $ruleSet->{schema} eq $schema || $ruleSet->{schema} eq '#anySchema' ) {
85 21         52 push(@matchingRuleSets, $ruleSet);
86             }
87             }
88            
89 24         72 return \@matchingRuleSets;
90             }
91              
92              
93             sub _getTableRules {
94 21     21   51 my ( $tableString, $ruleSet ) = @_;
95            
96 21         39 my @matchingTableRules;
97 21         56 for my $tableRule ( keys %{ $ruleSet } ) {
  21         69  
98 50 100 66     205 if ( $tableRule eq '#anyTable' ) {
    100          
99 13         35 push(@matchingTableRules, $tableRule);
100             } elsif ( defined $tableString && $tableRule eq $tableString ) {
101 6         21 push(@matchingTableRules, $ruleSet->{$tableRule});
102             }
103             }
104            
105 21         62 return \@matchingTableRules;
106             }
107              
108              
109             sub check_table_allowed {
110 24     24 1 2476 my ( $self, $tableObj ) = @_;
111            
112 24         59 my @table_rules;
113             my @table_violations;
114 24         91 my $ruleSets = $self->_getRuleSets($tableObj->{schema});
115            
116 24 100       42 if ( @{ $ruleSets } ) {
  24         63  
117 21         36 for my $ruleSet ( @{ $ruleSets } ) {
  21         44  
118             # For a given rule set, the default is to be restrictive.
119 21         36 my $table_allowed = 0;
120 21         59 my $tableRules = _getTableRules($tableObj->{table}, $ruleSet);
121            
122 21         44 for my $tableRule ( @{ $tableRules } ) {
  21         51  
123             # If there is a rule defined for the table, the table is marked as "allowed"...
124 19         44 $table_allowed = 1;
125            
126             # Check to be sure the table rule is an array of allowed columns.
127             # The table rule '#anyTable' is a special case that turns off restrictions for all tables in the schema.
128 19 100       93 unless ( $tableRule eq '#anyTable' ) {
129 6 50       24 if ( 'ARRAY' eq ref ($tableRule) ) {
130 6         21 push(@table_rules, $tableRule);
131             } else {
132 0         0 push(@table_violations, "Bad syntax in rule set $ruleSet->{schema}. Table rules must be arrays of allowed columns.");
133             }
134             }
135             }
136            
137 21 100       74 unless ( $table_allowed ) {
138 2         13 push(@table_violations, "Table $tableObj->{table} is not allowed by rule set $ruleSet->{schema}");
139             }
140             }
141             } else {
142             # If no rule sets are defined, the default is to be restrictive.
143 3         9 push(@table_violations, "No access rules have been defined. Default is to be restrictive.");
144             }
145            
146             # If any violation is found, the access control test fails.
147             # If cases where there are multiple rule sets for a schema, this ensures that the most restrictive set is used.
148 24 100       71 if ( @table_violations ) {
149 5         13 my $err = "Table failed access control test.\n\t";
150 5         21 $err .= join("\n\t", @table_violations);
151 5         27 return JsonSQL::Error->new("access_control", $err);
152             } else {
153             # Otherwise, return the @table_rules so they can be used for additional checks.
154 19         69 return \@table_rules;
155             }
156             }
157              
158              
159             sub check_field_allowed {
160 31     31 1 877 my ( $self, $table_rules, $field ) = @_;
161            
162 31         50 my @column_violations;
163 31         54 for my $tableRule ( @{ $table_rules } ) {
  31         73  
164             # Check allowed column list for the table.
165 9 100   5   76 unless ( any { $_ eq $field || $_ eq '#anyColumn' } @{ $tableRule } ) {
  5 100       59  
  9         46  
166 5         32 push(@column_violations, "Field $field is not allowed by the table rule set.");
167             }
168             }
169            
170 31 100       80 if ( @column_violations ) {
171 5         12 my $err = "Field failed access control test.\n\t";
172 5         20 $err .= join("\n\t", @column_violations);
173 5         25 return JsonSQL::Error->new("access_control", $err);
174             } else {
175 26         84 return 1;
176             }
177             }
178              
179              
180             1;
181              
182             __END__
183              
184             =pod
185              
186             =encoding UTF-8
187              
188             =head1 NAME
189              
190             JsonSQL::Validator - JSON schema validation module. Returns a JsonSQL::Validator object for validating a JSON string against a pre-defined schema.
191              
192             =head1 VERSION
193              
194             version 0.4
195              
196             =head1 SYNOPSIS
197              
198             This is a supporting module used by JsonSQL::Query modules.
199              
200             To use this:
201              
202             my $validator = JsonSQL::Validator->new(<json_schema_name>, <whitelisting_rule_set>);
203             my $perldata = $validator->validate_schema(<json_string>);
204             if ( eval { $perldata->is_error } ) {
205             return "$perldata->{message}";
206             } else {
207             ...
208             }
209              
210             To use the whitelisting module:
211              
212             my $table_rules = $validator->check_table_allowed({ schema => <schemaname>, table => <tablename> });
213             if ( eval { $table_rules->is_error } ) {
214             return "Use of table failed access check: $table_rules->{message}";
215             } else {
216             my $allowedField = $validator->check_field_allowed($table_rules, <fieldname>);
217             if ( eval { $allowedField->is_error } ) {
218             return "Use of field in table failed access check: $allowedField->{message}";
219             } else {
220             ...
221             }
222             }
223              
224             For more information on the whitelisting module, and how to construct rule sets, see documentation below.
225              
226             =head1 METHODS
227              
228             =head2 Constructor new($jsonSchema, $ruleSets) -> JsonSQL::Validator
229              
230             Loads the specified $jsonSchema and creates a JSON::Validator instance with it. A reference to the validator and to the provided
231             whitelisting rule sets is saved in the object before it is returned. If an error occurs during schema loading, a JsonSQL::Error object
232             is returned.
233              
234             $jsonSchema => The JSON schema to load. Must be present in JsonSQL::Schemas as a subclass of JsonSQL::Schemas::Schema.
235             $ruleSets => An array of whitelisting rules to be applied when a JsonSQL query object is being constructed (see below).
236              
237             =head2 ObjectMethod validate_schema($json) -> \%hash
238              
239             Parses the provided JSON string into a Perl data structure, and then uses the stored JSON::Validator to validate it
240             against the specified schema (a JsonSQL::Schemas::<schema>). If the process fails at any step, it will return a JsonSQL::Error object
241             with an appropriate error message. If successful, a Perl data structure (depends on the schema, but usually a hashref)
242             representing the SQL query is returned.
243              
244             $json => The JSON string to validate. Must be valid JSON.
245              
246             =head2 PrivateMethod _getRuleSets($schemaString) -> \@array
247              
248             Searches the @ruleSets array for the specified $schemaString and returns all matching rule sets.
249              
250             $schemaString => Name of schema to match for identifying rulesets.
251              
252             Matches a rule set if ( $schemaString eq $ruleSet->{schema} || $ruleSet->{schema} eq '#anySchema' ).
253              
254             =head2 PrivateMethod _getTableRules($tableString, $ruleSet) -> \@array
255              
256             Looks at each table rule in the \@ruleSet array and returns it if it matches the specified $tableString.
257              
258             $tableString => Name of table to match for identifying table rules.
259             $ruleSet => The @\ruleSet array to search for table matches.
260              
261             Matches table rules of the form ( $ruleSet->{$tableString} || $ruleSet->{'#anyTable'} ).
262              
263             =head2 ObjectMethod check_table_allowed($tableObj) -> \@array
264              
265             Determines whether access to a table is allowed by the current stored rule set. If yes, a set of table rules applicable to the table is
266             returned to use for column verification. If no, a JsonSQL::Error object is returned.
267              
268             $tableObj => Name of table to match for identifying table rules.
269             Takes the form { schema => <schemaname>, table => <tablename> }
270              
271             =head2 ObjectMethod check_field_allowed($table_rules, $field) -> 1 || JsonSQL::Error
272              
273             Determines whether access to a column is allowed by the supplied table rules. If yes, a true value is returned. If no,
274             a JsonSQL::Error object is returned.
275              
276             $table_rules => Array of table rules as returned by check_table_allowed.
277             $field => The name of the field to check.
278              
279             =head1 Whitelisting Module
280              
281             To provide some basic whitelisting support for table and column identifiers, a set of whitelisting rules is saved in the
282             JsonSQL::Validator object when it is being created. The rules take the form of an \@arrayref as follows:
283              
284             [
285             {
286             schema => 'schemaName' || '#anySchema',
287             <'#anyTable' || allowedTableName1 => [ '#anyColumn' || allowedFieldName1, allowedFieldName2, ... ]>,
288             <... additional table rules ...>
289             },
290             < ... additional rule sets ... >
291             ]
292              
293             Rule sets are generally grouped by schema. If you are not using schemas (or you are using a DB that doesn't support them), you will have
294             to provide a rule set with the schema property set to '#anySchema'. Whitelisting security is enabled and restrictive by default, so
295             at least one rule set will have to be defined in order to create JsonSQL query objects. If you want to disable whitelisting security
296             (not recommended), use this rule set,
297              
298             [ { schema = '#anySchema', '#anyTable' } ]
299              
300             The above allows access to all tables in any schema. Column restrictions are not meaningful without table restrictions, so table rules
301             have to be defined if you want column restrictions. You can have more than one rule set per schema, but in this case the most
302             restrictive rule set will be the one that takes precedent. This behavior can be used as an effective way to disable access to specific
303             schemas. For example,
304              
305             [
306             { schema => '#anySchema', '#anyTable' },
307             { schema => 'forbiddenSchema' }
308             ]
309              
310             will first allow access to all tables in any schema, and then restrict access to any table in 'forbiddenSchema'. Table verification and
311             column verification take place in separate steps. During table verification, rule sets are selected based on the schema property. The
312             remaining keys in each rule set %hash correspond to tables that the query object is allowed access to.
313              
314             If there is a key in the rule set with the special name '#anyTable', access to any table in that rule set (schema) will be allowed.
315             For other keys (table names), the value needs to be set to an array of column names. During column verification, this list will be
316             used to determine whether the query object has access to particular columns in the table.
317              
318             As with schemas, access to a table can be governed by more than one 'table rule'. In this case, the most restrictive rule is the one
319             that takes precedent. For example,
320              
321             [
322             { schema => 'allowedSchema', '#anyTable', 'allowedTable' => [ 'allowedColumnA', 'allowedColumnB' ] }
323             ]
324              
325             will allow access to all columns of all tables in the schema 'allowedSchema', but for the table 'allowedTable', only access to
326             columns 'allowedColumnA' and 'allowedColumnB' is allowed. Similarly,
327              
328             [
329             { schema => 'allowedSchema', '#anyTable', 'forbiddenTable' => [] }
330             ]
331              
332             will allow access to all columns of all tables in the schema 'allowedSchema', but block access to the table 'forbiddenTable'.
333             (Technically, it is only blocking access to the columns in that table, but this is effectively the same thing for most SQL operations).
334              
335             If the column list contains the special string '#anyColumn' access to all columns in the table will be allowed. So,
336              
337             [
338             { schema => 'allowedSchema', 'allowedTable1' => [ '#anyColumn' ], 'allowedTable2' => [ 'allowedColumn1' ] }
339             ]
340              
341             will allow access to any column in 'allowedTable1' and only column 'allowedColumn1' of 'allowedTable2'. Access to all other tables
342             in 'allowedSchema' will be blocked.
343              
344             This module is designed to err on the side of caution, and in so doing will always take the more restrictive course of action in the
345             case of ambiguity. As such, many SQL queries will probably fail validation if you don't use fully-qualified table and column identifiers,
346             which is generally recommended as good practice when writing SQL queries anyway. However, if you are writing simple queries and find this
347             to be annoying, you can turn off whitelisting and rely only on database-level security.
348              
349             It is important to note that while this module aims to reduce the attack surface, it is NOT a replacement for database-level security. But
350             when combined with good database-level security (ex: per-user schemas and Kerberos), it provides for reasonably safe SQL query generation
351             using data from untrusted sources (ex: web browsers).
352              
353             =head1 AUTHOR
354              
355             Chris Hoefler <bhoefler@draper.com>
356              
357             =head1 COPYRIGHT AND LICENSE
358              
359             This software is copyright (c) 2017 by Chris Hoefler.
360              
361             This is free software; you can redistribute it and/or modify it under
362             the same terms as the Perl 5 programming language system itself.
363              
364             =cut