File Coverage

blib/lib/Avro/BinaryDecoder.pm
Criterion Covered Total %
statement 128 184 69.5
branch 13 22 59.0
condition 1 3 33.3
subroutine 23 40 57.5
pod 1 32 3.1
total 166 281 59.0


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one
2             # or more contributor license agreements. See the NOTICE file
3             # distributed with this work for additional information
4             # regarding copyright ownership. The ASF licenses this file
5             # to you under the Apache License, Version 2.0 (the
6             # "License"); you may not use this file except in compliance
7             # with the License. You may obtain a copy of the License at
8             #
9             # https://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing,
12             # software distributed under the License is distributed on an
13             # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14             # KIND, either express or implied. See the License for the
15             # specific language governing permissions and limitations
16             # under the License.
17              
18             use strict;
19 2     2   1054 use warnings;
  2         4  
  2         46  
20 2     2   8  
  2         3  
  2         37  
21             use Config;
22 2     2   9 use Encode();
  2         2  
  2         45  
23 2     2   8 use Error::Simple;
  2         2  
  2         22  
24 2     2   8 use Avro::Schema;
  2         2  
  2         10  
25 2     2   101  
  2         10  
  2         3346  
26             our $VERSION = '1.11.1';
27              
28             our $complement = ~0x7F;
29             unless ($Config{use64bitint}) {
30             require Math::BigInt;
31             $complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7));
32             }
33              
34             =head2 decode(%param)
35              
36             Resolve the given writer and reader_schema to decode the data provided by the
37             reader.
38              
39             =over 4
40              
41             =item * writer_schema
42              
43             The schema that was used to encode the data provided by the C<reader>
44              
45             =item * reader_schema
46              
47             The schema we want to use to decode the data.
48              
49             =item * reader
50              
51             An object implementing a straightforward interface. C<read($buf, $nbytes)> and
52             C<seek($nbytes, $whence)> are expected. Typically a IO::String object or a
53             IO::File object. It is expected that this calls will block the decoder, if not
54             enough data is available for read.
55              
56             =back
57              
58             =cut
59             my $class = shift;
60             my %param = @_;
61 1332     1332 1 2951  
62 1332         2641 my ($writer_schema, $reader_schema, $reader)
63             = @param{qw/writer_schema reader_schema reader/};
64              
65 1332         2447 my $type = Avro::Schema->match(
66             writer => $writer_schema,
67 1332 100       2422 reader => $reader_schema,
68             ) or throw Avro::Schema::Error::Mismatch;
69              
70             my $meth = "decode_$type";
71             return $class->$meth($writer_schema, $reader_schema, $reader);
72 1331         2100 }
73 1331         2301  
74             my $class = shift;
75             my ($schema, $reader) = @_;
76             my $type = ref $schema ? $schema->type : $schema;
77 2     2 0 3 my $meth = "skip_$type";
78 2         2 return $class->$meth($schema, $reader);
79 2 50       5 }
80 2         4  
81 2         5  
82             my $class = shift;
83             my $reader = pop;
84 1     1 0 3 $reader->read(my $bool, 1);
85             return unpack 'C', $bool;
86 0     0 0 0 }
87              
88 0     0 0 0 my $class = shift;
89 0         0 my $reader = pop;
90 0         0 return zigzag(unsigned_varint($reader));
91 0         0 }
92              
93             my $class = shift;
94 0     0 0 0 return decode_int($class, @_);
95             }
96 2084     2084 0 2171  
97 2084         2065 my $class = shift;
98 2084         2496 my $reader = pop;
99             $reader->read(my $buf, 4);
100             return unpack "f<", $buf;
101 0     0 0 0 }
102              
103 2075     2075 0 2182 my $class = shift;
104 2075         2557 my $reader = pop;
105             $reader->read(my $buf, 8);
106             return unpack "d<", $buf,
107 0     0 0 0 }
108              
109 0     0 0 0 my $class = shift;
110 0         0 my $reader = pop;
111 0         0 my $size = decode_long($class, undef, undef, $reader);
112 0         0 $reader->seek($size, 0);
113             return;
114             }
115 0     0 0 0  
116             my $class = shift;
117 0     0 0 0 my $reader = pop;
118 0         0 my $size = decode_long($class, undef, undef, $reader);
119 0         0 $reader->read(my $buf, $size);
120 0         0 return $buf;
121             }
122              
123             my $class = shift;
124 2     2 0 2 my $reader = pop;
125 2         2 my $bytes = decode_bytes($class, undef, undef, $reader);
126 2         4 return Encode::decode_utf8($bytes);
127 2         7 }
128 2         38  
129             my $class = shift;
130             my ($schema, $reader) = @_;
131             for my $field (@{ $schema->fields }){
132 1189     1189 0 1251 skip($class, $field->{type}, $reader);
133 1189         1235 }
134 1189         1523 }
135 1189         2271  
136 1189         5329 ## 1.3.2 A record is encoded by encoding the values of its fields in the order
137             ## that they are declared. In other words, a record is encoded as just the
138             ## concatenation of the encodings of its fields. Field values are encoded per
139 2     2 0 3 ## their schema.
140             my $class = shift;
141 1172     1172 0 1357 my ($writer_schema, $reader_schema, $reader) = @_;
142 1172         1211 my $record;
143 1172         1516  
144 1172         1799 my %extra_fields = %{ $reader_schema->fields_as_hash };
145             for my $field (@{ $writer_schema->fields }) {
146             my $name = $field->{name};
147             my $w_field_schema = $field->{type};
148 0     0 0 0 my $r_field_schema = delete $extra_fields{$name};
149 0         0  
150 0         0 ## 1.3.2 if the writer's record contains a field with a name not
  0         0  
151 0         0 ## present in the reader's record, the writer's value for that field
152             ## is ignored.
153             if (! $r_field_schema) {
154             $class->skip($w_field_schema, $reader);
155             next;
156             }
157             my $data = $class->decode(
158             writer_schema => $w_field_schema,
159             reader_schema => $r_field_schema->{type},
160 9     9 0 12 reader => $reader,
161 9         14 );
162 9         22 $record->{ $name } = $data;
163             }
164 9         10  
  9         20  
165 9         15 for my $name (keys %extra_fields) {
  9         16  
166 24         35 ## 1.3.2. if the reader's record schema has a field with no default
167 24         27 ## value, and writer's schema does not have a field with the same
168 24         36 ## name, an error is signalled.
169             unless (exists $extra_fields{$name}->{default}) {
170             throw Avro::Schema::Error::Mismatch(
171             "cannot resolve without default"
172             );
173 24 100       52 }
174 2         5 ## 1.3.2 ... else the default value is used
175 2         3 $record->{ $name } = $extra_fields{$name}->{default};
176             }
177             return $record;
178             }
179              
180 22         55  
181             ## 1.3.2 An enum is encoded by a int, representing the zero-based position of
182 22         76 ## the symbol in the schema.
183             my $class = shift;
184             my ($writer_schema, $reader_schema, $reader) = @_;
185 9         26 my $index = decode_int($class, @_);
186              
187             my $w_data = $writer_schema->symbols->[$index];
188             ## 1.3.2 if the writer's symbol is not present in the reader's enum,
189 2 100       5 ## then an error is signalled.
190 1         5 throw Avro::Schema::Error::Mismatch("enum unknown")
191             unless $reader_schema->is_data_valid($w_data);
192             return $w_data;
193             }
194              
195 1         2 my $class = shift;
196             my ($reader, $block_content) = @_;
197 8         25 my $block_count = decode_long($class, undef, undef, $reader);
198             while ($block_count) {
199             if ($block_count < 0) {
200 0     0 0 0 $reader->seek($block_count, 0);
201             next;
202             }
203             else {
204             for (1..$block_count) {
205 5     5 0 6 $block_content->();
206 5         7 }
207 5         10 }
208             $block_count = decode_long($class, undef, undef, $reader);
209 5         13 }
210             }
211              
212 5 100       11 my $class = shift;
213             my ($schema, $reader) = @_;
214 3         9 skip_block($reader, sub { $class->skip($schema->items, $reader) });
215             }
216              
217             ## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a
218 0     0 0 0 ## long count value, followed by that many array items. A block with count zero
219 0         0 ## indicates the end of the array. Each item is encoded per the array's item
220 0         0 ## schema.
221 0         0 ## If a block's count is negative, its absolute value is used, and the count is
222 0 0       0 ## followed immediately by a long block size
223 0         0 my $class = shift;
224 0         0 my ($writer_schema, $reader_schema, $reader) = @_;
225             my $block_count = decode_long($class, @_);
226             my @array;
227 0         0 my $writer_items = $writer_schema->items;
228 0         0 my $reader_items = $reader_schema->items;
229             while ($block_count) {
230             my $block_size;
231 0         0 if ($block_count < 0) {
232             $block_count = -$block_count;
233             $block_size = decode_long($class, @_);
234             ## XXX we can skip with $reader_schema?
235             }
236 0     0 0 0 for (1..$block_count) {
237 0         0 push @array, $class->decode(
238 0     0   0 writer_schema => $writer_items,
  0         0  
239             reader_schema => $reader_items,
240             reader => $reader,
241             );
242             }
243             $block_count = decode_long($class, @_);
244             }
245             return \@array;
246             }
247              
248 318     318 0 364 my $class = shift;
249 318         430 my ($schema, $reader) = @_;
250 318         461 skip_block($reader, sub {
251 318         369 skip_string($class, $reader);
252 318         510 $class->skip($schema->values, $reader);
253 318         482 });
254 318         577 }
255 318         337  
256 318 50       457 ## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long
257 0         0 ## count value, followed by that many key/value pairs. A block with count zero
258 0         0 ## indicates the end of the map. Each item is encoded per the map's value
259             ## schema.
260             ##
261 318         574 ## If a block's count is negative, its absolute value is used, and the count is
262 848         7933 ## followed immediately by a long block size indicating the number of bytes in
263             ## the block. This block size permits fast skipping through data, e.g., when
264             ## projecting a record to a subset of its fields.
265             my $class = shift;
266             my ($writer_schema, $reader_schema, $reader) = @_;
267             my %hash;
268 318         4606  
269             my $block_count = decode_long($class, @_);
270 318         1152 my $writer_values = $writer_schema->values;
271             my $reader_values = $reader_schema->values;
272             while ($block_count) {
273             my $block_size;
274 0     0 0 0 if ($block_count < 0) {
275 0         0 $block_count = -$block_count;
276             $block_size = decode_long($class, @_);
277 0     0   0 ## XXX we can skip with $reader_schema?
278 0         0 }
279 0         0 for (1..$block_count) {
280             my $key = decode_string($class, @_);
281             unless (defined $key && length $key) {
282             throw Avro::Schema::Error::Parse("key of map is invalid");
283             }
284             $hash{$key} = $class->decode(
285             writer_schema => $writer_values,
286             reader_schema => $reader_values,
287             reader => $reader,
288             );
289             }
290             $block_count = decode_long($class, @_);
291             }
292 112     112 0 134 return \%hash;
293 112         149 }
294 112         143  
295             my $class = shift;
296 112         167 my ($schema, $reader) = @_;
297 112         202 my $idx = decode_long($class, undef, undef, $reader);
298 112         201 my $union_schema = $schema->schemas->[$idx]
299 112         180 or throw Avro::Schema::Error::Parse("union union member");
300 112         141 $class->skip($union_schema, $reader);
301 112 50       184 }
302 0         0  
303 0         0 ## 1.3.2 A union is encoded by first writing an int value indicating the
304             ## zero-based position within the union of the schema of its value. The value
305             ## is then encoded per the indicated schema within the union.
306 112         202 my $class = shift;
307 333         471 my ($writer_schema, $reader_schema, $reader) = @_;
308 333 50 33     5049 my $idx = decode_long($class, @_);
309 0         0 my $union_schema = $writer_schema->schemas->[$idx];
310             ## XXX TODO: schema resolution
311 333         589 # The first schema in the reader's union that matches the selected writer's
312             # union schema is recursively resolved against it. if none match, an error
313             # is signalled.
314             return $class->decode(
315             reader_schema => $union_schema,
316             writer_schema => $union_schema,
317 112         187 reader => $reader,
318             );
319 112         365 }
320              
321             my $class = shift;
322             my ($schema, $reader) = @_;
323 0     0 0 0 $reader->seek($schema->size, 0);
324 0         0 }
325 0         0  
326 0 0       0 ## 1.3.2 Fixed instances are encoded using the number of bytes declared in the
327             ## schema.
328 0         0 my $class = shift;
329             my ($writer_schema, $reader_schema, $reader) = @_;
330             $reader->read(my $buf, $writer_schema->size);
331             return $buf;
332             }
333              
334             my $int = shift;
335 3     3 0 5 if (1 & $int) {
336 3         3 ## odd values are encoded negative ints
337 3         7 return -( 1 + ($int >> 1) );
338 3         7 }
339             ## even values are positive natural left shifted one bit
340             else {
341             return $int >> 1;
342             }
343 3         5 }
344              
345             my $reader = shift;
346             my $int = 0;
347             my $more;
348             my $shift = 0;
349             do {
350             $reader->read(my $buf, 1);
351 0     0 0 0 my $byte = ord $buf;
352 0         0 my $value = $byte & 0x7F;
353 0         0 $int |= $value << $shift;
354             $shift += 7;
355             $more = $byte & 0x80;
356             } until (! $more);
357             return $int;
358             }
359 12     12 0 14  
360 12         19 1;