File Coverage

blib/lib/Avro/BinaryDecoder.pm

Criterion	Covered	Total	%
statement	128	184	69.5
branch	13	22	59.0
condition	1	3	33.3
subroutine	23	40	57.5
pod	1	32	3.1
total	166	281	59.0

line	stmt	bran	cond	sub	pod	time	code
1							# Licensed to the Apache Software Foundation (ASF) under one
2							# or more contributor license agreements. See the NOTICE file
3							# distributed with this work for additional information
4							# regarding copyright ownership. The ASF licenses this file
5							# to you under the Apache License, Version 2.0 (the
6							# "License"); you may not use this file except in compliance
7							# with the License. You may obtain a copy of the License at
8							#
9							# https://www.apache.org/licenses/LICENSE-2.0
10							#
11							# Unless required by applicable law or agreed to in writing,
12							# software distributed under the License is distributed on an
13							# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14							# KIND, either express or implied. See the License for the
15							# specific language governing permissions and limitations
16							# under the License.
17
18							use strict;
19	2			2		1054	use warnings;
	2					4
	2					46
20	2			2		8
	2					3
	2					37
21							use Config;
22	2			2		9	use Encode();
	2					2
	2					45
23	2			2		8	use Error::Simple;
	2					2
	2					22
24	2			2		8	use Avro::Schema;
	2					2
	2					10
25	2			2		101
	2					10
	2					3346
26							our $VERSION = '1.11.1';
27
28							our $complement = ~0x7F;
29							unless ($Config{use64bitint}) {
30							require Math::BigInt;
31							$complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7));
32							}
33
34							=head2 decode(%param)
35
36							Resolve the given writer and reader_schema to decode the data provided by the
37							reader.
38
39							=over 4
40
41							=item * writer_schema
42
43							The schema that was used to encode the data provided by the C<reader>
44
45							=item * reader_schema
46
47							The schema we want to use to decode the data.
48
49							=item * reader
50
51							An object implementing a straightforward interface. C<read($buf, $nbytes)> and
52							C<seek($nbytes, $whence)> are expected. Typically a IO::String object or a
53							IO::File object. It is expected that this calls will block the decoder, if not
54							enough data is available for read.
55
56							=back
57
58							=cut
59							my $class = shift;
60							my %param = @_;
61	1332			1332	1	2951
62	1332					2641	my ($writer_schema, $reader_schema, $reader)
63							= @param{qw/writer_schema reader_schema reader/};
64
65	1332					2447	my $type = Avro::Schema->match(
66							writer => $writer_schema,
67	1332	100				2422	reader => $reader_schema,
68							) or throw Avro::Schema::Error::Mismatch;
69
70							my $meth = "decode_$type";
71							return $class->$meth($writer_schema, $reader_schema, $reader);
72	1331					2100	}
73	1331					2301
74							my $class = shift;
75							my ($schema, $reader) = @_;
76							my $type = ref $schema ? $schema->type : $schema;
77	2			2	0	3	my $meth = "skip_$type";
78	2					2	return $class->$meth($schema, $reader);
79	2	50				5	}
80	2					4
81	2					5
82							my $class = shift;
83							my $reader = pop;
84	1			1	0	3	$reader->read(my $bool, 1);
85							return unpack 'C', $bool;
86	0			0	0	0	}
87
88	0			0	0	0	my $class = shift;
89	0					0	my $reader = pop;
90	0					0	return zigzag(unsigned_varint($reader));
91	0					0	}
92
93							my $class = shift;
94	0			0	0	0	return decode_int($class, @_);
95							}
96	2084			2084	0	2171
97	2084					2065	my $class = shift;
98	2084					2496	my $reader = pop;
99							$reader->read(my $buf, 4);
100							return unpack "f<", $buf;
101	0			0	0	0	}
102
103	2075			2075	0	2182	my $class = shift;
104	2075					2557	my $reader = pop;
105							$reader->read(my $buf, 8);
106							return unpack "d<", $buf,
107	0			0	0	0	}
108
109	0			0	0	0	my $class = shift;
110	0					0	my $reader = pop;
111	0					0	my $size = decode_long($class, undef, undef, $reader);
112	0					0	$reader->seek($size, 0);
113							return;
114							}
115	0			0	0	0
116							my $class = shift;
117	0			0	0	0	my $reader = pop;
118	0					0	my $size = decode_long($class, undef, undef, $reader);
119	0					0	$reader->read(my $buf, $size);
120	0					0	return $buf;
121							}
122
123							my $class = shift;
124	2			2	0	2	my $reader = pop;
125	2					2	my $bytes = decode_bytes($class, undef, undef, $reader);
126	2					4	return Encode::decode_utf8($bytes);
127	2					7	}
128	2					38
129							my $class = shift;
130							my ($schema, $reader) = @_;
131							for my $field (@{ $schema->fields }){
132	1189			1189	0	1251	skip($class, $field->{type}, $reader);
133	1189					1235	}
134	1189					1523	}
135	1189					2271
136	1189					5329	## 1.3.2 A record is encoded by encoding the values of its fields in the order
137							## that they are declared. In other words, a record is encoded as just the
138							## concatenation of the encodings of its fields. Field values are encoded per
139	2			2	0	3	## their schema.
140							my $class = shift;
141	1172			1172	0	1357	my ($writer_schema, $reader_schema, $reader) = @_;
142	1172					1211	my $record;
143	1172					1516
144	1172					1799	my %extra_fields = %{ $reader_schema->fields_as_hash };
145							for my $field (@{ $writer_schema->fields }) {
146							my $name = $field->{name};
147							my $w_field_schema = $field->{type};
148	0			0	0	0	my $r_field_schema = delete $extra_fields{$name};
149	0					0
150	0					0	## 1.3.2 if the writer's record contains a field with a name not
	0					0
151	0					0	## present in the reader's record, the writer's value for that field
152							## is ignored.
153							if (! $r_field_schema) {
154							$class->skip($w_field_schema, $reader);
155							next;
156							}
157							my $data = $class->decode(
158							writer_schema => $w_field_schema,
159							reader_schema => $r_field_schema->{type},
160	9			9	0	12	reader => $reader,
161	9					14	);
162	9					22	$record->{ $name } = $data;
163							}
164	9					10
	9					20
165	9					15	for my $name (keys %extra_fields) {
	9					16
166	24					35	## 1.3.2. if the reader's record schema has a field with no default
167	24					27	## value, and writer's schema does not have a field with the same
168	24					36	## name, an error is signalled.
169							unless (exists $extra_fields{$name}->{default}) {
170							throw Avro::Schema::Error::Mismatch(
171							"cannot resolve without default"
172							);
173	24	100				52	}
174	2					5	## 1.3.2 ... else the default value is used
175	2					3	$record->{ $name } = $extra_fields{$name}->{default};
176							}
177							return $record;
178							}
179
180	22					55
181							## 1.3.2 An enum is encoded by a int, representing the zero-based position of
182	22					76	## the symbol in the schema.
183							my $class = shift;
184							my ($writer_schema, $reader_schema, $reader) = @_;
185	9					26	my $index = decode_int($class, @_);
186
187							my $w_data = $writer_schema->symbols->[$index];
188							## 1.3.2 if the writer's symbol is not present in the reader's enum,
189	2	100				5	## then an error is signalled.
190	1					5	throw Avro::Schema::Error::Mismatch("enum unknown")
191							unless $reader_schema->is_data_valid($w_data);
192							return $w_data;
193							}
194
195	1					2	my $class = shift;
196							my ($reader, $block_content) = @_;
197	8					25	my $block_count = decode_long($class, undef, undef, $reader);
198							while ($block_count) {
199							if ($block_count < 0) {
200	0			0	0	0	$reader->seek($block_count, 0);
201							next;
202							}
203							else {
204							for (1..$block_count) {
205	5			5	0	6	$block_content->();
206	5					7	}
207	5					10	}
208							$block_count = decode_long($class, undef, undef, $reader);
209	5					13	}
210							}
211
212	5	100				11	my $class = shift;
213							my ($schema, $reader) = @_;
214	3					9	skip_block($reader, sub { $class->skip($schema->items, $reader) });
215							}
216
217							## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a
218	0			0	0	0	## long count value, followed by that many array items. A block with count zero
219	0					0	## indicates the end of the array. Each item is encoded per the array's item
220	0					0	## schema.
221	0					0	## If a block's count is negative, its absolute value is used, and the count is
222	0	0				0	## followed immediately by a long block size
223	0					0	my $class = shift;
224	0					0	my ($writer_schema, $reader_schema, $reader) = @_;
225							my $block_count = decode_long($class, @_);
226							my @array;
227	0					0	my $writer_items = $writer_schema->items;
228	0					0	my $reader_items = $reader_schema->items;
229							while ($block_count) {
230							my $block_size;
231	0					0	if ($block_count < 0) {
232							$block_count = -$block_count;
233							$block_size = decode_long($class, @_);
234							## XXX we can skip with $reader_schema?
235							}
236	0			0	0	0	for (1..$block_count) {
237	0					0	push @array, $class->decode(
238	0			0		0	writer_schema => $writer_items,
	0					0
239							reader_schema => $reader_items,
240							reader => $reader,
241							);
242							}
243							$block_count = decode_long($class, @_);
244							}
245							return \@array;
246							}
247
248	318			318	0	364	my $class = shift;
249	318					430	my ($schema, $reader) = @_;
250	318					461	skip_block($reader, sub {
251	318					369	skip_string($class, $reader);
252	318					510	$class->skip($schema->values, $reader);
253	318					482	});
254	318					577	}
255	318					337
256	318	50				457	## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long
257	0					0	## count value, followed by that many key/value pairs. A block with count zero
258	0					0	## indicates the end of the map. Each item is encoded per the map's value
259							## schema.
260							##
261	318					574	## If a block's count is negative, its absolute value is used, and the count is
262	848					7933	## followed immediately by a long block size indicating the number of bytes in
263							## the block. This block size permits fast skipping through data, e.g., when
264							## projecting a record to a subset of its fields.
265							my $class = shift;
266							my ($writer_schema, $reader_schema, $reader) = @_;
267							my %hash;
268	318					4606
269							my $block_count = decode_long($class, @_);
270	318					1152	my $writer_values = $writer_schema->values;
271							my $reader_values = $reader_schema->values;
272							while ($block_count) {
273							my $block_size;
274	0			0	0	0	if ($block_count < 0) {
275	0					0	$block_count = -$block_count;
276							$block_size = decode_long($class, @_);
277	0			0		0	## XXX we can skip with $reader_schema?
278	0					0	}
279	0					0	for (1..$block_count) {
280							my $key = decode_string($class, @_);
281							unless (defined $key && length $key) {
282							throw Avro::Schema::Error::Parse("key of map is invalid");
283							}
284							$hash{$key} = $class->decode(
285							writer_schema => $writer_values,
286							reader_schema => $reader_values,
287							reader => $reader,
288							);
289							}
290							$block_count = decode_long($class, @_);
291							}
292	112			112	0	134	return \%hash;
293	112					149	}
294	112					143
295							my $class = shift;
296	112					167	my ($schema, $reader) = @_;
297	112					202	my $idx = decode_long($class, undef, undef, $reader);
298	112					201	my $union_schema = $schema->schemas->[$idx]
299	112					180	or throw Avro::Schema::Error::Parse("union union member");
300	112					141	$class->skip($union_schema, $reader);
301	112	50				184	}
302	0					0
303	0					0	## 1.3.2 A union is encoded by first writing an int value indicating the
304							## zero-based position within the union of the schema of its value. The value
305							## is then encoded per the indicated schema within the union.
306	112					202	my $class = shift;
307	333					471	my ($writer_schema, $reader_schema, $reader) = @_;
308	333	50	33			5049	my $idx = decode_long($class, @_);
309	0					0	my $union_schema = $writer_schema->schemas->[$idx];
310							## XXX TODO: schema resolution
311	333					589	# The first schema in the reader's union that matches the selected writer's
312							# union schema is recursively resolved against it. if none match, an error
313							# is signalled.
314							return $class->decode(
315							reader_schema => $union_schema,
316							writer_schema => $union_schema,
317	112					187	reader => $reader,
318							);
319	112					365	}
320
321							my $class = shift;
322							my ($schema, $reader) = @_;
323	0			0	0	0	$reader->seek($schema->size, 0);
324	0					0	}
325	0					0
326	0	0				0	## 1.3.2 Fixed instances are encoded using the number of bytes declared in the
327							## schema.
328	0					0	my $class = shift;
329							my ($writer_schema, $reader_schema, $reader) = @_;
330							$reader->read(my $buf, $writer_schema->size);
331							return $buf;
332							}
333
334							my $int = shift;
335	3			3	0	5	if (1 & $int) {
336	3					3	## odd values are encoded negative ints
337	3					7	return -( 1 + ($int >> 1) );
338	3					7	}
339							## even values are positive natural left shifted one bit
340							else {
341							return $int >> 1;
342							}
343	3					5	}
344
345							my $reader = shift;
346							my $int = 0;
347							my $more;
348							my $shift = 0;
349							do {
350							$reader->read(my $buf, 1);
351	0			0	0	0	my $byte = ord $buf;
352	0					0	my $value = $byte & 0x7F;
353	0					0	$int \|= $value << $shift;
354							$shift += 7;
355							$more = $byte & 0x80;
356							} until (! $more);
357							return $int;
358							}
359	12			12	0	14
360	12					19	1;