File Coverage

blib/lib/Avro/BinaryEncoder.pm
Criterion Covered Total %
statement 112 125 89.6
branch 19 26 73.0
condition n/a
subroutine 23 25 92.0
pod 1 17 5.8
total 155 193 80.3


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one
2             # or more contributor license agreements. See the NOTICE file
3             # distributed with this work for additional information
4             # regarding copyright ownership. The ASF licenses this file
5             # to you under the Apache License, Version 2.0 (the
6             # "License"); you may not use this file except in compliance
7             # with the License. You may obtain a copy of the License at
8             #
9             # https://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing,
12             # software distributed under the License is distributed on an
13             # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14             # KIND, either express or implied. See the License for the
15             # specific language governing permissions and limitations
16             # under the License.
17              
18             use strict;
19 3     3   1453 use warnings;
  3         8  
  3         77  
20 3     3   13  
  3         5  
  3         61  
21             use Config;
22 3     3   13 use Encode();
  3         4  
  3         85  
23 3     3   1470 use Error::Simple;
  3         25809  
  3         63  
24 3     3   18 use Regexp::Common qw(number);
  3         6  
  3         29  
25 3     3   135  
  3         4  
  3         18  
26             our $VERSION = '1.11.1';
27              
28             our $max64;
29             our $complement = ~0x7F;
30             if ($Config{use64bitint}) {
31             $max64 = 9223372036854775807;
32             }
33             else {
34             require Math::BigInt;
35             $complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7));
36             $max64 = Math::BigInt->new("0b0" . ("1" x 63));
37             }
38              
39              
40             =head2 encode(%param)
41              
42             Encodes the given C<data> according to the given C<schema>, and pass it
43             to the C<emit_cb>
44              
45             Params are:
46              
47             =over 4
48              
49             =item * data
50              
51             The data to encode (can be any perl data structure, but it should match
52             schema)
53              
54             =item * schema
55              
56             The schema to use to encode C<data>
57              
58             =item * emit_cb($byte_ref)
59              
60             The callback that will be invoked with the a reference to the encoded data
61             in parameters.
62              
63             =back
64              
65             =cut
66              
67             my $class = shift;
68             my %param = @_;
69 1326     1326 1 4056 my ($schema, $data, $cb) = @param{qw/schema data emit_cb/};
70 1326         3061  
71 1326         2375 ## a schema can also be just a string
72             my $type = ref $schema ? $schema->type : $schema;
73              
74 1326 50       3502 ## might want to profile and optimize this
75             my $meth = "encode_$type";
76             $class->$meth($schema, $data, $cb);
77 1326         2019 return;
78 1326         2945 }
79 1326         3215  
80             $_[3]->(\'');
81             }
82              
83 3     3 0 996 my $class = shift;
84             my ($schema, $data, $cb) = @_;
85             $cb->( $data ? \"\x1" : \"\x0" );
86             }
87 2     2 0 975  
88 2         5 my $class = shift;
89 2 100       7 my ($schema, $data, $cb) = @_;
90             if ($data !~ /^$RE{num}{int}$/) {
91             throw Avro::BinaryEncoder::Error("cannot convert '$data' to integer");
92             }
93 22     22 0 6429 if (abs($data) > 0x7fffffff) {
94 22         36 throw Avro::BinaryEncoder::Error("int ($data) should be <= 32bits");
95 22 100       90 }
96 2         201  
97             my $enc = unsigned_varint(zigzag($data));
98 20 100       2365 $cb->(\$enc);
99 2         167 }
100              
101             my $class = shift;
102 18         225 my ($schema, $data, $cb) = @_;
103 18         92 if ($data !~ /^$RE{num}{int}$/) {
104             throw Avro::BinaryEncoder::Error("cannot convert '$data' to long integer");
105             }
106             if (abs($data) > $max64) {
107 2072     2072 0 8298 throw Avro::BinaryEncoder::Error("int ($data) should be <= 64bits");
108 2072         3075 }
109 2072 100       6112 my $enc = unsigned_varint(zigzag($data));
110 2         205 $cb->(\$enc);
111             }
112 2070 50       185997  
113 0         0 my $class = shift;
114             my ($schema, $data, $cb) = @_;
115 2070         3409 my $enc = pack "f<", $data;
116 2070         4074 $cb->(\$enc);
117             }
118              
119             my $class = shift;
120 0     0 0 0 my ($schema, $data, $cb) = @_;
121 0         0 my $enc = pack "d<", $data;
122 0         0 $cb->(\$enc);
123 0         0 }
124              
125             my $class = shift;
126             my ($schema, $data, $cb) = @_;
127 0     0 0 0 encode_long($class, undef, bytes::length($data), $cb);
128 0         0 $cb->(\$data);
129 0         0 }
130 0         0  
131             my $class = shift;
132             my ($schema, $data, $cb) = @_;
133             my $bytes = Encode::encode_utf8($data);
134 14     14 0 14 encode_long($class, undef, bytes::length($bytes), $cb);
135 14         30 $cb->(\$bytes);
136 14         26 }
137 14         28  
138             ## 1.3.2 A record is encoded by encoding the values of its fields in the order
139             ## that they are declared. In other words, a record is encoded as just the
140             ## concatenation of the encodings of its fields. Field values are encoded per
141 1170     1170 0 1482 ## their schema.
142 1170         1651 my $class = shift;
143 1170         2006 my ($schema, $data, $cb) = @_;
144 1170         7436 for my $field (@{ $schema->fields }) {
145 1170         2098 $class->encode(
146             schema => $field->{type},
147             data => $data->{ $field->{name} },
148             emit_cb => $cb,
149             );
150             }
151             }
152              
153 8     8 0 14 ## 1.3.2 An enum is encoded by a int, representing the zero-based position of
154 8         17 ## the symbol in the schema.
155 8         12 my $class = shift;
  8         37  
156             my ($schema, $data, $cb) = @_;
157             my $symbols = $schema->symbols_as_hash;
158             my $pos = $symbols->{ $data };
159 20         73 throw Avro::BinaryEncoder::Error("Cannot find enum $data")
160             unless defined $pos;
161             $class->encode_int(undef, $pos, $cb);
162             }
163              
164             ## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a
165             ## long count value, followed by that many array items. A block with count zero
166             ## indicates the end of the array. Each item is encoded per the array's item
167 5     5 0 6 ## schema.
168 5         8 ## If a block's count is negative, its absolute value is used, and the count is
169 5         10 ## followed immediately by a long block size
170 5         6  
171 5 50       11 ## maybe here it would be worth configuring what a typical block size should be
172             my $class = shift;
173 5         10 my ($schema, $data, $cb) = @_;
174              
175             ## FIXME: multiple blocks
176             if (@$data) {
177             $class->encode_long(undef, scalar @$data, $cb);
178             for (@$data) {
179             $class->encode(
180             schema => $schema->items,
181             data => $_,
182             emit_cb => $cb,
183             );
184             }
185 318     318 0 362 }
186 318         438 ## end of the only block
187             $class->encode_long(undef, 0, $cb);
188             }
189 318 50       541  
190 318         689  
191 318         606 ## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long
192 848         1732 ## count value, followed by that many key/value pairs. A block with count zero
193             ## indicates the end of the map. Each item is encoded per the map's value
194             ## schema.
195             ##
196             ## (TODO)
197             ## If a block's count is negative, its absolute value is used, and the count is
198             ## followed immediately by a long block size indicating the number of bytes in
199             ## the block. This block size permits fast skipping through data, e.g., when
200 318         588 ## projecting a record to a subset of its fields.
201             my $class = shift;
202             my ($schema, $data, $cb) = @_;
203              
204             my @keys = keys %$data;
205             if (@keys) {
206             $class->encode_long(undef, scalar @keys, $cb);
207             for (@keys) {
208             ## the key
209             $class->encode_string(undef, $_, $cb);
210              
211             ## the value
212             $class->encode(
213             schema => $schema->values,
214             data => $data->{$_},
215 111     111 0 164 emit_cb => $cb,
216 111         249 );
217             }
218 111         342 }
219 111 50       234 ## end of the only block
220 111         307 $class->encode_long(undef, 0, $cb);
221 111         255 }
222              
223 330         686 ## 1.3.2 A union is encoded by first writing an int value indicating the
224             ## zero-based position within the union of the schema of its value. The value
225             ## is then encoded per the indicated schema within the union.
226             my $class = shift;
227             my ($schema, $data, $cb) = @_;
228 330         828 my $idx = 0;
229             my $elected_schema;
230             for my $inner_schema (@{$schema->schemas}) {
231             if ($inner_schema->is_data_valid($data)) {
232             $elected_schema = $inner_schema;
233             last;
234 111         318 }
235             $idx++;
236             }
237             unless ($elected_schema) {
238             throw Avro::BinaryEncoder::Error("union cannot validate the data");
239             }
240             $class->encode_long(undef, $idx, $cb);
241 4     4 0 7 $class->encode(
242 4         9 schema => $elected_schema,
243 4         5 data => $data,
244 4         5 emit_cb => $cb,
245 4         6 );
  4         8  
246 8 100       19 }
247 4         5  
248 4         9 ## 1.3.2 Fixed instances are encoded using the number of bytes declared in the
249             ## schema.
250 4         6 my $class = shift;
251             my ($schema, $data, $cb) = @_;
252 4 50       10 if (bytes::length $data != $schema->size) {
253 0         0 my $s1 = bytes::length $data;
254             my $s2 = $schema->size;
255 4         24 throw Avro::BinaryEncoder::Error("Fixed size doesn't match $s1!=$s2");
256 4         27 }
257             $cb->(\$data);
258             }
259              
260             use warnings FATAL => 'numeric';
261             if ( $_[0] >= 0 ) {
262             return $_[0] << 1;
263             }
264             return (($_[0] << 1) ^ -1) | 0x1;
265             }
266 10     10 0 19  
267 10         23 my @bytes;
268 10 50       21 while ($_[0] & $complement) { # mask with continuation bit
269 0         0 push @bytes, ($_[0] & 0x7F) | 0x80; # out and set continuation bit
270 0         0 $_[0] >>= 7; # next please
271 0         0 }
272             push @bytes, $_[0]; # last byte
273 10         38 return pack "C*", @bytes;
274             }
275              
276             use parent 'Error::Simple';
277 3     3   3762  
  3         5  
  3         514  
278 2088 100   2088 0 3203 1;