File Coverage

blib/lib/Avro/BinaryEncoder.pm
Criterion Covered Total %
statement 112 125 89.6
branch 19 26 73.0
condition n/a
subroutine 23 25 92.0
pod 1 17 5.8
total 155 193 80.3


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one
2             # or more contributor license agreements. See the NOTICE file
3             # distributed with this work for additional information
4             # regarding copyright ownership. The ASF licenses this file
5             # to you under the Apache License, Version 2.0 (the
6             # "License"); you may not use this file except in compliance
7             # with the License. You may obtain a copy of the License at
8             #
9             # https://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing,
12             # software distributed under the License is distributed on an
13             # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14             # KIND, either express or implied. See the License for the
15             # specific language governing permissions and limitations
16             # under the License.
17              
18             package Avro::BinaryEncoder;
19 3     3   1743 use strict;
  3         49  
  3         95  
20 3     3   19 use warnings;
  3         7  
  3         70  
21              
22 3     3   14 use Config;
  3         4  
  3         125  
23 3     3   1711 use Encode();
  3         31199  
  3         73  
24 3     3   19 use Error::Simple;
  3         7  
  3         32  
25 3     3   173 use Regexp::Common qw(number);
  3         6  
  3         17  
26              
27             our $VERSION = '1.11.2';
28              
29             our $max64;
30             our $complement = ~0x7F;
31             if ($Config{use64bitint}) {
32             $max64 = 9223372036854775807;
33             }
34             else {
35             require Math::BigInt;
36             $complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7));
37             $max64 = Math::BigInt->new("0b0" . ("1" x 63));
38             }
39              
40              
41             =head2 encode(%param)
42              
43             Encodes the given C<data> according to the given C<schema>, and pass it
44             to the C<emit_cb>
45              
46             Params are:
47              
48             =over 4
49              
50             =item * data
51              
52             The data to encode (can be any perl data structure, but it should match
53             schema)
54              
55             =item * schema
56              
57             The schema to use to encode C<data>
58              
59             =item * emit_cb($byte_ref)
60              
61             The callback that will be invoked with the a reference to the encoded data
62             in parameters.
63              
64             =back
65              
66             =cut
67              
68             sub encode {
69 1326     1326 1 4857 my $class = shift;
70 1326         3462 my %param = @_;
71 1326         2816 my ($schema, $data, $cb) = @param{qw/schema data emit_cb/};
72              
73             ## a schema can also be just a string
74 1326 50       3930 my $type = ref $schema ? $schema->type : $schema;
75              
76             ## might want to profile and optimize this
77 1326         2336 my $meth = "encode_$type";
78 1326         3552 $class->$meth($schema, $data, $cb);
79 1326         3843 return;
80             }
81              
82             sub encode_null {
83 3     3 0 1200 $_[3]->(\'');
84             }
85              
86             sub encode_boolean {
87 2     2 0 1085 my $class = shift;
88 2         5 my ($schema, $data, $cb) = @_;
89 2 100       12 $cb->( $data ? \"\x1" : \"\x0" );
90             }
91              
92             sub encode_int {
93 22     22 0 7539 my $class = shift;
94 22         40 my ($schema, $data, $cb) = @_;
95 22 100       101 if ($data !~ /^$RE{num}{int}$/) {
96 2         244 throw Avro::BinaryEncoder::Error("cannot convert '$data' to integer");
97             }
98 20 100       2865 if (abs($data) > 0x7fffffff) {
99 2         167 throw Avro::BinaryEncoder::Error("int ($data) should be <= 32bits");
100             }
101              
102 18         308 my $enc = unsigned_varint(zigzag($data));
103 18         116 $cb->(\$enc);
104             }
105              
106             sub encode_long {
107 2072     2072 0 9621 my $class = shift;
108 2072         3289 my ($schema, $data, $cb) = @_;
109 2072 100       7386 if ($data !~ /^$RE{num}{int}$/) {
110 2         249 throw Avro::BinaryEncoder::Error("cannot convert '$data' to long integer");
111             }
112 2070 50       230910 if (abs($data) > $max64) {
113 0         0 throw Avro::BinaryEncoder::Error("int ($data) should be <= 64bits");
114             }
115 2070         4202 my $enc = unsigned_varint(zigzag($data));
116 2070         5050 $cb->(\$enc);
117             }
118              
119             sub encode_float {
120 0     0 0 0 my $class = shift;
121 0         0 my ($schema, $data, $cb) = @_;
122 0         0 my $enc = pack "f<", $data;
123 0         0 $cb->(\$enc);
124             }
125              
126             sub encode_double {
127 0     0 0 0 my $class = shift;
128 0         0 my ($schema, $data, $cb) = @_;
129 0         0 my $enc = pack "d<", $data;
130 0         0 $cb->(\$enc);
131             }
132              
133             sub encode_bytes {
134 14     14 0 20 my $class = shift;
135 14         28 my ($schema, $data, $cb) = @_;
136 14         30 encode_long($class, undef, bytes::length($data), $cb);
137 14         38 $cb->(\$data);
138             }
139              
140             sub encode_string {
141 1170     1170 0 1811 my $class = shift;
142 1170         1927 my ($schema, $data, $cb) = @_;
143 1170         2352 my $bytes = Encode::encode_utf8($data);
144 1170         9302 encode_long($class, undef, bytes::length($bytes), $cb);
145 1170         2548 $cb->(\$bytes);
146             }
147              
148             ## 1.3.2 A record is encoded by encoding the values of its fields in the order
149             ## that they are declared. In other words, a record is encoded as just the
150             ## concatenation of the encodings of its fields. Field values are encoded per
151             ## their schema.
152             sub encode_record {
153 8     8 0 19 my $class = shift;
154 8         26 my ($schema, $data, $cb) = @_;
155 8         13 for my $field (@{ $schema->fields }) {
  8         50  
156             $class->encode(
157             schema => $field->{type},
158             data => $data->{ $field->{name} },
159 20         87 emit_cb => $cb,
160             );
161             }
162             }
163              
164             ## 1.3.2 An enum is encoded by a int, representing the zero-based position of
165             ## the symbol in the schema.
166             sub encode_enum {
167 5     5 0 10 my $class = shift;
168 5         9 my ($schema, $data, $cb) = @_;
169 5         13 my $symbols = $schema->symbols_as_hash;
170 5         11 my $pos = $symbols->{ $data };
171 5 50       11 throw Avro::BinaryEncoder::Error("Cannot find enum $data")
172             unless defined $pos;
173 5         12 $class->encode_int(undef, $pos, $cb);
174             }
175              
176             ## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a
177             ## long count value, followed by that many array items. A block with count zero
178             ## indicates the end of the array. Each item is encoded per the array's item
179             ## schema.
180             ## If a block's count is negative, its absolute value is used, and the count is
181             ## followed immediately by a long block size
182              
183             ## maybe here it would be worth configuring what a typical block size should be
184             sub encode_array {
185 318     318 0 451 my $class = shift;
186 318         518 my ($schema, $data, $cb) = @_;
187              
188             ## FIXME: multiple blocks
189 318 50       633 if (@$data) {
190 318         818 $class->encode_long(undef, scalar @$data, $cb);
191 318         735 for (@$data) {
192 848         2129 $class->encode(
193             schema => $schema->items,
194             data => $_,
195             emit_cb => $cb,
196             );
197             }
198             }
199             ## end of the only block
200 318         717 $class->encode_long(undef, 0, $cb);
201             }
202              
203              
204             ## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long
205             ## count value, followed by that many key/value pairs. A block with count zero
206             ## indicates the end of the map. Each item is encoded per the map's value
207             ## schema.
208             ##
209             ## (TODO)
210             ## If a block's count is negative, its absolute value is used, and the count is
211             ## followed immediately by a long block size indicating the number of bytes in
212             ## the block. This block size permits fast skipping through data, e.g., when
213             ## projecting a record to a subset of its fields.
214             sub encode_map {
215 111     111 0 151 my $class = shift;
216 111         198 my ($schema, $data, $cb) = @_;
217              
218 111         300 my @keys = keys %$data;
219 111 50       259 if (@keys) {
220 111         362 $class->encode_long(undef, scalar @keys, $cb);
221 111         261 for (@keys) {
222             ## the key
223 330         969 $class->encode_string(undef, $_, $cb);
224              
225             ## the value
226             $class->encode(
227             schema => $schema->values,
228 330         898 data => $data->{$_},
229             emit_cb => $cb,
230             );
231             }
232             }
233             ## end of the only block
234 111         245 $class->encode_long(undef, 0, $cb);
235             }
236              
237             ## 1.3.2 A union is encoded by first writing an int value indicating the
238             ## zero-based position within the union of the schema of its value. The value
239             ## is then encoded per the indicated schema within the union.
240             sub encode_union {
241 4     4 0 9 my $class = shift;
242 4         10 my ($schema, $data, $cb) = @_;
243 4         5 my $idx = 0;
244 4         8 my $elected_schema;
245 4         6 for my $inner_schema (@{$schema->schemas}) {
  4         11  
246 8 100       23 if ($inner_schema->is_data_valid($data)) {
247 4         9 $elected_schema = $inner_schema;
248 4         8 last;
249             }
250 4         9 $idx++;
251             }
252 4 50       10 unless ($elected_schema) {
253 0         0 throw Avro::BinaryEncoder::Error("union cannot validate the data");
254             }
255 4         29 $class->encode_long(undef, $idx, $cb);
256 4         30 $class->encode(
257             schema => $elected_schema,
258             data => $data,
259             emit_cb => $cb,
260             );
261             }
262              
263             ## 1.3.2 Fixed instances are encoded using the number of bytes declared in the
264             ## schema.
265             sub encode_fixed {
266 10     10 0 18 my $class = shift;
267 10         17 my ($schema, $data, $cb) = @_;
268 10 50       26 if (bytes::length $data != $schema->size) {
269 0         0 my $s1 = bytes::length $data;
270 0         0 my $s2 = $schema->size;
271 0         0 throw Avro::BinaryEncoder::Error("Fixed size doesn't match $s1!=$s2");
272             }
273 10         29 $cb->(\$data);
274             }
275              
276             sub zigzag {
277 3     3   4495 use warnings FATAL => 'numeric';
  3         6  
  3         582  
278 2088 100   2088 0 3952 if ( $_[0] >= 0 ) {
279 2083         4239 return $_[0] << 1;
280             }
281 5         223 return (($_[0] << 1) ^ -1) | 0x1;
282             }
283              
284             sub unsigned_varint {
285 2088     2088 0 7498 my @bytes;
286 2088         4221 while ($_[0] & $complement) { # mask with continuation bit
287 8         399 push @bytes, ($_[0] & 0x7F) | 0x80; # out and set continuation bit
288 8         473 $_[0] >>= 7; # next please
289             }
290 2088         3887 push @bytes, $_[0]; # last byte
291 2088         6569 return pack "C*", @bytes;
292             }
293              
294             package Avro::BinaryEncoder::Error;
295 3     3   22 use parent 'Error::Simple';
  3         6  
  3         16  
296              
297             1;