| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one |
|
2
|
|
|
|
|
|
|
# or more contributor license agreements. See the NOTICE file |
|
3
|
|
|
|
|
|
|
# distributed with this work for additional information |
|
4
|
|
|
|
|
|
|
# regarding copyright ownership. The ASF licenses this file |
|
5
|
|
|
|
|
|
|
# to you under the Apache License, Version 2.0 (the |
|
6
|
|
|
|
|
|
|
# "License"); you may not use this file except in compliance |
|
7
|
|
|
|
|
|
|
# with the License. You may obtain a copy of the License at |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# https://www.apache.org/licenses/LICENSE-2.0 |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, |
|
12
|
|
|
|
|
|
|
# software distributed under the License is distributed on an |
|
13
|
|
|
|
|
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
|
14
|
|
|
|
|
|
|
# KIND, either express or implied. See the License for the |
|
15
|
|
|
|
|
|
|
# specific language governing permissions and limitations |
|
16
|
|
|
|
|
|
|
# under the License. |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package Avro::BinaryEncoder; |
|
19
|
3
|
|
|
3
|
|
1563
|
use strict; |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
87
|
|
|
20
|
3
|
|
|
3
|
|
12
|
use warnings; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
59
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
3
|
|
|
3
|
|
12
|
use Config; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
106
|
|
|
23
|
3
|
|
|
3
|
|
1466
|
use Encode(); |
|
|
3
|
|
|
|
|
25602
|
|
|
|
3
|
|
|
|
|
60
|
|
|
24
|
3
|
|
|
3
|
|
16
|
use Error::Simple; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
33
|
|
|
25
|
3
|
|
|
3
|
|
127
|
use Regexp::Common qw(number); |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
17
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
our $VERSION = '1.11.3';

## Limits shared by the integer encoders:
##   $max64      - largest value a 64-bit signed long can hold
##   $complement - mask selecting every bit above the low seven; used by
##                 unsigned_varint to decide whether another byte is needed
## A perl built without 64-bit integers cannot hold these natively, so they
## are represented with Math::BigInt objects there.
our ($max64, $complement);
if (!$Config{use64bitint}) {
    require Math::BigInt;
    $max64      = Math::BigInt->new("0b0" . ("1" x 63));
    $complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7));
}
else {
    $max64      = 9223372036854775807;
    $complement = ~0x7F;
}
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head2 encode(%param) |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Encodes the given C<data> according to the given C<schema>, and passes it |
|
44
|
|
|
|
|
|
|
to the C<emit_cb> |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Params are: |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=over 4 |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=item * data |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The data to encode (can be any perl data structure, but it should match |
|
53
|
|
|
|
|
|
|
schema) |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item * schema |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The schema to use to encode C<data> |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * emit_cb($byte_ref) |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The callback that will be invoked with a reference to the encoded data |
|
62
|
|
|
|
|
|
|
in parameters. |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=back |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
## Entry point: dispatches to the "encode_<type>" class method matching the
## schema.
##
## Named parameters:
##   schema  - a schema object (its ->type is used) or a plain type name
##   data    - the value to encode
##   emit_cb - callback handed through to the type-specific encoder
##
## Always returns nothing; the encoded bytes are delivered via emit_cb.
sub encode {
    my ($class, %param) = @_;

    my $schema = $param{schema};
    my $data   = $param{data};
    my $cb     = $param{emit_cb};

    ## a schema may be an object or simply the name of a type
    my $type = $schema;
    $type = $schema->type if ref $schema;

    ## might want to profile and optimize this
    my $encoder = "encode_$type";
    $class->$encoder($schema, $data, $cb);

    return;
}
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
## A null value occupies zero bytes: the callback (fourth argument, after the
## class, schema and data) receives a reference to an empty string.
sub encode_null {
    my (undef, undef, undef, $cb) = @_;
    return $cb->(\'');
}
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
## Emits a single byte: 0x01 when $data is true, 0x00 otherwise.
## $schema is not consulted.
sub encode_boolean {
    my ($class, $schema, $data, $cb) = @_;
    return $cb->(\"\x1") if $data;
    return $cb->(\"\x0");
}
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
## Encodes a 32-bit signed integer as a zigzag, variable-length value.
##
##   $schema - not consulted (internal callers pass undef)
##   $data   - the integer to encode; must match $RE{num}{int}
##   $cb     - code ref invoked with a reference to the encoded bytes
##
## Throws Avro::BinaryEncoder::Error when $data is not an integer or lies
## outside -2147483648 .. 2147483647.
sub encode_int {
    my ($class, $schema, $data, $cb) = @_;

    if ($data !~ /^$RE{num}{int}$/) {
        Avro::BinaryEncoder::Error->throw("cannot convert '$data' to integer");
    }

    ## The signed range is asymmetric: the most negative 32-bit value has no
    ## positive counterpart, so a check on abs($data) would wrongly reject it.
    if ($data > 2147483647 || $data < -2147483648) {
        Avro::BinaryEncoder::Error->throw("int ($data) should be <= 32bits");
    }

    my $enc = unsigned_varint(zigzag($data));
    return $cb->(\$enc);
}
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
## Encodes a 64-bit signed integer as a zigzag, variable-length value.
##
##   $schema - not consulted (internal callers pass undef)
##   $data   - the integer to encode; must match $RE{num}{int}
##   $cb     - code ref invoked with a reference to the encoded bytes
##
## Throws Avro::BinaryEncoder::Error when $data is not an integer or lies
## outside -($max64 + 1) .. $max64.
sub encode_long {
    my ($class, $schema, $data, $cb) = @_;

    if ($data !~ /^$RE{num}{int}$/) {
        Avro::BinaryEncoder::Error->throw("cannot convert '$data' to long integer");
    }

    ## The signed range is asymmetric: the most negative 64-bit value has no
    ## positive counterpart, so a check on abs($data) would wrongly reject it.
    if ($data > $max64 || $data < -$max64 - 1) {
        Avro::BinaryEncoder::Error->throw("int ($data) should be <= 64bits");
    }

    my $enc = unsigned_varint(zigzag($data));
    return $cb->(\$enc);
}
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
## Packs $data with the "f<" template (little-endian single-precision float)
## and hands a reference to the result to $cb. $schema is not consulted.
sub encode_float {
    my ($class, $schema, $data, $cb) = @_;
    my $packed = pack 'f<', $data;
    return $cb->(\$packed);
}
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
## Packs $data with the "d<" template (little-endian double-precision float)
## and hands a reference to the result to $cb. $schema is not consulted.
sub encode_double {
    my ($class, $schema, $data, $cb) = @_;
    my $packed = pack 'd<', $data;
    return $cb->(\$packed);
}
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
## Emits the byte length of $data as a long, then $data itself.
## $schema is not consulted.
sub encode_bytes {
    my ($class, $schema, $data, $cb) = @_;
    my $size = bytes::length($data);
    encode_long($class, undef, $size, $cb);
    return $cb->(\$data);
}
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
## Converts $data to UTF-8, then emits the byte length of that encoding as a
## long followed by the UTF-8 bytes themselves. $schema is not consulted.
sub encode_string {
    my ($class, $schema, $data, $cb) = @_;
    my $utf8 = Encode::encode_utf8($data);
    my $size = bytes::length($utf8);
    encode_long($class, undef, $size, $cb);
    return $cb->(\$utf8);
}
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
## 1.3.2 A record is encoded by encoding the values of its fields in the order |
|
149
|
|
|
|
|
|
|
## that they are declared. In other words, a record is encoded as just the |
|
150
|
|
|
|
|
|
|
## concatenation of the encodings of its fields. Field values are encoded per |
|
151
|
|
|
|
|
|
|
## their schema. |
|
152
|
|
|
|
|
|
|
## Walks the schema's fields in the order ->fields returns them and encodes
## the matching entry of the $data hash with each field's own type.
sub encode_record {
    my ($class, $schema, $data, $cb) = @_;
    my $fields = $schema->fields;
    foreach my $member (@$fields) {
        $class->encode(
            schema  => $member->{type},
            data    => $data->{ $member->{name} },
            emit_cb => $cb,
        );
    }
}
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## 1.3.2 An enum is encoded by a int, representing the zero-based position of |
|
165
|
|
|
|
|
|
|
## the symbol in the schema. |
|
166
|
|
|
|
|
|
|
## Looks $data up in the schema's symbols_as_hash table and encodes the value
## found there as an int.
## Throws Avro::BinaryEncoder::Error when the lookup yields nothing defined.
sub encode_enum {
    my ($class, $schema, $data, $cb) = @_;
    my $position = $schema->symbols_as_hash->{ $data };
    if (!defined $position) {
        Avro::BinaryEncoder::Error->throw("Cannot find enum $data");
    }
    return $class->encode_int(undef, $position, $cb);
}
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a |
|
177
|
|
|
|
|
|
|
## long count value, followed by that many array items. A block with count zero |
|
178
|
|
|
|
|
|
|
## indicates the end of the array. Each item is encoded per the array's item |
|
179
|
|
|
|
|
|
|
## schema. |
|
180
|
|
|
|
|
|
|
## If a block's count is negative, its absolute value is used, and the count is |
|
181
|
|
|
|
|
|
|
## followed immediately by a long block size |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
## maybe here it would be worth configuring what a typical block size should be |
|
184
|
|
|
|
|
|
|
## Writes the array as at most one block: the element count as a long, then
## every element encoded with the schema's ->items type. A zero long always
## follows to mark the end, so an empty array is just that terminator.
sub encode_array {
    my ($class, $schema, $data, $cb) = @_;

    ## FIXME: multiple blocks
    my $count = @$data;
    if ($count) {
        $class->encode_long(undef, $count, $cb);
        foreach my $element (@$data) {
            $class->encode(
                schema  => $schema->items,
                data    => $element,
                emit_cb => $cb,
            );
        }
    }

    ## end of the only block
    return $class->encode_long(undef, 0, $cb);
}
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long |
|
205
|
|
|
|
|
|
|
## count value, followed by that many key/value pairs. A block with count zero |
|
206
|
|
|
|
|
|
|
## indicates the end of the map. Each item is encoded per the map's value |
|
207
|
|
|
|
|
|
|
## schema. |
|
208
|
|
|
|
|
|
|
## |
|
209
|
|
|
|
|
|
|
## (TODO) |
|
210
|
|
|
|
|
|
|
## If a block's count is negative, its absolute value is used, and the count is |
|
211
|
|
|
|
|
|
|
## followed immediately by a long block size indicating the number of bytes in |
|
212
|
|
|
|
|
|
|
## the block. This block size permits fast skipping through data, e.g., when |
|
213
|
|
|
|
|
|
|
## projecting a record to a subset of its fields. |
|
214
|
|
|
|
|
|
|
## Writes the map as at most one block: the number of keys as a long, then
## each key as a string followed by its value encoded with the schema's
## ->values type. A zero long always follows to mark the end.
sub encode_map {
    my ($class, $schema, $data, $cb) = @_;

    my @names = keys %$data;
    my $count = @names;
    if ($count) {
        $class->encode_long(undef, $count, $cb);
        foreach my $name (@names) {
            ## the key
            $class->encode_string(undef, $name, $cb);

            ## the value
            $class->encode(
                schema  => $schema->values,
                data    => $data->{$name},
                emit_cb => $cb,
            );
        }
    }

    ## end of the only block
    return $class->encode_long(undef, 0, $cb);
}
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
## 1.3.2 A union is encoded by first writing an int value indicating the |
|
238
|
|
|
|
|
|
|
## zero-based position within the union of the schema of its value. The value |
|
239
|
|
|
|
|
|
|
## is then encoded per the indicated schema within the union. |
|
240
|
|
|
|
|
|
|
## Picks the first member schema whose is_data_valid accepts $data, writes
## that member's zero-based position as a long, then encodes $data with it.
## Throws Avro::BinaryEncoder::Error when no member schema accepts the data.
sub encode_union {
    my ($class, $schema, $data, $cb) = @_;

    my $members = $schema->schemas;
    my ($position, $chosen);
    for my $i (0 .. $#{$members}) {
        next unless $members->[$i]->is_data_valid($data);
        ($position, $chosen) = ($i, $members->[$i]);
        last;
    }

    if (!$chosen) {
        Avro::BinaryEncoder::Error->throw("union cannot validate the data");
    }

    $class->encode_long(undef, $position, $cb);
    return $class->encode(
        schema  => $chosen,
        data    => $data,
        emit_cb => $cb,
    );
}
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
## 1.3.2 Fixed instances are encoded using the number of bytes declared in the |
|
264
|
|
|
|
|
|
|
## schema. |
|
265
|
|
|
|
|
|
|
## Emits $data unchanged, provided its byte length equals the schema's ->size.
## Throws Avro::BinaryEncoder::Error (reporting both lengths) otherwise.
sub encode_fixed {
    my ($class, $schema, $data, $cb) = @_;

    return $cb->(\$data) if bytes::length($data) == $schema->size;

    my $actual   = bytes::length($data);
    my $declared = $schema->size;
    Avro::BinaryEncoder::Error->throw("Fixed size doesn't match $actual!=$declared");
}
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
## Maps a signed integer onto an unsigned one so that values of small
## magnitude stay small: 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
## Numeric warnings are fatal here, so a non-numeric argument dies instead of
## being silently treated as zero.
sub zigzag {
    use warnings FATAL => 'numeric';
    return $_[0] >= 0
        ? $_[0] << 1
        : (($_[0] << 1) ^ -1) | 0x1;
}
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
## Serialises a non-negative integer as a variable-length byte string: seven
## bits per byte, least-significant group first, with the high bit set on
## every byte except the last.
##
## Takes the number as its only argument and returns the packed bytes.
sub unsigned_varint {
    ## Work on a copy: elements of @_ alias the caller's variables, so
    ## shifting $_[0] in place would clobber the caller's value (and die on
    ## a read-only argument such as a literal).
    my $remaining = $_[0];

    my @bytes;
    while ($remaining & $complement) {            # bits left above the low 7
        push @bytes, ($remaining & 0x7F) | 0x80;  # out and set continuation bit
        ## Non-assigning shift, so an object argument is not modified in place.
        $remaining = $remaining >> 7;
    }
    push @bytes, $remaining;                      # last byte
    return pack "C*", @bytes;
}
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
package Avro::BinaryEncoder::Error;

## Exception class thrown by the encoders in this file; it adds nothing of
## its own on top of Error::Simple.
use parent qw(Error::Simple);

1;