line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one |
2
|
|
|
|
|
|
|
# or more contributor license agreements. See the NOTICE file |
3
|
|
|
|
|
|
|
# distributed with this work for additional information |
4
|
|
|
|
|
|
|
# regarding copyright ownership. The ASF licenses this file |
5
|
|
|
|
|
|
|
# to you under the Apache License, Version 2.0 (the |
6
|
|
|
|
|
|
|
# "License"); you may not use this file except in compliance |
7
|
|
|
|
|
|
|
# with the License. You may obtain a copy of the License at |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# https://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, |
12
|
|
|
|
|
|
|
# software distributed under the License is distributed on an |
13
|
|
|
|
|
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14
|
|
|
|
|
|
|
# KIND, either express or implied. See the License for the |
15
|
|
|
|
|
|
|
# specific language governing permissions and limitations |
16
|
|
|
|
|
|
|
# under the License. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
use strict; |
19
|
3
|
|
|
3
|
|
1453
|
use warnings; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
77
|
|
20
|
3
|
|
|
3
|
|
13
|
|
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
61
|
|
21
|
|
|
|
|
|
|
use Config; |
22
|
3
|
|
|
3
|
|
13
|
use Encode(); |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
85
|
|
23
|
3
|
|
|
3
|
|
1470
|
use Error::Simple; |
|
3
|
|
|
|
|
25809
|
|
|
3
|
|
|
|
|
63
|
|
24
|
3
|
|
|
3
|
|
18
|
use Regexp::Common qw(number); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
29
|
|
25
|
3
|
|
|
3
|
|
135
|
|
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
18
|
|
26
|
|
|
|
|
|
|
our $VERSION = '1.11.1'; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our $max64; |
29
|
|
|
|
|
|
|
our $complement = ~0x7F; |
30
|
|
|
|
|
|
|
if ($Config{use64bitint}) { |
31
|
|
|
|
|
|
|
$max64 = 9223372036854775807; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
else { |
34
|
|
|
|
|
|
|
require Math::BigInt; |
35
|
|
|
|
|
|
|
$complement = Math::BigInt->new("0b" . ("1" x 57) . ("0" x 7)); |
36
|
|
|
|
|
|
|
$max64 = Math::BigInt->new("0b0" . ("1" x 63)); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 encode(%param) |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Encodes the given C<data> according to the given C<schema>, and pass it |
43
|
|
|
|
|
|
|
to the C<emit_cb> |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Params are: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=over 4 |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=item * data |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
The data to encode (can be any perl data structure, but it should match |
52
|
|
|
|
|
|
|
schema) |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item * schema |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The schema to use to encode C<data> |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=item * emit_cb($byte_ref) |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The callback that will be invoked with the a reference to the encoded data |
61
|
|
|
|
|
|
|
in parameters. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=back |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
my $class = shift; |
68
|
|
|
|
|
|
|
my %param = @_; |
69
|
1326
|
|
|
1326
|
1
|
4056
|
my ($schema, $data, $cb) = @param{qw/schema data emit_cb/}; |
70
|
1326
|
|
|
|
|
3061
|
|
71
|
1326
|
|
|
|
|
2375
|
## a schema can also be just a string |
72
|
|
|
|
|
|
|
my $type = ref $schema ? $schema->type : $schema; |
73
|
|
|
|
|
|
|
|
74
|
1326
|
50
|
|
|
|
3502
|
## might want to profile and optimize this |
75
|
|
|
|
|
|
|
my $meth = "encode_$type"; |
76
|
|
|
|
|
|
|
$class->$meth($schema, $data, $cb); |
77
|
1326
|
|
|
|
|
2019
|
return; |
78
|
1326
|
|
|
|
|
2945
|
} |
79
|
1326
|
|
|
|
|
3215
|
|
80
|
|
|
|
|
|
|
$_[3]->(\''); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
3
|
|
|
3
|
0
|
996
|
my $class = shift; |
84
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
85
|
|
|
|
|
|
|
$cb->( $data ? \"\x1" : \"\x0" ); |
86
|
|
|
|
|
|
|
} |
87
|
2
|
|
|
2
|
0
|
975
|
|
88
|
2
|
|
|
|
|
5
|
my $class = shift; |
89
|
2
|
100
|
|
|
|
7
|
my ($schema, $data, $cb) = @_; |
90
|
|
|
|
|
|
|
if ($data !~ /^$RE{num}{int}$/) { |
91
|
|
|
|
|
|
|
throw Avro::BinaryEncoder::Error("cannot convert '$data' to integer"); |
92
|
|
|
|
|
|
|
} |
93
|
22
|
|
|
22
|
0
|
6429
|
if (abs($data) > 0x7fffffff) { |
94
|
22
|
|
|
|
|
36
|
throw Avro::BinaryEncoder::Error("int ($data) should be <= 32bits"); |
95
|
22
|
100
|
|
|
|
90
|
} |
96
|
2
|
|
|
|
|
201
|
|
97
|
|
|
|
|
|
|
my $enc = unsigned_varint(zigzag($data)); |
98
|
20
|
100
|
|
|
|
2365
|
$cb->(\$enc); |
99
|
2
|
|
|
|
|
167
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
my $class = shift; |
102
|
18
|
|
|
|
|
225
|
my ($schema, $data, $cb) = @_; |
103
|
18
|
|
|
|
|
92
|
if ($data !~ /^$RE{num}{int}$/) { |
104
|
|
|
|
|
|
|
throw Avro::BinaryEncoder::Error("cannot convert '$data' to long integer"); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
if (abs($data) > $max64) { |
107
|
2072
|
|
|
2072
|
0
|
8298
|
throw Avro::BinaryEncoder::Error("int ($data) should be <= 64bits"); |
108
|
2072
|
|
|
|
|
3075
|
} |
109
|
2072
|
100
|
|
|
|
6112
|
my $enc = unsigned_varint(zigzag($data)); |
110
|
2
|
|
|
|
|
205
|
$cb->(\$enc); |
111
|
|
|
|
|
|
|
} |
112
|
2070
|
50
|
|
|
|
185997
|
|
113
|
0
|
|
|
|
|
0
|
my $class = shift; |
114
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
115
|
2070
|
|
|
|
|
3409
|
my $enc = pack "f<", $data; |
116
|
2070
|
|
|
|
|
4074
|
$cb->(\$enc); |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
my $class = shift; |
120
|
0
|
|
|
0
|
0
|
0
|
my ($schema, $data, $cb) = @_; |
121
|
0
|
|
|
|
|
0
|
my $enc = pack "d<", $data; |
122
|
0
|
|
|
|
|
0
|
$cb->(\$enc); |
123
|
0
|
|
|
|
|
0
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
my $class = shift; |
126
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
127
|
0
|
|
|
0
|
0
|
0
|
encode_long($class, undef, bytes::length($data), $cb); |
128
|
0
|
|
|
|
|
0
|
$cb->(\$data); |
129
|
0
|
|
|
|
|
0
|
} |
130
|
0
|
|
|
|
|
0
|
|
131
|
|
|
|
|
|
|
my $class = shift; |
132
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
133
|
|
|
|
|
|
|
my $bytes = Encode::encode_utf8($data); |
134
|
14
|
|
|
14
|
0
|
14
|
encode_long($class, undef, bytes::length($bytes), $cb); |
135
|
14
|
|
|
|
|
30
|
$cb->(\$bytes); |
136
|
14
|
|
|
|
|
26
|
} |
137
|
14
|
|
|
|
|
28
|
|
138
|
|
|
|
|
|
|
## 1.3.2 A record is encoded by encoding the values of its fields in the order |
139
|
|
|
|
|
|
|
## that they are declared. In other words, a record is encoded as just the |
140
|
|
|
|
|
|
|
## concatenation of the encodings of its fields. Field values are encoded per |
141
|
1170
|
|
|
1170
|
0
|
1482
|
## their schema. |
142
|
1170
|
|
|
|
|
1651
|
my $class = shift; |
143
|
1170
|
|
|
|
|
2006
|
my ($schema, $data, $cb) = @_; |
144
|
1170
|
|
|
|
|
7436
|
for my $field (@{ $schema->fields }) { |
145
|
1170
|
|
|
|
|
2098
|
$class->encode( |
146
|
|
|
|
|
|
|
schema => $field->{type}, |
147
|
|
|
|
|
|
|
data => $data->{ $field->{name} }, |
148
|
|
|
|
|
|
|
emit_cb => $cb, |
149
|
|
|
|
|
|
|
); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
8
|
|
|
8
|
0
|
14
|
## 1.3.2 An enum is encoded by a int, representing the zero-based position of |
154
|
8
|
|
|
|
|
17
|
## the symbol in the schema. |
155
|
8
|
|
|
|
|
12
|
my $class = shift; |
|
8
|
|
|
|
|
37
|
|
156
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
157
|
|
|
|
|
|
|
my $symbols = $schema->symbols_as_hash; |
158
|
|
|
|
|
|
|
my $pos = $symbols->{ $data }; |
159
|
20
|
|
|
|
|
73
|
throw Avro::BinaryEncoder::Error("Cannot find enum $data") |
160
|
|
|
|
|
|
|
unless defined $pos; |
161
|
|
|
|
|
|
|
$class->encode_int(undef, $pos, $cb); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## 1.3.2 Arrays are encoded as a series of blocks. Each block consists of a |
165
|
|
|
|
|
|
|
## long count value, followed by that many array items. A block with count zero |
166
|
|
|
|
|
|
|
## indicates the end of the array. Each item is encoded per the array's item |
167
|
5
|
|
|
5
|
0
|
6
|
## schema. |
168
|
5
|
|
|
|
|
8
|
## If a block's count is negative, its absolute value is used, and the count is |
169
|
5
|
|
|
|
|
10
|
## followed immediately by a long block size |
170
|
5
|
|
|
|
|
6
|
|
171
|
5
|
50
|
|
|
|
11
|
## maybe here it would be worth configuring what a typical block size should be |
172
|
|
|
|
|
|
|
my $class = shift; |
173
|
5
|
|
|
|
|
10
|
my ($schema, $data, $cb) = @_; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
## FIXME: multiple blocks |
176
|
|
|
|
|
|
|
if (@$data) { |
177
|
|
|
|
|
|
|
$class->encode_long(undef, scalar @$data, $cb); |
178
|
|
|
|
|
|
|
for (@$data) { |
179
|
|
|
|
|
|
|
$class->encode( |
180
|
|
|
|
|
|
|
schema => $schema->items, |
181
|
|
|
|
|
|
|
data => $_, |
182
|
|
|
|
|
|
|
emit_cb => $cb, |
183
|
|
|
|
|
|
|
); |
184
|
|
|
|
|
|
|
} |
185
|
318
|
|
|
318
|
0
|
362
|
} |
186
|
318
|
|
|
|
|
438
|
## end of the only block |
187
|
|
|
|
|
|
|
$class->encode_long(undef, 0, $cb); |
188
|
|
|
|
|
|
|
} |
189
|
318
|
50
|
|
|
|
541
|
|
190
|
318
|
|
|
|
|
689
|
|
191
|
318
|
|
|
|
|
606
|
## 1.3.2 Maps are encoded as a series of blocks. Each block consists of a long |
192
|
848
|
|
|
|
|
1732
|
## count value, followed by that many key/value pairs. A block with count zero |
193
|
|
|
|
|
|
|
## indicates the end of the map. Each item is encoded per the map's value |
194
|
|
|
|
|
|
|
## schema. |
195
|
|
|
|
|
|
|
## |
196
|
|
|
|
|
|
|
## (TODO) |
197
|
|
|
|
|
|
|
## If a block's count is negative, its absolute value is used, and the count is |
198
|
|
|
|
|
|
|
## followed immediately by a long block size indicating the number of bytes in |
199
|
|
|
|
|
|
|
## the block. This block size permits fast skipping through data, e.g., when |
200
|
318
|
|
|
|
|
588
|
## projecting a record to a subset of its fields. |
201
|
|
|
|
|
|
|
my $class = shift; |
202
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
my @keys = keys %$data; |
205
|
|
|
|
|
|
|
if (@keys) { |
206
|
|
|
|
|
|
|
$class->encode_long(undef, scalar @keys, $cb); |
207
|
|
|
|
|
|
|
for (@keys) { |
208
|
|
|
|
|
|
|
## the key |
209
|
|
|
|
|
|
|
$class->encode_string(undef, $_, $cb); |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
## the value |
212
|
|
|
|
|
|
|
$class->encode( |
213
|
|
|
|
|
|
|
schema => $schema->values, |
214
|
|
|
|
|
|
|
data => $data->{$_}, |
215
|
111
|
|
|
111
|
0
|
164
|
emit_cb => $cb, |
216
|
111
|
|
|
|
|
249
|
); |
217
|
|
|
|
|
|
|
} |
218
|
111
|
|
|
|
|
342
|
} |
219
|
111
|
50
|
|
|
|
234
|
## end of the only block |
220
|
111
|
|
|
|
|
307
|
$class->encode_long(undef, 0, $cb); |
221
|
111
|
|
|
|
|
255
|
} |
222
|
|
|
|
|
|
|
|
223
|
330
|
|
|
|
|
686
|
## 1.3.2 A union is encoded by first writing an int value indicating the |
224
|
|
|
|
|
|
|
## zero-based position within the union of the schema of its value. The value |
225
|
|
|
|
|
|
|
## is then encoded per the indicated schema within the union. |
226
|
|
|
|
|
|
|
my $class = shift; |
227
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
228
|
330
|
|
|
|
|
828
|
my $idx = 0; |
229
|
|
|
|
|
|
|
my $elected_schema; |
230
|
|
|
|
|
|
|
for my $inner_schema (@{$schema->schemas}) { |
231
|
|
|
|
|
|
|
if ($inner_schema->is_data_valid($data)) { |
232
|
|
|
|
|
|
|
$elected_schema = $inner_schema; |
233
|
|
|
|
|
|
|
last; |
234
|
111
|
|
|
|
|
318
|
} |
235
|
|
|
|
|
|
|
$idx++; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
unless ($elected_schema) { |
238
|
|
|
|
|
|
|
throw Avro::BinaryEncoder::Error("union cannot validate the data"); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
$class->encode_long(undef, $idx, $cb); |
241
|
4
|
|
|
4
|
0
|
7
|
$class->encode( |
242
|
4
|
|
|
|
|
9
|
schema => $elected_schema, |
243
|
4
|
|
|
|
|
5
|
data => $data, |
244
|
4
|
|
|
|
|
5
|
emit_cb => $cb, |
245
|
4
|
|
|
|
|
6
|
); |
|
4
|
|
|
|
|
8
|
|
246
|
8
|
100
|
|
|
|
19
|
} |
247
|
4
|
|
|
|
|
5
|
|
248
|
4
|
|
|
|
|
9
|
## 1.3.2 Fixed instances are encoded using the number of bytes declared in the |
249
|
|
|
|
|
|
|
## schema. |
250
|
4
|
|
|
|
|
6
|
my $class = shift; |
251
|
|
|
|
|
|
|
my ($schema, $data, $cb) = @_; |
252
|
4
|
50
|
|
|
|
10
|
if (bytes::length $data != $schema->size) { |
253
|
0
|
|
|
|
|
0
|
my $s1 = bytes::length $data; |
254
|
|
|
|
|
|
|
my $s2 = $schema->size; |
255
|
4
|
|
|
|
|
24
|
throw Avro::BinaryEncoder::Error("Fixed size doesn't match $s1!=$s2"); |
256
|
4
|
|
|
|
|
27
|
} |
257
|
|
|
|
|
|
|
$cb->(\$data); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
use warnings FATAL => 'numeric'; |
261
|
|
|
|
|
|
|
if ( $_[0] >= 0 ) { |
262
|
|
|
|
|
|
|
return $_[0] << 1; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
return (($_[0] << 1) ^ -1) | 0x1; |
265
|
|
|
|
|
|
|
} |
266
|
10
|
|
|
10
|
0
|
19
|
|
267
|
10
|
|
|
|
|
23
|
my @bytes; |
268
|
10
|
50
|
|
|
|
21
|
while ($_[0] & $complement) { # mask with continuation bit |
269
|
0
|
|
|
|
|
0
|
push @bytes, ($_[0] & 0x7F) | 0x80; # out and set continuation bit |
270
|
0
|
|
|
|
|
0
|
$_[0] >>= 7; # next please |
271
|
0
|
|
|
|
|
0
|
} |
272
|
|
|
|
|
|
|
push @bytes, $_[0]; # last byte |
273
|
10
|
|
|
|
|
38
|
return pack "C*", @bytes; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
use parent 'Error::Simple'; |
277
|
3
|
|
|
3
|
|
3762
|
|
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
514
|
|
278
|
2088
|
100
|
|
2088
|
0
|
3203
|
1; |