line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
43354
|
use 5.008; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
33
|
|
2
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
71
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# ABSTRACT: a low-level reader for EBML files |
6
|
|
|
|
|
|
|
package Parse::Matroska::Reader; |
7
|
|
|
|
|
|
|
{ |
8
|
|
|
|
|
|
|
$Parse::Matroska::Reader::VERSION = '0.003'; |
9
|
|
|
|
|
|
|
} |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
550
|
use Parse::Matroska::Definitions qw{elem_by_hexid}; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
76
|
|
12
|
1
|
|
|
1
|
|
590
|
use Parse::Matroska::Element; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
46
|
|
15
|
1
|
|
|
1
|
|
4
|
use Scalar::Util qw{openhandle weaken}; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
86
|
|
16
|
1
|
|
|
1
|
|
870
|
use IO::Handle; |
|
1
|
|
|
|
|
7033
|
|
|
1
|
|
|
|
|
55
|
|
17
|
1
|
|
|
1
|
|
1048
|
use IO::File; |
|
1
|
|
|
|
|
2080
|
|
|
1
|
|
|
|
|
114
|
|
18
|
1
|
|
|
1
|
|
5
|
use List::Util qw{first}; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
50
|
|
19
|
1
|
|
|
1
|
|
920
|
use Encode; |
|
1
|
|
|
|
|
15902
|
|
|
1
|
|
|
|
|
96
|
|
20
|
|
|
|
|
|
|
|
21
|
1
|
|
|
1
|
|
9
|
use constant BIGINT_TRY => 'Pari,GMP,FastCalc'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
75
|
|
22
|
1
|
|
|
1
|
|
1456
|
use Math::BigInt try => BIGINT_TRY; |
|
1
|
|
|
|
|
21088
|
|
|
1
|
|
|
|
|
6
|
|
23
|
1
|
|
|
1
|
|
19343
|
use Math::BigRat try => BIGINT_TRY; |
|
1
|
|
|
|
|
33987
|
|
|
1
|
|
|
|
|
5
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub new { |
26
|
1
|
|
|
1
|
1
|
720
|
my $class = shift; |
27
|
1
|
|
|
|
|
3
|
my $self = {}; |
28
|
1
|
|
|
|
|
3
|
bless $self, $class; |
29
|
|
|
|
|
|
|
|
30
|
1
|
50
|
|
|
|
9
|
$self->open(@_) if @_; |
31
|
1
|
|
|
|
|
150
|
return $self; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub open { |
35
|
2
|
|
|
2
|
1
|
10
|
my ($self, $arg) = @_; |
36
|
2
|
50
|
33
|
|
|
34
|
$self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw") |
37
|
|
|
|
|
|
|
or croak "Can't open $arg: $!"; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub close { |
41
|
1
|
|
|
1
|
1
|
530
|
my ($self) = @_; |
42
|
1
|
|
|
|
|
14
|
$self->{fh}->close; |
43
|
1
|
|
|
|
|
39
|
delete $self->{fh}; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# equivalent to $self->readlen(1), possibly faster |
47
|
|
|
|
|
|
|
sub _getc { |
48
|
50
|
|
|
50
|
|
52
|
my ($self) = @_; |
49
|
50
|
|
|
|
|
148
|
my $c = $self->{fh}->getc; |
50
|
50
|
0
|
33
|
|
|
286
|
croak "Can't do read of length 1: $!" if !defined $c && $!; |
51
|
50
|
|
|
|
|
98
|
return $c; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub readlen { |
55
|
66
|
|
|
66
|
1
|
71
|
my ($self, $len) = @_; |
56
|
66
|
|
|
|
|
53
|
my $data; |
57
|
66
|
|
|
|
|
161
|
my $readlen = $self->{fh}->read($data, $len); |
58
|
66
|
50
|
|
|
|
360
|
croak "Can't do read of length $len: $!" |
59
|
|
|
|
|
|
|
unless defined $readlen; |
60
|
66
|
|
|
|
|
222
|
return $data; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# converts a byte string into an integer |
64
|
|
|
|
|
|
|
# we do so by converting the integer into a hex string (big-endian) |
65
|
|
|
|
|
|
|
# and then reading the hex-string into an integer |
66
|
|
|
|
|
|
|
sub _bin2int($) { |
67
|
35
|
|
|
35
|
|
80
|
my ($bin) = @_; |
68
|
|
|
|
|
|
|
# if the length is larger than 3 |
69
|
|
|
|
|
|
|
# the resulting integer might be larger than INT_MAX |
70
|
35
|
100
|
|
|
|
64
|
if (length($bin) > 3) { |
71
|
4
|
|
|
|
|
24
|
return Math::BigInt->from_hex(unpack("H*", $bin)); |
72
|
|
|
|
|
|
|
} |
73
|
31
|
|
|
|
|
89
|
return hex(unpack("H*", $bin)); |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# creates a floating-point number with the given mantissa and exponent |
77
|
|
|
|
|
|
|
sub _ldexp { |
78
|
1
|
|
|
1
|
|
380
|
my ($mantissa, $exponent) = @_; |
79
|
1
|
|
|
|
|
9
|
return $mantissa * Math::BigRat->new(2)**$exponent; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# NOTE: the read_* functions are hard to read because they're ports |
83
|
|
|
|
|
|
|
# of even harder to read python functions. |
84
|
|
|
|
|
|
|
# TODO: make them readable |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub read_id { |
87
|
25
|
|
|
25
|
1
|
27
|
my ($self) = @_; |
88
|
25
|
|
|
|
|
48
|
my $t = $self->_getc; |
89
|
25
|
50
|
|
|
|
43
|
return undef unless defined $t; |
90
|
25
|
|
|
|
|
25
|
my $i = 0; |
91
|
25
|
|
|
|
|
23
|
my $mask = 1<<7; |
92
|
|
|
|
|
|
|
|
93
|
25
|
50
|
|
|
|
47
|
if (ord($t) == 0) { |
94
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: first byte of ID was \\0" |
95
|
|
|
|
|
|
|
} |
96
|
25
|
|
|
|
|
45
|
until (ord($t) & $mask) { |
97
|
37
|
|
|
|
|
43
|
++$i; |
98
|
37
|
|
|
|
|
64
|
$mask >>= 1; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
# return hex string of the bytes we just read |
101
|
25
|
|
|
|
|
53
|
return unpack "H*", ($t . $self->readlen($i)); |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub read_size { |
105
|
25
|
|
|
25
|
1
|
26
|
my ($self) = @_; |
106
|
25
|
|
|
|
|
37
|
my $t = $self->_getc; |
107
|
25
|
|
|
|
|
26
|
my $i = 0; |
108
|
25
|
|
|
|
|
22
|
my $mask = 1<<7; |
109
|
|
|
|
|
|
|
|
110
|
25
|
50
|
|
|
|
48
|
if (ord($t) == 0) { |
111
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: first byte of data size was \\0" |
112
|
|
|
|
|
|
|
} |
113
|
25
|
|
|
|
|
46
|
until (ord($t) & $mask) { |
114
|
12
|
|
|
|
|
8
|
++$i; |
115
|
12
|
|
|
|
|
20
|
$mask >>= 1; |
116
|
|
|
|
|
|
|
} |
117
|
25
|
|
|
|
|
44
|
$t = $t & chr($mask-1); # strip length bits (keep only significant bits) |
118
|
25
|
|
|
|
|
48
|
return ($i+1, _bin2int $t . $self->readlen($i)); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
{ |
122
|
|
|
|
|
|
|
my $utf8 = find_encoding("UTF-8"); |
123
|
|
|
|
|
|
|
sub read_str { |
124
|
5
|
|
|
5
|
1
|
7
|
my ($self, $length) = @_; |
125
|
5
|
|
|
|
|
7
|
return $utf8->decode($self->readlen($length)); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub read_uint { |
130
|
10
|
|
|
10
|
1
|
11
|
my ($self, $length) = @_; |
131
|
10
|
|
|
|
|
17
|
return _bin2int $self->readlen($length); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub read_sint { |
135
|
1
|
|
|
1
|
1
|
2
|
my ($self, $length) = @_; |
136
|
1
|
|
|
|
|
2
|
my $i = $self->read_uint($length); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Apply 2's complement to the unsigned int |
139
|
1
|
|
|
|
|
130
|
my $mask = int(2 ** ($length * 8 - 1)); |
140
|
|
|
|
|
|
|
# if the most significant bit is set... |
141
|
1
|
50
|
|
|
|
3
|
if ($i & $mask) { |
142
|
|
|
|
|
|
|
# subtract the MSB twice |
143
|
0
|
|
|
|
|
0
|
$i -= 2 * $mask; |
144
|
|
|
|
|
|
|
} |
145
|
1
|
|
|
|
|
303
|
return $i; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub read_float { |
149
|
1
|
|
|
1
|
1
|
3
|
my ($self, $length) = @_; |
150
|
1
|
|
|
|
|
2
|
my $i = $self->read_uint($length); |
151
|
1
|
|
|
|
|
104
|
my $f; |
152
|
|
|
|
|
|
|
|
153
|
1
|
|
|
1
|
|
3913
|
use bigrat try => BIGINT_TRY; |
|
1
|
|
|
|
|
8990
|
|
|
1
|
|
|
|
|
6
|
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# These evil expressions reinterpret an unsigned int as IEEE binary floats |
156
|
1
|
50
|
|
|
|
6
|
if ($length == 4) { |
|
|
50
|
|
|
|
|
|
157
|
0
|
|
|
|
|
0
|
$f = _ldexp(($i & (1<<23 - 1)) + (1<<23), ($i>>23 & (1<<8 - 1)) - 150); |
158
|
0
|
0
|
|
|
|
0
|
$f = -$f if $i & (1<<31); |
159
|
|
|
|
|
|
|
} elsif ($length == 8) { |
160
|
1
|
|
|
|
|
175
|
$f = _ldexp(($i & (1<<52 - 1)) + (1<<52), ($i>>52 & (1<<12 - 1)) - 1075); |
161
|
1
|
50
|
|
|
|
1790
|
$f = -$f if $i & (1<<63); |
162
|
|
|
|
|
|
|
} else { |
163
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: unsupported IEEE float byte size $length"; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
1
|
|
|
|
|
87
|
return $f; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub read_ebml_id { |
170
|
0
|
|
|
0
|
1
|
0
|
my ($self, $length) = @_; |
171
|
0
|
|
|
|
|
0
|
return elem_by_hexid(unpack("H*", $self->readlen($length))); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
sub skip { |
175
|
3
|
|
|
3
|
1
|
4
|
my ($self, $len) = @_; |
176
|
3
|
50
|
33
|
|
|
17
|
return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1); |
177
|
0
|
|
|
|
|
0
|
$self->readlen($len); |
178
|
0
|
|
|
|
|
0
|
return; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub getpos { |
182
|
73
|
|
|
73
|
1
|
79
|
my ($self) = @_; |
183
|
73
|
50
|
|
|
|
230
|
return undef unless $self->{fh}->can('getpos'); |
184
|
73
|
|
|
|
|
281
|
return $self->{fh}->getpos; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub setpos { |
188
|
23
|
|
|
23
|
1
|
30
|
my ($self, $pos) = @_; |
189
|
23
|
50
|
33
|
|
|
126
|
return undef unless $pos && $self->{fh}->can('setpos'); |
190
|
|
|
|
|
|
|
|
191
|
23
|
|
|
|
|
202
|
my $ret = $self->{fh}->setpos($pos); |
192
|
23
|
50
|
|
|
|
44
|
croak "Cannot seek to correct position" |
193
|
|
|
|
|
|
|
unless $self->getpos eq $pos; |
194
|
23
|
|
|
|
|
53
|
return $ret; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub read_element { |
198
|
25
|
|
|
25
|
1
|
33
|
my ($self, $read_bin) = @_; |
199
|
25
|
50
|
|
|
|
72
|
return undef if $self->{fh}->eof; |
200
|
|
|
|
|
|
|
|
201
|
25
|
|
|
|
|
293
|
my $elem_pos = $self->getpos; |
202
|
|
|
|
|
|
|
|
203
|
25
|
|
|
|
|
52
|
my $elid = $self->read_id; |
204
|
25
|
|
|
|
|
69
|
my $elem_def = elem_by_hexid($elid); |
205
|
25
|
|
|
|
|
51
|
my ($size_len, $content_len) = $self->read_size; |
206
|
25
|
|
|
|
|
243
|
my $full_len = length($elid)/2 + $size_len + $content_len; |
207
|
|
|
|
|
|
|
|
208
|
25
|
|
33
|
|
|
319
|
my $elem = Parse::Matroska::Element->new( |
|
|
|
33
|
|
|
|
|
209
|
|
|
|
|
|
|
elid => $elid, |
210
|
|
|
|
|
|
|
name => $elem_def && $elem_def->{name}, |
211
|
|
|
|
|
|
|
type => $elem_def && $elem_def->{valtype}, |
212
|
|
|
|
|
|
|
size_len => $size_len, |
213
|
|
|
|
|
|
|
content_len => $content_len, |
214
|
|
|
|
|
|
|
full_len => $full_len, |
215
|
|
|
|
|
|
|
reader => $self, |
216
|
|
|
|
|
|
|
elem_pos => $elem_pos, |
217
|
|
|
|
|
|
|
data_pos => $self->getpos, |
218
|
|
|
|
|
|
|
); |
219
|
25
|
|
|
|
|
58
|
weaken($elem->{reader}); |
220
|
|
|
|
|
|
|
|
221
|
25
|
50
|
|
|
|
44
|
if (defined $elem_def) { |
222
|
25
|
100
|
|
|
|
92
|
if ($elem->{type} eq 'sub') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
223
|
7
|
|
|
|
|
15
|
$elem->{value} = []; |
224
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'str') { |
225
|
5
|
|
|
|
|
11
|
$elem->{value} = $self->read_str($content_len); |
226
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'ebml_id') { |
227
|
0
|
|
|
|
|
0
|
$elem->{value} = $self->read_ebml_id($content_len); |
228
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'uint') { |
229
|
8
|
|
|
|
|
19
|
$elem->{value} = $self->read_uint($content_len); |
230
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'sint') { |
231
|
1
|
|
|
|
|
5
|
$elem->{value} = $self->read_sint($content_len); |
232
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'float') { |
233
|
1
|
|
|
|
|
4
|
$elem->{value} = $self->read_float($content_len); |
234
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'skip') { |
235
|
0
|
|
|
|
|
0
|
$self->skip($content_len); |
236
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'binary') { |
237
|
3
|
50
|
|
|
|
6
|
if ($read_bin) { |
238
|
0
|
|
|
|
|
0
|
$elem->{value} = $self->readlen($content_len); |
239
|
|
|
|
|
|
|
} else { |
240
|
3
|
|
|
|
|
9
|
$self->skip($content_len); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
} else { |
243
|
0
|
|
|
|
|
0
|
die "Matroska Definition error: type $elem->{valtype} unknown" |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
} else { |
246
|
0
|
|
|
|
|
0
|
$self->skip($content_len); |
247
|
|
|
|
|
|
|
} |
248
|
25
|
|
|
|
|
322
|
return $elem; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
1; |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
__END__ |