| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Compress::BGZF::Writer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
773
|
use strict; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
75
|
|
|
4
|
2
|
|
|
2
|
|
7
|
use warnings; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
37
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
6
|
use Carp; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
82
|
|
|
7
|
2
|
|
|
2
|
|
6
|
use Compress::Zlib; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
367
|
|
|
8
|
2
|
|
|
2
|
|
7
|
use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
192
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
9
|
use constant HEAD_BYTES => 18; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
112
|
|
|
11
|
2
|
|
|
2
|
|
7
|
use constant FOOT_BYTES => 8; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
99
|
|
|
12
|
2
|
|
|
2
|
|
6
|
use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
84
|
|
|
13
|
2
|
|
|
2
|
|
6
|
use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200'; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
1442
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
## no critic |
|
16
|
|
|
|
|
|
|
# allow for filehandle tie'ing |
|
17
|
3
|
|
|
3
|
|
6
|
sub TIEHANDLE { Compress::BGZF::Writer::new(@_) } |
|
18
|
2015
|
|
|
2015
|
|
4166
|
sub PRINT { Compress::BGZF::Writer::_queue(@_) } |
|
19
|
3
|
|
|
3
|
|
13
|
sub CLOSE { Compress::BGZF::Writer::finalize(@_) } |
|
20
|
|
|
|
|
|
|
## use critic |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub new_filehandle { |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
25
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
|
26
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
27
|
|
|
|
|
|
|
# RET 0 : filehandle GLOB |
|
28
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
29
|
|
|
|
|
|
|
|
|
30
|
3
|
|
|
3
|
1
|
1507
|
my ($class, $fn_out) = @_; |
|
31
|
|
|
|
|
|
|
|
|
32
|
3
|
|
|
|
|
273
|
open my $fh, '<', undef; |
|
33
|
3
|
50
|
|
|
|
16
|
tie *$fh, $class, $fn_out or croak "failed to tie filehandle"; |
|
34
|
3
|
|
|
|
|
4
|
return $fh; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub new { |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
41
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
|
42
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
43
|
|
|
|
|
|
|
# RET 0 : Compress::BGZF::Writer object |
|
44
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
45
|
|
|
|
|
|
|
|
|
46
|
3
|
|
|
3
|
1
|
5
|
my ($class, $fn_out) = @_; |
|
47
|
3
|
|
|
|
|
5
|
my $self = bless {}, $class; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# initialize |
|
50
|
3
|
50
|
|
|
|
8
|
if (defined $fn_out) { |
|
51
|
3
|
50
|
|
|
|
96
|
open $self->{fh}, '>', $fn_out |
|
52
|
|
|
|
|
|
|
or croak "Error openingn file for writing"; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
else { |
|
55
|
0
|
|
|
|
|
0
|
$self->{fh} = \*STDOUT; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
3
|
|
|
|
|
7
|
binmode $self->{fh}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
3
|
|
|
|
|
11
|
$self->{c_level} = Z_DEFAULT_COMPRESSION; |
|
60
|
3
|
|
|
|
|
10
|
$self->{buffer} = ''; # contents waiting to be compressed/written |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# these variables are tracked to allow for virtual offset calculation |
|
63
|
3
|
|
|
|
|
5
|
$self->{block_offset} = 0; # offset of current block in compressed data |
|
64
|
3
|
|
|
|
|
3
|
$self->{buffer_offset} = 0; # offset of current pos in uncompressed block |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# these variables are tracked to allow for index creation |
|
67
|
3
|
|
|
|
|
3
|
$self->{u_offset} = 0; #uncompressed file offset |
|
68
|
3
|
|
|
|
|
5
|
$self->{idx} = []; |
|
69
|
|
|
|
|
|
|
|
|
70
|
3
|
|
|
|
|
10
|
return $self; |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
} |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub set_level { |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
77
|
|
|
|
|
|
|
# ARG 0 : compression level desired |
|
78
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
79
|
|
|
|
|
|
|
# no returns |
|
80
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
81
|
|
|
|
|
|
|
|
|
82
|
0
|
|
|
0
|
1
|
0
|
my ($self, $level) = @_; |
|
83
|
|
|
|
|
|
|
|
|
84
|
0
|
0
|
|
|
|
0
|
croak "Invalid compression level (allowed 0-9)" |
|
85
|
|
|
|
|
|
|
if ($level !~ /^\d$/); |
|
86
|
0
|
|
|
|
|
0
|
$self->{c_level} = $level; |
|
87
|
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
0
|
return; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub add_data { |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# a wrapper around the queue() function that returns the virtual offset |
|
95
|
|
|
|
|
|
|
# of the chunk added |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
98
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
|
99
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
100
|
|
|
|
|
|
|
# RET 1 : virtual offset of data written |
|
101
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
102
|
|
|
|
|
|
|
|
|
103
|
0
|
|
|
0
|
1
|
0
|
my ($self, $content) = @_; |
|
104
|
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset}; |
|
106
|
0
|
|
|
|
|
0
|
$self->_queue( $content ); |
|
107
|
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
0
|
return $vo; |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub _queue { |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
115
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
|
116
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
117
|
|
|
|
|
|
|
# no returns |
|
118
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
119
|
|
|
|
|
|
|
|
|
120
|
2015
|
|
|
2015
|
|
1456
|
my ($self, $content) = @_; |
|
121
|
|
|
|
|
|
|
|
|
122
|
2015
|
|
|
|
|
1598
|
$self->{buffer} .= $content; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# compress/write in 64k chunks |
|
125
|
2015
|
|
|
|
|
2391
|
while (length($self->{buffer}) >= FLUSH_SIZE) { |
|
126
|
|
|
|
|
|
|
|
|
127
|
5
|
|
|
|
|
79
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
|
128
|
5
|
|
|
|
|
9
|
my $unwritten = $self->_write_block($chunk); |
|
129
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
|
130
|
5
|
100
|
|
|
|
31
|
if ( length($unwritten) ); |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
2015
|
|
|
|
|
1333
|
$self->{buffer_offset} = length $self->{buffer}; |
|
134
|
|
|
|
|
|
|
|
|
135
|
2015
|
|
|
|
|
2796
|
return; |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _write_block { |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
142
|
|
|
|
|
|
|
# ARG 0 : independent data block to compress |
|
143
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
144
|
|
|
|
|
|
|
# RET 0 : remaining data that wasn't written |
|
145
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
146
|
|
|
|
|
|
|
|
|
147
|
8
|
|
|
8
|
|
10
|
my ($self, $chunk) = @_; |
|
148
|
|
|
|
|
|
|
|
|
149
|
8
|
|
|
|
|
8
|
my $chunk_len = length($chunk); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# payload is compressed with DEFLATE |
|
152
|
|
|
|
|
|
|
rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level}) |
|
153
|
8
|
50
|
|
|
|
27
|
or croak "deflate failed: $RawDeflateError\n"; |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# very rarely, a DEFLATEd string may be larger than input. This can result |
|
156
|
|
|
|
|
|
|
# in a block size > 2**16, which violates the BGZF specification and |
|
157
|
|
|
|
|
|
|
# causes corruption of the BC field. Fix those edge cases here (somewhat |
|
158
|
|
|
|
|
|
|
# slow but shouldn't happen often) and send the rest back to the buffer |
|
159
|
8
|
|
|
|
|
22271
|
my $trimmed = ''; |
|
160
|
8
|
|
|
|
|
17
|
while (length($payload) > FLUSH_SIZE) { |
|
161
|
4
|
|
|
|
|
9
|
my $trim_len = int( $chunk_len * 0.05 ); |
|
162
|
4
|
|
|
|
|
52
|
$trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed; |
|
163
|
|
|
|
|
|
|
rawdeflate(\$chunk, \$payload, -Level => $self->{c_level}) |
|
164
|
4
|
50
|
|
|
|
15
|
or croak "deflate failed: $RawDeflateError\n"; |
|
165
|
4
|
|
|
|
|
10367
|
$chunk_len = length($chunk); |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
8
|
|
|
|
|
10
|
my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES; |
|
169
|
|
|
|
|
|
|
|
|
170
|
8
|
50
|
|
|
|
13
|
croak "Internal error: block size > 65536" if ($block_size > 2**16); |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# payload is wrapped with appropriate headers and footers |
|
173
|
8
|
50
|
|
|
|
7
|
print { $self->{fh} } pack( |
|
|
8
|
|
|
|
|
1985
|
|
|
174
|
|
|
|
|
|
|
"a*va*VV", |
|
175
|
|
|
|
|
|
|
BGZF_HEADER, |
|
176
|
|
|
|
|
|
|
$block_size - 1, |
|
177
|
|
|
|
|
|
|
$payload, |
|
178
|
|
|
|
|
|
|
crc32($chunk), |
|
179
|
|
|
|
|
|
|
$chunk_len, |
|
180
|
|
|
|
|
|
|
) or croak "Error writing compressed block"; |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# increment the current offsets |
|
183
|
8
|
|
|
|
|
14
|
$self->{block_offset} += $block_size; |
|
184
|
8
|
|
|
|
|
10
|
$self->{u_offset} += $chunk_len; |
|
185
|
8
|
|
|
|
|
5
|
push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}]; |
|
|
8
|
|
|
|
|
23
|
|
|
186
|
|
|
|
|
|
|
|
|
187
|
8
|
|
|
|
|
16
|
return $trimmed; |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub finalize { |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
194
|
|
|
|
|
|
|
# no arguments |
|
195
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
196
|
|
|
|
|
|
|
# no returns |
|
197
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
198
|
|
|
|
|
|
|
|
|
199
|
6
|
|
|
6
|
1
|
6
|
my ($self) = @_; |
|
200
|
|
|
|
|
|
|
|
|
201
|
6
|
|
|
|
|
17
|
while (length($self->{buffer}) > 0) { |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
croak "file closed but buffer not empty" |
|
204
|
3
|
50
|
|
|
|
8
|
if ( ! defined fileno($self->{fh}) ); |
|
205
|
|
|
|
|
|
|
|
|
206
|
3
|
|
|
|
|
17
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
|
207
|
3
|
|
|
|
|
5
|
my $unwritten = $self->_write_block($chunk); |
|
208
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
|
209
|
3
|
50
|
|
|
|
11
|
if ( length($unwritten) ); |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
} |
|
212
|
6
|
|
|
|
|
160
|
close $self->{fh}; |
|
213
|
|
|
|
|
|
|
|
|
214
|
6
|
|
|
|
|
13
|
return; |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub write_index { |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
221
|
|
|
|
|
|
|
# ARG 0 : index output filename |
|
222
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
223
|
|
|
|
|
|
|
# No returns |
|
224
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
225
|
|
|
|
|
|
|
|
|
226
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fn_out) = @_; |
|
227
|
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
0
|
$self->finalize(); # always clear remaining buffer to fully populate index |
|
229
|
0
|
0
|
|
|
|
0
|
croak "missing index output filename" if (! defined $fn_out); |
|
230
|
0
|
|
|
|
|
0
|
open my $fh_out, '>:raw', $fn_out; |
|
231
|
|
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
0
|
my @offsets = @{ $self->{idx} }; |
|
|
0
|
|
|
|
|
0
|
|
|
233
|
0
|
|
|
|
|
0
|
pop @offsets; # last offset is EOF |
|
234
|
0
|
|
|
|
|
0
|
print {$fh_out} pack('Q<', scalar(@offsets)); |
|
|
0
|
|
|
|
|
0
|
|
|
235
|
0
|
|
|
|
|
0
|
print {$fh_out} pack('Q
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
236
|
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
0
|
close $fh_out; |
|
238
|
0
|
|
|
|
|
0
|
return; |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub DESTROY { |
|
243
|
|
|
|
|
|
|
|
|
244
|
3
|
|
|
3
|
|
554
|
my ($self) = @_; |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# make sure we call finalize in case the caller forgot |
|
247
|
3
|
|
|
|
|
6
|
$self->finalize(); |
|
248
|
|
|
|
|
|
|
|
|
249
|
3
|
|
|
|
|
100
|
return; |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
1; |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
__END__ |