| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Compress::BGZF::Writer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
8527
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
127
|
|
|
4
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
|
3
|
|
|
|
|
11
|
|
|
|
3
|
|
|
|
|
242
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
19
|
use Carp; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
250
|
|
|
7
|
3
|
|
|
3
|
|
18
|
use Compress::Zlib; |
|
|
3
|
|
|
|
|
30
|
|
|
|
3
|
|
|
|
|
1046
|
|
|
8
|
3
|
|
|
3
|
|
26
|
use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
600
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
3
|
|
|
3
|
|
23
|
use constant HEAD_BYTES => 18; |
|
|
3
|
|
|
|
|
13
|
|
|
|
3
|
|
|
|
|
327
|
|
|
11
|
3
|
|
|
3
|
|
19
|
use constant FOOT_BYTES => 8; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
292
|
|
|
12
|
3
|
|
|
3
|
|
21
|
use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
217
|
|
|
13
|
3
|
|
|
3
|
|
43
|
use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200'; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
201
|
|
|
14
|
3
|
|
|
|
|
4622
|
use constant BGZF_EOF => pack "H*", |
|
15
|
3
|
|
|
3
|
|
17
|
'1f8b08040000000000ff0600424302001b0003000000000000000000'; |
|
|
3
|
|
|
|
|
6
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
## no critic |
|
18
|
|
|
|
|
|
|
# allow for filehandle tie'ing |
|
19
|
3
|
|
|
3
|
|
14
|
sub TIEHANDLE { Compress::BGZF::Writer::new(@_) } |
|
20
|
2015
|
|
|
2015
|
|
5753
|
sub PRINT { Compress::BGZF::Writer::_queue(@_) } |
|
21
|
3
|
|
|
3
|
|
27
|
sub CLOSE { Compress::BGZF::Writer::finalize(@_) } |
|
22
|
|
|
|
|
|
|
## use critic |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub new_filehandle { |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
27
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
|
28
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
29
|
|
|
|
|
|
|
# RET 0 : filehandle GLOB |
|
30
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
31
|
|
|
|
|
|
|
|
|
32
|
3
|
|
|
3
|
1
|
265118
|
my ($class, $fn_out) = @_; |
|
33
|
|
|
|
|
|
|
|
|
34
|
3
|
|
|
|
|
553
|
open my $fh, '<', undef; |
|
35
|
3
|
50
|
|
|
|
35
|
tie *$fh, $class, $fn_out or croak "failed to tie filehandle"; |
|
36
|
3
|
|
|
|
|
53
|
return $fh; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub new { |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
43
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
|
44
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
45
|
|
|
|
|
|
|
# RET 0 : Compress::BGZF::Writer object |
|
46
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
47
|
|
|
|
|
|
|
|
|
48
|
6
|
|
|
6
|
1
|
188389
|
my ($class, $fn_out) = @_; |
|
49
|
6
|
|
|
|
|
19
|
my $self = bless {}, $class; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# initialize |
|
52
|
6
|
100
|
|
|
|
30
|
if (defined $fn_out) { |
|
53
|
5
|
50
|
|
|
|
717
|
open $self->{fh}, '>', $fn_out |
|
54
|
|
|
|
|
|
|
or croak "Error opening file for writing"; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
else { |
|
57
|
1
|
|
|
|
|
10
|
$self->{fh} = \*STDOUT; |
|
58
|
|
|
|
|
|
|
} |
|
59
|
6
|
|
|
|
|
35
|
binmode $self->{fh}; |
|
60
|
|
|
|
|
|
|
|
|
61
|
6
|
|
|
|
|
57
|
$self->{c_level} = Z_DEFAULT_COMPRESSION; |
|
62
|
6
|
|
|
|
|
45
|
$self->{buffer} = ''; # contents waiting to be compressed/written |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# these variables are tracked to allow for virtual offset calculation |
|
65
|
6
|
|
|
|
|
19
|
$self->{block_offset} = 0; # offset of current block in compressed data |
|
66
|
6
|
|
|
|
|
17
|
$self->{buffer_offset} = 0; # offset of current pos in uncompressed block |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# these variables are tracked to allow for index creation |
|
69
|
6
|
|
|
|
|
15
|
$self->{u_offset} = 0; #uncompressed file offset |
|
70
|
6
|
|
|
|
|
26
|
$self->{idx} = []; |
|
71
|
6
|
|
|
|
|
44
|
$self->{write_eof} = 0; |
|
72
|
|
|
|
|
|
|
|
|
73
|
6
|
|
|
|
|
34
|
return $self; |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub set_level { |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
80
|
|
|
|
|
|
|
# ARG 0 : compression level desired |
|
81
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
82
|
|
|
|
|
|
|
# no returns |
|
83
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
84
|
|
|
|
|
|
|
|
|
85
|
9
|
|
|
9
|
1
|
3344
|
my ($self, $level) = @_; |
|
86
|
|
|
|
|
|
|
|
|
87
|
9
|
100
|
|
|
|
1034
|
croak "Invalid compression level (allowed 0-9)" |
|
88
|
|
|
|
|
|
|
if ($level !~ /^\d$/); |
|
89
|
3
|
|
|
|
|
10
|
$self->{c_level} = $level; |
|
90
|
|
|
|
|
|
|
|
|
91
|
3
|
|
|
|
|
6
|
return; |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub set_write_eof { |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Sets whether to include htslib-style EOF empty block at end of file |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
100
|
|
|
|
|
|
|
# ARG 0 : (optional) boolean |
|
101
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
102
|
|
|
|
|
|
|
# no returns |
|
103
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
104
|
|
|
|
|
|
|
|
|
105
|
6
|
|
|
6
|
1
|
1534
|
my ($self, $bool) = @_; |
|
106
|
|
|
|
|
|
|
|
|
107
|
6
|
|
100
|
|
|
29
|
$bool //= 1; |
|
108
|
6
|
100
|
|
|
|
20
|
$self->{write_eof} = $bool ? 1 : 0; |
|
109
|
|
|
|
|
|
|
|
|
110
|
6
|
|
|
|
|
24
|
return; |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub add_data { |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# a wrapper around the queue() function that returns the virtual offset |
|
117
|
|
|
|
|
|
|
# of the chunk added |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
120
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
|
121
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
122
|
|
|
|
|
|
|
# RET 1 : virtual offset of data written |
|
123
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
124
|
|
|
|
|
|
|
|
|
125
|
5855
|
|
|
5855
|
1
|
21893
|
my ($self, $content) = @_; |
|
126
|
|
|
|
|
|
|
|
|
127
|
5855
|
|
|
|
|
10213
|
my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset}; |
|
128
|
5855
|
|
|
|
|
12654
|
$self->_queue( $content ); |
|
129
|
|
|
|
|
|
|
|
|
130
|
5855
|
|
|
|
|
14485
|
return $vo; |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub _queue { |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
137
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
|
138
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
139
|
|
|
|
|
|
|
# no returns |
|
140
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
141
|
|
|
|
|
|
|
|
|
142
|
7870
|
|
|
7870
|
|
12030
|
my ($self, $content) = @_; |
|
143
|
|
|
|
|
|
|
|
|
144
|
7870
|
|
|
|
|
12442
|
$self->{buffer} .= $content; |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# compress/write in 64k chunks |
|
147
|
7870
|
|
|
|
|
16849
|
while (length($self->{buffer}) >= FLUSH_SIZE) { |
|
148
|
|
|
|
|
|
|
|
|
149
|
18
|
|
|
|
|
543
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
|
150
|
18
|
|
|
|
|
89
|
my $unwritten = $self->_write_block($chunk); |
|
151
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
|
152
|
18
|
100
|
|
|
|
142
|
if ( length($unwritten) ); |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
} |
|
155
|
7870
|
|
|
|
|
12191
|
$self->{buffer_offset} = length $self->{buffer}; |
|
156
|
|
|
|
|
|
|
|
|
157
|
7870
|
|
|
|
|
13885
|
return; |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub _write_block { |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
164
|
|
|
|
|
|
|
# ARG 0 : independent data block to compress |
|
165
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
166
|
|
|
|
|
|
|
# RET 0 : remaining data that wasn't written |
|
167
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
168
|
|
|
|
|
|
|
|
|
169
|
24
|
|
|
24
|
|
110
|
my ($self, $chunk) = @_; |
|
170
|
|
|
|
|
|
|
|
|
171
|
24
|
|
|
|
|
52
|
my $chunk_len = length($chunk); |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# payload is compressed with DEFLATE |
|
174
|
|
|
|
|
|
|
rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level}) |
|
175
|
24
|
50
|
|
|
|
232
|
or croak "deflate failed: $RawDeflateError\n"; |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# very rarely, a DEFLATEd string may be larger than input. This can result |
|
178
|
|
|
|
|
|
|
# in a block size > 2**16, which violates the BGZF specification and |
|
179
|
|
|
|
|
|
|
# causes corruption of the BC field. Fix those edge cases here (somewhat |
|
180
|
|
|
|
|
|
|
# slow but shouldn't happen often) and send the rest back to the buffer |
|
181
|
24
|
|
|
|
|
122839
|
my $trimmed = ''; |
|
182
|
24
|
|
|
|
|
102
|
while (length($payload) > FLUSH_SIZE) { |
|
183
|
8
|
|
|
|
|
28
|
my $trim_len = int( $chunk_len * 0.05 ); |
|
184
|
8
|
|
|
|
|
169
|
$trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed; |
|
185
|
|
|
|
|
|
|
rawdeflate(\$chunk, \$payload, -Level => $self->{c_level}) |
|
186
|
8
|
50
|
|
|
|
55
|
or croak "deflate failed: $RawDeflateError\n"; |
|
187
|
8
|
|
|
|
|
36903
|
$chunk_len = length($chunk); |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
24
|
|
|
|
|
70
|
my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES; |
|
191
|
|
|
|
|
|
|
|
|
192
|
24
|
50
|
|
|
|
76
|
croak "Internal error: block size > 65536" if ($block_size > 2**16); |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# payload is wrapped with appropriate headers and footers |
|
195
|
24
|
50
|
|
|
|
49
|
print { $self->{fh} } pack( |
|
|
24
|
|
|
|
|
8900
|
|
|
196
|
|
|
|
|
|
|
"a*va*VV", |
|
197
|
|
|
|
|
|
|
BGZF_HEADER, |
|
198
|
|
|
|
|
|
|
$block_size - 1, |
|
199
|
|
|
|
|
|
|
$payload, |
|
200
|
|
|
|
|
|
|
crc32($chunk), |
|
201
|
|
|
|
|
|
|
$chunk_len, |
|
202
|
|
|
|
|
|
|
) or croak "Error writing compressed block"; |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# increment the current offsets |
|
205
|
24
|
|
|
|
|
132
|
$self->{block_offset} += $block_size; |
|
206
|
24
|
|
|
|
|
64
|
$self->{u_offset} += $chunk_len; |
|
207
|
24
|
|
|
|
|
51
|
push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}]; |
|
|
24
|
|
|
|
|
151
|
|
|
208
|
|
|
|
|
|
|
|
|
209
|
24
|
|
|
|
|
119
|
return $trimmed; |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub finalize { |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
216
|
|
|
|
|
|
|
# no arguments |
|
217
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
218
|
|
|
|
|
|
|
# no returns |
|
219
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
220
|
|
|
|
|
|
|
|
|
221
|
18
|
|
|
18
|
1
|
107
|
my ($self) = @_; |
|
222
|
|
|
|
|
|
|
|
|
223
|
18
|
|
|
|
|
81
|
while (length($self->{buffer}) > 0) { |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
croak "file closed but buffer not empty" |
|
226
|
6
|
50
|
|
|
|
32
|
if ( ! defined fileno($self->{fh}) ); |
|
227
|
|
|
|
|
|
|
|
|
228
|
6
|
|
|
|
|
60
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
|
229
|
6
|
|
|
|
|
23
|
my $unwritten = $self->_write_block($chunk); |
|
230
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
|
231
|
6
|
50
|
|
|
|
42
|
if ( length($unwritten) ); |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
# write EOF block if requested (only first time finalize() is run) |
|
235
|
18
|
100
|
100
|
|
|
79
|
if ($self->{write_eof} && defined fileno($self->{fh})) { |
|
236
|
1
|
|
|
|
|
3
|
print { $self->{fh} } BGZF_EOF; |
|
|
1
|
|
|
|
|
4
|
|
|
237
|
|
|
|
|
|
|
} |
|
238
|
18
|
100
|
|
|
|
59
|
if (defined fileno($self->{fh}) ) { |
|
239
|
|
|
|
|
|
|
close $self->{fh} |
|
240
|
6
|
50
|
|
|
|
1942
|
or croak "Error closing compressed file"; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
18
|
|
|
|
|
68
|
return; |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub write_index { |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
250
|
|
|
|
|
|
|
# ARG 0 : index output filename |
|
251
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
252
|
|
|
|
|
|
|
# No returns |
|
253
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
|
254
|
|
|
|
|
|
|
|
|
255
|
6
|
|
|
6
|
1
|
449
|
my ($self, $fn_out) = @_; |
|
256
|
|
|
|
|
|
|
|
|
257
|
6
|
|
|
|
|
29
|
$self->finalize(); # always clear remaining buffer to fully populate index |
|
258
|
6
|
100
|
|
|
|
599
|
croak "missing index output filename" if (! defined $fn_out); |
|
259
|
3
|
50
|
|
|
|
591
|
open my $fh_out, '>:raw', $fn_out |
|
260
|
|
|
|
|
|
|
or croak "Error opening index file for writing"; |
|
261
|
|
|
|
|
|
|
|
|
262
|
3
|
|
|
|
|
11
|
my @offsets = @{ $self->{idx} }; |
|
|
3
|
|
|
|
|
19
|
|
|
263
|
3
|
|
|
|
|
34
|
pop @offsets; # last offset is EOF |
|
264
|
3
|
50
|
|
|
|
7
|
print {$fh_out} pack('Q<', scalar(@offsets)) |
|
|
3
|
|
|
|
|
31
|
|
|
265
|
|
|
|
|
|
|
or croak "Error printing to index file"; |
|
266
|
3
|
|
|
|
|
14
|
for (@offsets) { |
|
267
|
13
|
50
|
|
|
|
21
|
print {$fh_out} pack('Q
|
|
|
13
|
|
|
|
|
25
|
|
|
|
13
|
|
|
|
|
48
|
|
|
268
|
|
|
|
|
|
|
or croak "Error printing offset to index file"; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
3
|
50
|
|
|
|
184
|
close $fh_out |
|
272
|
|
|
|
|
|
|
or croak "Error closing index file after writing"; |
|
273
|
3
|
|
|
|
|
33
|
return; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub DESTROY { |
|
278
|
|
|
|
|
|
|
|
|
279
|
6
|
|
|
6
|
|
4500
|
my ($self) = @_; |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# make sure we call finalize in case the caller forgot |
|
282
|
6
|
|
|
|
|
26
|
$self->finalize(); |
|
283
|
|
|
|
|
|
|
|
|
284
|
6
|
|
|
|
|
292
|
return; |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
1; |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
__END__ |