line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Compress::BGZF::Writer; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
785
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
60
|
|
4
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
39
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
8
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
89
|
|
7
|
2
|
|
|
2
|
|
11
|
use Compress::Zlib; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
435
|
|
8
|
2
|
|
|
2
|
|
19
|
use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
205
|
|
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
11
|
use constant HEAD_BYTES => 18; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
102
|
|
11
|
2
|
|
|
2
|
|
11
|
use constant FOOT_BYTES => 8; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
101
|
|
12
|
2
|
|
|
2
|
|
9
|
use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
95
|
|
13
|
2
|
|
|
2
|
|
11
|
use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1872
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
## no critic |
16
|
|
|
|
|
|
|
# allow for filehandle tie'ing |
17
|
3
|
|
|
3
|
|
8
|
sub TIEHANDLE { Compress::BGZF::Writer::new(@_) } |
18
|
2015
|
|
|
2015
|
|
5553
|
sub PRINT { Compress::BGZF::Writer::_queue(@_) } |
19
|
3
|
|
|
3
|
|
14
|
sub CLOSE { Compress::BGZF::Writer::finalize(@_) } |
20
|
|
|
|
|
|
|
## use critic |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub new_filehandle { |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
25
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
26
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
27
|
|
|
|
|
|
|
# RET 0 : filehandle GLOB |
28
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
29
|
|
|
|
|
|
|
|
30
|
3
|
|
|
3
|
1
|
1972
|
my ($class, $fn_out) = @_; |
31
|
|
|
|
|
|
|
|
32
|
3
|
|
|
|
|
209
|
open my $fh, '<', undef; |
33
|
3
|
50
|
|
|
|
20
|
tie *$fh, $class, $fn_out or croak "failed to tie filehandle"; |
34
|
3
|
|
|
|
|
9
|
return $fh; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub new { |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
41
|
|
|
|
|
|
|
# ARG 0 : (optional) output filename |
42
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
43
|
|
|
|
|
|
|
# RET 0 : Compress::BGZF::Writer object |
44
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
45
|
|
|
|
|
|
|
|
46
|
3
|
|
|
3
|
1
|
7
|
my ($class, $fn_out) = @_; |
47
|
3
|
|
|
|
|
6
|
my $self = bless {}, $class; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# initialize |
50
|
3
|
50
|
|
|
|
8
|
if (defined $fn_out) { |
51
|
3
|
50
|
|
|
|
119
|
open $self->{fh}, '>', $fn_out |
52
|
|
|
|
|
|
|
or croak "Error opening file for writing"; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
else { |
55
|
0
|
|
|
|
|
0
|
$self->{fh} = \*STDOUT; |
56
|
|
|
|
|
|
|
} |
57
|
3
|
|
|
|
|
10
|
binmode $self->{fh}; |
58
|
|
|
|
|
|
|
|
59
|
3
|
|
|
|
|
11
|
$self->{c_level} = Z_DEFAULT_COMPRESSION; |
60
|
3
|
|
|
|
|
12
|
$self->{buffer} = ''; # contents waiting to be compressed/written |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# these variables are tracked to allow for virtual offset calculation |
63
|
3
|
|
|
|
|
5
|
$self->{block_offset} = 0; # offset of current block in compressed data |
64
|
3
|
|
|
|
|
5
|
$self->{buffer_offset} = 0; # offset of current pos in uncompressed block |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# these variables are tracked to allow for index creation |
67
|
3
|
|
|
|
|
5
|
$self->{u_offset} = 0; #uncompressed file offset |
68
|
3
|
|
|
|
|
5
|
$self->{idx} = []; |
69
|
|
|
|
|
|
|
|
70
|
3
|
|
|
|
|
12
|
return $self; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub set_level { |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
77
|
|
|
|
|
|
|
# ARG 0 : compression level desired |
78
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
79
|
|
|
|
|
|
|
# no returns |
80
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
81
|
|
|
|
|
|
|
|
82
|
0
|
|
|
0
|
1
|
0
|
my ($self, $level) = @_; |
83
|
|
|
|
|
|
|
|
84
|
0
|
0
|
|
|
|
0
|
croak "Invalid compression level (allowed 0-9)" |
85
|
|
|
|
|
|
|
if ($level !~ /^\d$/); |
86
|
0
|
|
|
|
|
0
|
$self->{c_level} = $level; |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
0
|
return; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub add_data { |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# a wrapper around the queue() function that returns the virtual offset |
95
|
|
|
|
|
|
|
# of the chunk added |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
98
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
99
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
100
|
|
|
|
|
|
|
# RET 1 : virtual offset of data written |
101
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
102
|
|
|
|
|
|
|
|
103
|
0
|
|
|
0
|
1
|
0
|
my ($self, $content) = @_; |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset}; |
106
|
0
|
|
|
|
|
0
|
$self->_queue( $content ); |
107
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
0
|
return $vo; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub _queue { |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
115
|
|
|
|
|
|
|
# ARG 0 : data chunk to queue for compression |
116
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
117
|
|
|
|
|
|
|
# no returns |
118
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
119
|
|
|
|
|
|
|
|
120
|
2015
|
|
|
2015
|
|
2646
|
my ($self, $content) = @_; |
121
|
|
|
|
|
|
|
|
122
|
2015
|
|
|
|
|
2568
|
$self->{buffer} .= $content; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# compress/write in 64k chunks |
125
|
2015
|
|
|
|
|
2983
|
while (length($self->{buffer}) >= FLUSH_SIZE) { |
126
|
|
|
|
|
|
|
|
127
|
5
|
|
|
|
|
107
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
128
|
5
|
|
|
|
|
13
|
my $unwritten = $self->_write_block($chunk); |
129
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
130
|
5
|
100
|
|
|
|
32
|
if ( length($unwritten) ); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
133
|
2015
|
|
|
|
|
2262
|
$self->{buffer_offset} = length $self->{buffer}; |
134
|
|
|
|
|
|
|
|
135
|
2015
|
|
|
|
|
4044
|
return; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _write_block { |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
142
|
|
|
|
|
|
|
# ARG 0 : independent data block to compress |
143
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
144
|
|
|
|
|
|
|
# RET 0 : remaining data that wasn't written |
145
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
146
|
|
|
|
|
|
|
|
147
|
8
|
|
|
8
|
|
13
|
my ($self, $chunk) = @_; |
148
|
|
|
|
|
|
|
|
149
|
8
|
|
|
|
|
12
|
my $chunk_len = length($chunk); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# payload is compressed with DEFLATE |
152
|
|
|
|
|
|
|
rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level}) |
153
|
8
|
50
|
|
|
|
31
|
or croak "deflate failed: $RawDeflateError\n"; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# very rarely, a DEFLATEd string may be larger than input. This can result |
156
|
|
|
|
|
|
|
# in a block size > 2**16, which violates the BGZF specification and |
157
|
|
|
|
|
|
|
# causes corruption of the BC field. Fix those edge cases here (somewhat |
158
|
|
|
|
|
|
|
# slow but shouldn't happen often) and send the rest back to the buffer |
159
|
8
|
|
|
|
|
26069
|
my $trimmed = ''; |
160
|
8
|
|
|
|
|
19
|
while (length($payload) > FLUSH_SIZE) { |
161
|
4
|
|
|
|
|
8
|
my $trim_len = int( $chunk_len * 0.05 ); |
162
|
4
|
|
|
|
|
50
|
$trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed; |
163
|
|
|
|
|
|
|
rawdeflate(\$chunk, \$payload, -Level => $self->{c_level}) |
164
|
4
|
50
|
|
|
|
18
|
or croak "deflate failed: $RawDeflateError\n"; |
165
|
4
|
|
|
|
|
11879
|
$chunk_len = length($chunk); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
8
|
|
|
|
|
14
|
my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES; |
169
|
|
|
|
|
|
|
|
170
|
8
|
50
|
|
|
|
19
|
croak "Internal error: block size > 65536" if ($block_size > 2**16); |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# payload is wrapped with appropriate headers and footers |
173
|
8
|
50
|
|
|
|
10
|
print { $self->{fh} } pack( |
|
8
|
|
|
|
|
1774
|
|
174
|
|
|
|
|
|
|
"a*va*VV", |
175
|
|
|
|
|
|
|
BGZF_HEADER, |
176
|
|
|
|
|
|
|
$block_size - 1, |
177
|
|
|
|
|
|
|
$payload, |
178
|
|
|
|
|
|
|
crc32($chunk), |
179
|
|
|
|
|
|
|
$chunk_len, |
180
|
|
|
|
|
|
|
) or croak "Error writing compressed block"; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# increment the current offsets |
183
|
8
|
|
|
|
|
25
|
$self->{block_offset} += $block_size; |
184
|
8
|
|
|
|
|
9
|
$self->{u_offset} += $chunk_len; |
185
|
8
|
|
|
|
|
13
|
push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}]; |
|
8
|
|
|
|
|
25
|
|
186
|
|
|
|
|
|
|
|
187
|
8
|
|
|
|
|
29
|
return $trimmed; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub finalize { |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
194
|
|
|
|
|
|
|
# no arguments |
195
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
196
|
|
|
|
|
|
|
# no returns |
197
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
198
|
|
|
|
|
|
|
|
199
|
6
|
|
|
6
|
1
|
10
|
my ($self) = @_; |
200
|
|
|
|
|
|
|
|
201
|
6
|
|
|
|
|
17
|
while (length($self->{buffer}) > 0) { |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
croak "file closed but buffer not empty" |
204
|
3
|
50
|
|
|
|
9
|
if ( ! defined fileno($self->{fh}) ); |
205
|
|
|
|
|
|
|
|
206
|
3
|
|
|
|
|
19
|
my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, ''; |
207
|
3
|
|
|
|
|
7
|
my $unwritten = $self->_write_block($chunk); |
208
|
|
|
|
|
|
|
$self->{buffer} = $unwritten . $self->{buffer} |
209
|
3
|
50
|
|
|
|
12
|
if ( length($unwritten) ); |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
} |
212
|
6
|
100
|
|
|
|
17
|
if (defined fileno($self->{fh}) ) { |
213
|
|
|
|
|
|
|
close $self->{fh} |
214
|
3
|
50
|
|
|
|
233
|
or croak "Error closing compressed file"; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
6
|
|
|
|
|
20
|
return; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub write_index { |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
224
|
|
|
|
|
|
|
# ARG 0 : index output filename |
225
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
226
|
|
|
|
|
|
|
# No returns |
227
|
|
|
|
|
|
|
#------------------------------------------------------------------------- |
228
|
|
|
|
|
|
|
|
229
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fn_out) = @_; |
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
0
|
$self->finalize(); # always clear remaining buffer to fully populate index |
232
|
0
|
0
|
|
|
|
0
|
croak "missing index output filename" if (! defined $fn_out); |
233
|
0
|
0
|
|
|
|
0
|
open my $fh_out, '>:raw', $fn_out |
234
|
|
|
|
|
|
|
or croak "Error opening index file for writing"; |
235
|
|
|
|
|
|
|
|
236
|
0
|
|
|
|
|
0
|
my @offsets = @{ $self->{idx} }; |
|
0
|
|
|
|
|
0
|
|
237
|
0
|
|
|
|
|
0
|
pop @offsets; # last offset is EOF |
238
|
0
|
0
|
|
|
|
0
|
print {$fh_out} pack('Q<', scalar(@offsets)) |
|
0
|
|
|
|
|
0
|
|
239
|
|
|
|
|
|
|
or croak "Error printing to index file"; |
240
|
0
|
|
|
|
|
0
|
for (@offsets) { |
241
|
0
|
0
|
|
|
|
0
|
print {$fh_out} pack('Q
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
242
|
|
|
|
|
|
|
or croak "Error printing offset to index file"; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
0
|
0
|
|
|
|
0
|
close $fh_out |
246
|
|
|
|
|
|
|
or croak "Error closing index file after writing"; |
247
|
0
|
|
|
|
|
0
|
return; |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub DESTROY { |
252
|
|
|
|
|
|
|
|
253
|
3
|
|
|
3
|
|
773
|
my ($self) = @_; |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# make sure we call finalize in case the caller forgot |
256
|
3
|
|
|
|
|
9
|
$self->finalize(); |
257
|
|
|
|
|
|
|
|
258
|
3
|
|
|
|
|
113
|
return; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
1; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
__END__ |