File Coverage

blib/lib/Compress/BGZF/Writer.pm
Criterion Covered Total %
statement 84 108 77.7
branch 14 36 38.8
condition n/a
subroutine 18 21 85.7
pod 6 6 100.0
total 122 171 71.3


line stmt bran cond sub pod time code
1             package Compress::BGZF::Writer;
2              
3 2     2   785 use strict;
  2         5  
  2         60  
4 2     2   8 use warnings;
  2         5  
  2         39  
5              
6 2     2   8 use Carp;
  2         4  
  2         89  
7 2     2   11 use Compress::Zlib;
  2         7  
  2         435  
8 2     2   19 use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/;
  2         3  
  2         205  
9              
10 2     2   11 use constant HEAD_BYTES => 18;
  2         4  
  2         102  
11 2     2   11 use constant FOOT_BYTES => 8;
  2         3  
  2         101  
12 2     2   9 use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1;
  2         4  
  2         95  
13 2     2   11 use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200';
  2         3  
  2         1872  
14              
15             ## no critic
16             # allow for filehandle tie'ing
17 3     3   8 sub TIEHANDLE { Compress::BGZF::Writer::new(@_) }
18 2015     2015   5553 sub PRINT { Compress::BGZF::Writer::_queue(@_) }
19 3     3   14 sub CLOSE { Compress::BGZF::Writer::finalize(@_) }
20             ## use critic
21              
22             sub new_filehandle {
23              
24             #-------------------------------------------------------------------------
25             # ARG 0 : (optional) output filename
26             #-------------------------------------------------------------------------
27             # RET 0 : filehandle GLOB
28             #-------------------------------------------------------------------------
29              
30 3     3 1 1972 my ($class, $fn_out) = @_;
31              
32 3         209 open my $fh, '<', undef;
33 3 50       20 tie *$fh, $class, $fn_out or croak "failed to tie filehandle";
34 3         9 return $fh;
35              
36             }
37              
38             sub new {
39              
40             #-------------------------------------------------------------------------
41             # ARG 0 : (optional) output filename
42             #-------------------------------------------------------------------------
43             # RET 0 : Compress::BGZF::Writer object
44             #-------------------------------------------------------------------------
45              
46 3     3 1 7 my ($class, $fn_out) = @_;
47 3         6 my $self = bless {}, $class;
48              
49             # initialize
50 3 50       8 if (defined $fn_out) {
51 3 50       119 open $self->{fh}, '>', $fn_out
52             or croak "Error opening file for writing";
53             }
54             else {
55 0         0 $self->{fh} = \*STDOUT;
56             }
57 3         10 binmode $self->{fh};
58              
59 3         11 $self->{c_level} = Z_DEFAULT_COMPRESSION;
60 3         12 $self->{buffer} = ''; # contents waiting to be compressed/written
61              
62             # these variables are tracked to allow for virtual offset calculation
63 3         5 $self->{block_offset} = 0; # offset of current block in compressed data
64 3         5 $self->{buffer_offset} = 0; # offset of current pos in uncompressed block
65              
66             # these variables are tracked to allow for index creation
67 3         5 $self->{u_offset} = 0; #uncompressed file offset
68 3         5 $self->{idx} = [];
69              
70 3         12 return $self;
71              
72             }
73              
74             sub set_level {
75              
76             #-------------------------------------------------------------------------
77             # ARG 0 : compression level desired
78             #-------------------------------------------------------------------------
79             # no returns
80             #-------------------------------------------------------------------------
81              
82 0     0 1 0 my ($self, $level) = @_;
83              
84 0 0       0 croak "Invalid compression level (allowed 0-9)"
85             if ($level !~ /^\d$/);
86 0         0 $self->{c_level} = $level;
87              
88 0         0 return;
89              
90             }
91              
92             sub add_data {
93              
94             # a wrapper around the queue() function that returns the virtual offset
95             # of the chunk added
96              
97             #-------------------------------------------------------------------------
98             # ARG 0 : data chunk to queue for compression
99             #-------------------------------------------------------------------------
100             # RET 1 : virtual offset of data written
101             #-------------------------------------------------------------------------
102              
103 0     0 1 0 my ($self, $content) = @_;
104              
105 0         0 my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset};
106 0         0 $self->_queue( $content );
107              
108 0         0 return $vo;
109              
110             }
111              
112             sub _queue {
113              
114             #-------------------------------------------------------------------------
115             # ARG 0 : data chunk to queue for compression
116             #-------------------------------------------------------------------------
117             # no returns
118             #-------------------------------------------------------------------------
119              
120 2015     2015   2646 my ($self, $content) = @_;
121              
122 2015         2568 $self->{buffer} .= $content;
123              
124             # compress/write in 64k chunks
125 2015         2983 while (length($self->{buffer}) >= FLUSH_SIZE) {
126              
127 5         107 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
128 5         13 my $unwritten = $self->_write_block($chunk);
129             $self->{buffer} = $unwritten . $self->{buffer}
130 5 100       32 if ( length($unwritten) );
131              
132             }
133 2015         2262 $self->{buffer_offset} = length $self->{buffer};
134              
135 2015         4044 return;
136            
137             }
138              
139             sub _write_block {
140              
141             #-------------------------------------------------------------------------
142             # ARG 0 : independent data block to compress
143             #-------------------------------------------------------------------------
144             # RET 0 : remaining data that wasn't written
145             #-------------------------------------------------------------------------
146              
147 8     8   13 my ($self, $chunk) = @_;
148              
149 8         12 my $chunk_len = length($chunk);
150              
151             # payload is compressed with DEFLATE
152             rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level})
153 8 50       31 or croak "deflate failed: $RawDeflateError\n";
154              
155             # very rarely, a DEFLATEd string may be larger than input. This can result
156             # in a block size > 2**16, which violates the BGZF specification and
157             # causes corruption of the BC field. Fix those edge cases here (somewhat
158             # slow but shouldn't happen often) and send the rest back to the buffer
159 8         26069 my $trimmed = '';
160 8         19 while (length($payload) > FLUSH_SIZE) {
161 4         8 my $trim_len = int( $chunk_len * 0.05 );
162 4         50 $trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed;
163             rawdeflate(\$chunk, \$payload, -Level => $self->{c_level})
164 4 50       18 or croak "deflate failed: $RawDeflateError\n";
165 4         11879 $chunk_len = length($chunk);
166             }
167              
168 8         14 my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES;
169              
170 8 50       19 croak "Internal error: block size > 65536" if ($block_size > 2**16);
171              
172             # payload is wrapped with appropriate headers and footers
173 8 50       10 print { $self->{fh} } pack(
  8         1774  
174             "a*va*VV",
175             BGZF_HEADER,
176             $block_size - 1,
177             $payload,
178             crc32($chunk),
179             $chunk_len,
180             ) or croak "Error writing compressed block";
181              
182             # increment the current offsets
183 8         25 $self->{block_offset} += $block_size;
184 8         9 $self->{u_offset} += $chunk_len;
185 8         13 push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}];
  8         25  
186              
187 8         29 return $trimmed;
188              
189             }
190              
191             sub finalize {
192              
193             #-------------------------------------------------------------------------
194             # no arguments
195             #-------------------------------------------------------------------------
196             # no returns
197             #-------------------------------------------------------------------------
198              
199 6     6 1 10 my ($self) = @_;
200              
201 6         17 while (length($self->{buffer}) > 0) {
202              
203             croak "file closed but buffer not empty"
204 3 50       9 if ( ! defined fileno($self->{fh}) );
205              
206 3         19 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
207 3         7 my $unwritten = $self->_write_block($chunk);
208             $self->{buffer} = $unwritten . $self->{buffer}
209 3 50       12 if ( length($unwritten) );
210              
211             }
212 6 100       17 if (defined fileno($self->{fh}) ) {
213             close $self->{fh}
214 3 50       233 or croak "Error closing compressed file";
215             }
216              
217 6         20 return;
218              
219             }
220              
221             sub write_index {
222              
223             #-------------------------------------------------------------------------
224             # ARG 0 : index output filename
225             #-------------------------------------------------------------------------
226             # No returns
227             #-------------------------------------------------------------------------
228              
229 0     0 1 0 my ($self, $fn_out) = @_;
230              
231 0         0 $self->finalize(); # always clear remaining buffer to fully populate index
232 0 0       0 croak "missing index output filename" if (! defined $fn_out);
233 0 0       0 open my $fh_out, '>:raw', $fn_out
234             or croak "Error opening index file for writing";
235              
236 0         0 my @offsets = @{ $self->{idx} };
  0         0  
237 0         0 pop @offsets; # last offset is EOF
238 0 0       0 print {$fh_out} pack('Q<', scalar(@offsets))
  0         0  
239             or croak "Error printing to index file";
240 0         0 for (@offsets) {
241 0 0       0 print {$fh_out} pack('Q
  0         0  
  0         0  
242             or croak "Error printing offset to index file";
243             }
244              
245 0 0       0 close $fh_out
246             or croak "Error closing index file after writing";
247 0         0 return;
248              
249             }
250              
251             sub DESTROY {
252              
253 3     3   773 my ($self) = @_;
254              
255             # make sure we call finalize in case the caller forgot
256 3         9 $self->finalize();
257              
258 3         113 return;
259              
260             }
261              
262             1;
263              
264              
265             __END__