File Coverage

blib/lib/Finnigan/Scan.pm
Criterion Covered Total %
statement 121 246 49.1
branch 15 52 28.8
condition 8 35 22.8
subroutine 21 29 72.4
pod 4 4 100.0
total 169 366 46.1


line stmt bran cond sub pod time code
1 2     2   11 use feature qw/state say/;
  2         4  
  2         237  
2 2     2   57 use 5.010;
  2         8  
  2         79  
3 2     2   11 use strict;
  2         4  
  2         71  
4 2     2   51 use warnings FATAL => qw( all );
  2         4  
  2         5153  
5              
6             # ----------------------------------------------------------------------------------------
7             package Finnigan::Scan::Profile;
8             our $VERSION = 0.0206;
9              
10             my $MAX_DIST = 0.025; # kHz
11             my $MAX_DIST_MZ = 0.001; # M/z
12              
13             sub new {
14 1     1   5 my ($class, $buf, $layout) = @_;
15 1         4 my $self = {};
16 1         92 @{$self}{'first value', 'step', 'peak count', 'nbins'} = unpack 'd
  1         5  
17 1         4 my $offset = 24; # ddVV
18              
19 1         3 my $chunk;
20 1         5 foreach my $i (0 .. $self->{'peak count'} - 1) {
21 580         1298 $chunk = new Finnigan::Scan::ProfileChunk $buf, $offset, $layout;
22 580         884 $offset += $chunk->{size};
23 580         626 push @{$self->{chunks}}, $chunk;
  580         1245  
24             }
25              
26 1         14 return bless $self, $class;
27             }
28              
29             sub set_converter {
30 1     1   8536 $_[0]->{converter} = $_[1];
31             }
32              
33             sub set_inverse_converter {
34 0     0   0 $_[0]->{"inverse converter"} = $_[1];
35             }
36              
37             sub nchunks { # instead of the deprecated peak_count()
38 0     0   0 $_[0]->{"peak count"};
39             }
40              
41             sub peak_count { # deprecated
42 0     0   0 $_[0]->{"peak count"};
43             }
44              
45             sub print_bins {
46 0     0   0 my ($self, $bookends) = @_;
47              
48 0         0 foreach ( @{$self->bins($bookends)} ) {
  0         0  
49 0         0 say join "\t", @$_;
50             }
51             }
52              
53             sub bins {
54 1     1   10 my ($self, $bookends) = @_;
55 1         3 my @list;
56 1         4 my $start = $self->{"first value"};
57 1         4 my $step = $self->{step};
58              
59             # Write something at the start of the range
60 1         3 my $front_bookend_needed;
61 1 50       7 if ($bookends) {
62 0   0     0 my $fudge = $self->{chunks}->[0]->{'fudge'} || 0;
63 0         0 my $startMz = &{$self->{converter}}( $start + $step ) + $fudge;
  0         0  
64 0         0 my $next_chunk_start = $self->{chunks}->[0]->{'first bin'};
65 0         0 my $gap_size = $next_chunk_start - 0; # will see if it's 0 or 1
66 0         0 my $fill_size;
67 0 0       0 if ( $gap_size < 2 * $bookends ) {
68 0         0 $fill_size = $gap_size;
69 0         0 $front_bookend_needed = 0;
70             }
71             else {
72 0         0 $fill_size = $bookends;
73 0         0 $front_bookend_needed = 1;
74             }
75              
76 0         0 foreach my $j ( 1 .. $fill_size ) {
77 0         0 push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0]
  0         0  
78             }
79             }
80              
81 1         4 my $fudge = 0; # this is declared outside the chunk loop to allow
82             # writing the empty bins at the end of the range with
83             # the same amount of fudge as in the last chunk
84              
85 1         7 foreach my $i ( 0 .. $self->{"peak count"} - 1 ) { # each chunk
86 580         1023 my $chunk = $self->{chunks}->[$i];
87 580         1143 my $first_bin = $chunk->{'first bin'};
88 580   100     1717 $fudge = $chunk->{fudge} || 0;
89 580         1002 my $x = $start + $first_bin * $step;
90              
91             # front bookend
92 580 50 33     1420 if ( $bookends and $front_bookend_needed ) {
93             # add empty bins ahead of the chunk
94 0         0 foreach my $j ( $first_bin - $bookends .. $first_bin - 1) {
95 0         0 push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0];
  0         0  
96             }
97             }
98              
99             # chunk data
100 580         1150 foreach my $j ( 0 .. $chunk->{nbins} - 1) {
101 3878         6489 push @list, [&{$self->{converter}}( $start + ($first_bin + $j) * $step ) + $fudge, $chunk->{signal}->[$j]];
  3878         11245  
102             }
103              
104             # tail bookeend
105 580 50       1791 if ( $bookends ) {
106              
107             # determine the number of gap bins
108 0         0 my $next_chunk_start;
109             my $next_chunks_fudge;
110 0 0       0 if ($i == $self->{'peak count'} - 1 ) {
111 0         0 $next_chunk_start = $self->{nbins};
112 0         0 $next_chunks_fudge = $fudge;
113             }
114             else {
115 0         0 $next_chunk_start = $self->{chunks}->[$i + 1]->{'first bin'};
116 0   0     0 $next_chunks_fudge = $self->{chunks}->[$i + 1]->{fudge} || 0; # not all profiles have the fudge value
117             }
118 0         0 my $gap_size = $next_chunk_start - $first_bin - $chunk->{nbins}; # will see if it's 0 or 1
119 0         0 my $fill_size = $bookends;
120 0 0       0 if ( $gap_size < 2 * $bookends ) {
121 0         0 $fill_size = $gap_size;
122 0         0 $front_bookend_needed = 0;
123             }
124             else {
125 0         0 $front_bookend_needed = 1;
126             }
127              
128             # write the tail bookend
129 0         0 foreach my $j ( $chunk->{nbins} .. $chunk->{nbins} + $fill_size - 1 ) {
130             # Using the next chunk's fudge to add zeroes to this chunk is unreasonable, but whatever they please...
131 0         0 push @list, [&{$self->{converter}}( $start + ($first_bin + $j) * $step ) + $next_chunks_fudge, 0]
  0         0  
132             }
133             }
134             }
135              
136             # filling the bookend at the end of the range
137 1 50 33     8 if ( $bookends and $front_bookend_needed ) {
138 0         0 my $last_bin = $self->{nbins};
139 0         0 foreach my $j ( $last_bin - $bookends + 1 .. $last_bin) {
140 0         0 push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0];
  0         0  
141             }
142             }
143              
144 1         9 return \@list;
145             }
146              
147             sub find_peak_intensity {
148             # Finds the nearest peak in the profile for a given query value.
149             # One possible use is to look up the precursor ion intensity, since
150             # it is not stored as a separate item anywhere in the data file.
151             # Return the intensity the peak matching the query M/z.
152 0     0   0 my ($self, $query) = @_;
153 0         0 my $raw_query = &{$self->{"inverse converter"}}($query);
  0         0  
154              
155             # find the closest chunk
156 0         0 my ($nearest_chunk, $dist) = $self->find_chunk($raw_query);
157 0 0 0     0 if (not defined $dist or $dist > $MAX_DIST) { # undefind $dist means we're outside the full range of peaks in the scan
158 0         0 say STDERR "$self->{'dependent scan number'}: could not find a profile peak in parent scan $self->{'scan number'} within ${MAX_DIST} M/z of the target frequency $raw_query (M/z $query)";
159 0         0 return 0;
160             }
161              
162 0         0 my @chunk_ix = ($nearest_chunk);
163 0         0 my $i = $nearest_chunk;
164 0   0     0 while ( $i < $self->{"peak count"} - 1 and $self->chunk_dist($i, $i++) <= $MAX_DIST ) { # kHz
165 0         0 push @chunk_ix, $i;
166             }
167 0         0 $i = $nearest_chunk;
168 0   0     0 while ( $i > 0 and $self->chunk_dist($i, $i--) <= $MAX_DIST ) { # kHz
169 0         0 push @chunk_ix, $i;
170             }
171              
172 0         0 return (sort {$b <=> $a} map {$self->chunk_max($_)} @chunk_ix)[0]; # max. intensity
  0         0  
  0         0  
173             }
174              
175             sub chunk_dist {
176             # find the gap distance between the chunks
177 0     0   0 my ($self, $i, $j) = @_;
178 0         0 my ($chunk_i, $chunk_j) = ($self->{chunks}->[$i], $self->{chunks}->[$j]);
179 0         0 my $start = $self->{"first value"};
180 0         0 my $step = $self->{step};
181 0         0 my $min_i = $start + ($chunk_i->{"first bin"} - 1) * $step;
182 0         0 my $max_i = $min_i + $chunk_i->{nbins} * $step;
183 0         0 my $min_j = $start + ($chunk_j->{"first bin"} - 1) * $step;
184 0         0 my $max_j = $min_j + $chunk_j->{nbins} * $step;
185 0         0 my $dist = (sort {$a <=> $b}
  0         0  
186             (
187             abs($min_i - $min_j),
188             abs($min_i - $max_j),
189             abs($max_i - $min_j),
190             abs($max_i - $max_j)
191             )
192             )[0];
193 0         0 return $dist;
194             }
195              
196             sub chunk_max {
197 0     0   0 my ($self, $num) = @_;
198 0         0 my $chunk = $self->{chunks}->[$num];
199 0         0 my $max = 0;
200 0         0 foreach my $i ( 0 .. $chunk->{nbins} - 1) {
201 0         0 my $intensity = $chunk->{signal}->[$i];
202 0 0       0 $max = $intensity if $intensity > $max;
203             }
204 0         0 return $max;
205             }
206              
207             sub find_chunk {
208             # Use binary search to find a pair of profile chunks
209             # in the (sorted) chunk list surrounding the probe value
210              
211 0     0   0 my ( $self, $value) = @_;
212 0         0 my $chunks = $self->{chunks};
213 0         0 my $first_value = $self->{"first value"};
214 0         0 my $step = $self->{step};
215 0         0 my ( $lower, $upper, $low_ix, $high_ix, $cur );
216 0         0 my $safety_count = 15;
217 0         0 my $last_match = [undef, 100000];
218 0         0 my $dist;
219              
220 0         0 ( $upper, $lower ) = ( $first_value, $first_value + $step * $self->{nbins} ) ;
221 0 0 0     0 if ( $value < $lower - $MAX_DIST or $value > $upper + $MAX_DIST) {
222 0         0 return (undef, undef);
223             }
224             else {
225 0         0 ( $low_ix, $high_ix ) = ( 0, $self->{"peak count"} - 1 );
226 0         0 while ( $low_ix < $high_ix ) {
227 0 0       0 die "broken find_chunk algorithm" unless $safety_count--;
228 0         0 $cur = int ( ( $low_ix + $high_ix ) / 2 );
229 0         0 my $chunk = $chunks->[$cur];
230 0         0 $upper = $first_value + $chunk->{"first bin"} * $step;
231 0         0 $lower = $upper + $chunk->{nbins} * $step;
232             # say STDERR " testing: $cur [$lower .. $upper] against $value in [$low_ix .. $high_ix]";
233 0 0 0     0 if ( $value >= $lower and $value <= $upper ) {
234             # say STDERR " direct hit";
235 0         0 return ($cur, 0);
236             }
237 0 0       0 if ( $value > $upper ) {
238 0         0 $dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
  0         0  
239 0 0       0 $last_match = [$cur, $dist] if $dist < $last_match->[1];
240             # say STDERR " distance = $dist, shifting up";
241 0         0 $high_ix = $cur;
242             }
243 0 0       0 if ( $value < $lower ) {
244 0         0 $dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
  0         0  
245 0 0       0 $last_match = [$cur, $dist] if $dist < $last_match->[1];
246             # say STDERR " distance = $dist; shifting down";
247 0         0 $low_ix = $cur + 1;
248             }
249             # say STDERR "The remainder: $low_ix, $high_ix";
250             }
251             # say STDERR "The final remainder: $low_ix, $high_ix";
252             }
253              
254 0 0       0 if ( $low_ix == $high_ix ) {
255             # this is one of possibly two closest chunks, with no direct hits found
256 0         0 my $chunk = $chunks->[$low_ix];
257 0         0 $upper = $first_value + $chunk->{"first bin"} * $step;
258 0         0 $lower = $upper + $chunk->{nbins} * $step;
259 0         0 my $dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
  0         0  
260              
261 0         0 my ($closest_chunk, $min_dist) = ($low_ix, $dist);
262 0 0       0 if ( $dist > $last_match->[1] ) {
263 0         0 ($closest_chunk, $min_dist) = @$last_match;
264             }
265             # say STDERR " no direct hit; closest chunk is $closest_chunk; distance between $value and [$lower, $upper] is $min_dist";
266 0         0 return ($closest_chunk, $min_dist);
267             }
268              
269 0         0 die "unexpected condition";
270             }
271              
272             #----------------------------------------------------------------------------------------
273             package Finnigan::Scan;
274             our $VERSION = 0.0206;
275              
276 2     2   16 use strict;
  2         5  
  2         65  
277 2     2   11 use warnings;
  2         4  
  2         72  
278              
279 2     2   10 use Finnigan;
  2         5  
  2         2782  
280              
281             sub decode {
282 1     1 1 901 my ($class, $stream) = @_;
283              
284 1         5 my $self = {
285             addr => tell $stream
286             };
287 1         3 my $buf;
288             my $nbytes;
289 0         0 my $bytes_to_read;
290 0         0 my $current_addr;
291              
292 1         12 $self->{header} = Finnigan::PacketHeader->decode($stream);
293 1         4 $self->{size} = $self->{header}->{size};
294 1         3 my $header_data = $self->{header}->{data};
295              
296 1         3 $bytes_to_read = 4 * $header_data->{"profile size"}->{value};
297 1         54 $nbytes = CORE::read $stream, $self->{"raw profile"}, $bytes_to_read;
298 1 50       10 $nbytes == $bytes_to_read
299             or die "could not read all $bytes_to_read bytes of scan profile at " . ($self->{addr} + $self->{size});
300 1         5 $self->{size} += $nbytes;
301              
302 1         4 $bytes_to_read = 4 * $header_data->{"peak list size"}->{value};
303 1         25 $nbytes = CORE::read $stream, $self->{"raw centroids"}, $bytes_to_read;
304 1 50       6 $nbytes == $bytes_to_read
305             or die "could not read all $bytes_to_read bytes of scan profile at " . ($self->{addr} + $self->{size});
306 1         3 $self->{size} += $nbytes;
307              
308             # skip peak descriptors and the unknown streams
309 1         5 $self->{size} += 4 * (
310             $header_data->{"descriptor list size"}->{value} +
311             $header_data->{"size of unknown stream"}->{value} +
312             $header_data->{"size of triplet stream"}->{value}
313             );
314 1         11 seek $stream, $self->{addr} + $self->{size}, 0;
315              
316 1         5 return bless $self, $class;
317             }
318              
319             sub header {
320 1     1 1 18 return shift->{header};
321             }
322              
323             sub profile {
324 1     1 1 15 new Finnigan::Scan::Profile $_[0]->{"raw profile"}, $_[0]->{header}->{data}->{layout}->{value};
325             }
326              
327             sub centroids {
328 1     1 1 2256 new Finnigan::Scan::CentroidList $_[0]->{"raw centroids"};
329             }
330              
331             # end Finnigan::Scan::Profile
332             # ----------------------------------------------------------------------------------------
333             package Finnigan::Scan::ProfileChunk;
334             our $VERSION = 0.0206;
335              
336             sub new {
337 580     580   923 my ($class, $buf, $offset, $layout) = @_;
338 580         1000 my $self = {};
339 580 50       1093 if ( $layout > 0 ) {
340 580         11188 @{$self}{'first bin', 'nbins', 'fudge'} = unpack "x${offset} VVf<", $buf;
  580         1967  
341 580         1112 $self->{size} = 12;
342             }
343             else {
344 0         0 @{$self}{'first bin', 'nbins'} = unpack "x${offset} VV", $buf;
  0         0  
345 0         0 $self->{size} = 8;
346             }
347 580         789 $offset += $self->{size};
348              
349 580         1843 @{$self->{signal}} = unpack "x${offset} f<$self->{nbins}", $buf;
  580         1958  
350 580         1114 $self->{size} += 4 * $self->{nbins};
351              
352 580         1936 return bless $self, $class;
353             }
354              
355             #----------------------------------------------------------------------------------------
356             package Finnigan::Scan::CentroidList;
357             our $VERSION = 0.0206;
358              
359             sub new {
360 1     1   3 my ($class, $buf) = @_;
361 1         9 my $self = {count => unpack 'V', $buf};
362 1         4 my $offset = 4; # V
363              
364 1         2 my $chunk;
365 1         5 foreach my $i (0 .. $self->{count} - 1) {
366 580         579 push @{$self->{peaks}}, [unpack "x${offset} f
  580         1839  
367 580         863 $offset += 8;
368             }
369              
370 1         14 return bless $self, $class;
371             }
372              
373             sub count {
374 1     1   1395 shift->{count};
375             }
376              
377             sub list {
378 2     2   425 shift->{peaks};
379             }
380              
381             sub find_peak_intensity {
382             # Finds the nearest peak in the profile for a given query value.
383              
384 2     2   456 my ($self, $query) = @_;
385              
386             # find the closest peak
387 2         6 my ($nearest_peak, $dist) = $self->find_peak($query);
388 2 100 66     16 if (not defined $dist or $dist > $MAX_DIST_MZ) { # undefind $dist means we're outside the full range of peaks in the scan
389 1         170 say STDERR "$self->{'dependent scan number'}: could not find a profile peak in parent scan $self->{'scan number'} within ${MAX_DIST_MZ} M/z the target value $query";
390 1         8 return 0;
391             }
392              
393 1         7 return $self->{peaks}->[$nearest_peak]->[1];
394             }
395              
396             sub find_peak {
397             # Use binary search to find a pair of peaks
398             # surrounding the probe value
399              
400             # One possible use is to look up the precursor ion intensity, since
401             # it is not stored as a separate item anywhere in the data file.
402              
403 5     5   426 my ( $self, $value) = @_;
404 5         7 my ( $lower, $upper, $low_ix, $high_ix, $mid_ix );
405 5         7 my $safety_count = 15;
406 5         8 my $dist;
407              
408             sub num_equal {
409 39     39   56 my( $float1, $float2, $diff ) = @_;
410 39   50     172 abs( $float1 - $float2 ) < ($diff or 0.00001);
411             }
412              
413 5         14 ( $low_ix, $high_ix ) = ( 0, $self->{count} - 1 );
414 5         19 ( $lower, $upper ) = ($self->{peaks}->[$low_ix]->[0], $self->{peaks}->[$high_ix]->[0]);
415 5 50 33     39 if ( $value < $lower - $MAX_DIST_MZ or $value > $upper + $MAX_DIST_MZ) {
416 0         0 return (undef, undef);
417             }
418             else {
419 5         7 my $dist;
420 5         14 while ( $low_ix <= $high_ix ) {
421 39 50       77 die "broken find_peak algorithm" unless $safety_count--;
422 39         81 $mid_ix = $low_ix + int ( ( $high_ix - $low_ix ) / 2 );
423 39         67 my $peak = $self->{peaks}->[$mid_ix];
424 39         53 $dist = abs($value - $peak->[0]);
425             # say STDERR " testing: $mid_ix [$peak->[0]] against $value";
426 39 100       74 if ( num_equal($peak->[0], $value, $MAX_DIST_MZ ) ) {
    100          
427 3         18 return ($mid_ix, $dist)
428             }
429             elsif ( $peak->[0] < $value) {
430             # say STDERR " distance = $dist, shifting up";
431 29         84 $low_ix = $mid_ix + 1;
432             }
433             else { # $peak->[0] > $value
434             # say STDERR " distance = $dist; shifting down";
435 7         21 $high_ix = $mid_ix - 1;
436             }
437             }
438 2         8 return (undef, $dist);
439             }
440             }
441              
442             1;
443              
444             __END__