File Coverage

blib/lib/Finnigan/Scan.pm

Criterion	Covered	Total	%
statement	121	246	49.1
branch	15	52	28.8
condition	8	35	22.8
subroutine	21	29	72.4
pod	4	4	100.0
total	169	366	46.1

line	stmt	bran	cond	sub	pod	time	code
1	2			2		11	use feature qw/state say/;
	2					4
	2					237
2	2			2		57	use 5.010;
	2					8
	2					79
3	2			2		11	use strict;
	2					4
	2					71
4	2			2		51	use warnings FATAL => qw( all );
	2					4
	2					5153
5
6							# ----------------------------------------------------------------------------------------
7							package Finnigan::Scan::Profile;
8							our $VERSION = 0.0206;
9
10							my $MAX_DIST = 0.025; # kHz
11							my $MAX_DIST_MZ = 0.001; # M/z
12
13							sub new {
14	1			1		5	my ($class, $buf, $layout) = @_;
15	1					4	my $self = {};
16	1					92	@{$self}{'first value', 'step', 'peak count', 'nbins'} = unpack 'd
	1					5
17	1					4	my $offset = 24; # ddVV
18
19	1					3	my $chunk;
20	1					5	foreach my $i (0 .. $self->{'peak count'} - 1) {
21	580					1298	$chunk = new Finnigan::Scan::ProfileChunk $buf, $offset, $layout;
22	580					884	$offset += $chunk->{size};
23	580					626	push @{$self->{chunks}}, $chunk;
	580					1245
24							}
25
26	1					14	return bless $self, $class;
27							}
28
29							sub set_converter {
30	1			1		8536	$_[0]->{converter} = $_[1];
31							}
32
33							sub set_inverse_converter {
34	0			0		0	$_[0]->{"inverse converter"} = $_[1];
35							}
36
37							sub nchunks { # instead of the deprecated peak_count()
38	0			0		0	$_[0]->{"peak count"};
39							}
40
41							sub peak_count { # deprecated
42	0			0		0	$_[0]->{"peak count"};
43							}
44
45							sub print_bins {
46	0			0		0	my ($self, $bookends) = @_;
47
48	0					0	foreach ( @{$self->bins($bookends)} ) {
	0					0
49	0					0	say join "\t", @$_;
50							}
51							}
52
53							sub bins {
54	1			1		10	my ($self, $bookends) = @_;
55	1					3	my @list;
56	1					4	my $start = $self->{"first value"};
57	1					4	my $step = $self->{step};
58
59							# Write something at the start of the range
60	1					3	my $front_bookend_needed;
61	1	50				7	if ($bookends) {
62	0		0			0	my $fudge = $self->{chunks}->[0]->{'fudge'} \|\| 0;
63	0					0	my $startMz = &{$self->{converter}}( $start + $step ) + $fudge;
	0					0
64	0					0	my $next_chunk_start = $self->{chunks}->[0]->{'first bin'};
65	0					0	my $gap_size = $next_chunk_start - 0; # will see if it's 0 or 1
66	0					0	my $fill_size;
67	0	0				0	if ( $gap_size < 2 * $bookends ) {
68	0					0	$fill_size = $gap_size;
69	0					0	$front_bookend_needed = 0;
70							}
71							else {
72	0					0	$fill_size = $bookends;
73	0					0	$front_bookend_needed = 1;
74							}
75
76	0					0	foreach my $j ( 1 .. $fill_size ) {
77	0					0	push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0]
	0					0
78							}
79							}
80
81	1					4	my $fudge = 0; # this is declared outside the chunk loop to allow
82							# writing the empty bins at the end of the range with
83							# the same amount of fudge as in the last chunk
84
85	1					7	foreach my $i ( 0 .. $self->{"peak count"} - 1 ) { # each chunk
86	580					1023	my $chunk = $self->{chunks}->[$i];
87	580					1143	my $first_bin = $chunk->{'first bin'};
88	580		100			1717	$fudge = $chunk->{fudge} \|\| 0;
89	580					1002	my $x = $start + $first_bin * $step;
90
91							# front bookend
92	580	50	33			1420	if ( $bookends and $front_bookend_needed ) {
93							# add empty bins ahead of the chunk
94	0					0	foreach my $j ( $first_bin - $bookends .. $first_bin - 1) {
95	0					0	push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0];
	0					0
96							}
97							}
98
99							# chunk data
100	580					1150	foreach my $j ( 0 .. $chunk->{nbins} - 1) {
101	3878					6489	push @list, [&{$self->{converter}}( $start + ($first_bin + $j) * $step ) + $fudge, $chunk->{signal}->[$j]];
	3878					11245
102							}
103
104							# tail bookeend
105	580	50				1791	if ( $bookends ) {
106
107							# determine the number of gap bins
108	0					0	my $next_chunk_start;
109							my $next_chunks_fudge;
110	0	0				0	if ($i == $self->{'peak count'} - 1 ) {
111	0					0	$next_chunk_start = $self->{nbins};
112	0					0	$next_chunks_fudge = $fudge;
113							}
114							else {
115	0					0	$next_chunk_start = $self->{chunks}->[$i + 1]->{'first bin'};
116	0		0			0	$next_chunks_fudge = $self->{chunks}->[$i + 1]->{fudge} \|\| 0; # not all profiles have the fudge value
117							}
118	0					0	my $gap_size = $next_chunk_start - $first_bin - $chunk->{nbins}; # will see if it's 0 or 1
119	0					0	my $fill_size = $bookends;
120	0	0				0	if ( $gap_size < 2 * $bookends ) {
121	0					0	$fill_size = $gap_size;
122	0					0	$front_bookend_needed = 0;
123							}
124							else {
125	0					0	$front_bookend_needed = 1;
126							}
127
128							# write the tail bookend
129	0					0	foreach my $j ( $chunk->{nbins} .. $chunk->{nbins} + $fill_size - 1 ) {
130							# Using the next chunk's fudge to add zeroes to this chunk is unreasonable, but whatever they please...
131	0					0	push @list, [&{$self->{converter}}( $start + ($first_bin + $j) * $step ) + $next_chunks_fudge, 0]
	0					0
132							}
133							}
134							}
135
136							# filling the bookend at the end of the range
137	1	50	33			8	if ( $bookends and $front_bookend_needed ) {
138	0					0	my $last_bin = $self->{nbins};
139	0					0	foreach my $j ( $last_bin - $bookends + 1 .. $last_bin) {
140	0					0	push @list, [&{$self->{converter}}( $start + $j * $step ) + $fudge, 0];
	0					0
141							}
142							}
143
144	1					9	return \@list;
145							}
146
147							sub find_peak_intensity {
148							# Finds the nearest peak in the profile for a given query value.
149							# One possible use is to look up the precursor ion intensity, since
150							# it is not stored as a separate item anywhere in the data file.
151							# Return the intensity the peak matching the query M/z.
152	0			0		0	my ($self, $query) = @_;
153	0					0	my $raw_query = &{$self->{"inverse converter"}}($query);
	0					0
154
155							# find the closest chunk
156	0					0	my ($nearest_chunk, $dist) = $self->find_chunk($raw_query);
157	0	0	0			0	if (not defined $dist or $dist > $MAX_DIST) { # undefind $dist means we're outside the full range of peaks in the scan
158	0					0	say STDERR "$self->{'dependent scan number'}: could not find a profile peak in parent scan $self->{'scan number'} within ${MAX_DIST} M/z of the target frequency $raw_query (M/z $query)";
159	0					0	return 0;
160							}
161
162	0					0	my @chunk_ix = ($nearest_chunk);
163	0					0	my $i = $nearest_chunk;
164	0		0			0	while ( $i < $self->{"peak count"} - 1 and $self->chunk_dist($i, $i++) <= $MAX_DIST ) { # kHz
165	0					0	push @chunk_ix, $i;
166							}
167	0					0	$i = $nearest_chunk;
168	0		0			0	while ( $i > 0 and $self->chunk_dist($i, $i--) <= $MAX_DIST ) { # kHz
169	0					0	push @chunk_ix, $i;
170							}
171
172	0					0	return (sort {$b <=> $a} map {$self->chunk_max($_)} @chunk_ix)[0]; # max. intensity
	0					0
	0					0
173							}
174
175							sub chunk_dist {
176							# find the gap distance between the chunks
177	0			0		0	my ($self, $i, $j) = @_;
178	0					0	my ($chunk_i, $chunk_j) = ($self->{chunks}->[$i], $self->{chunks}->[$j]);
179	0					0	my $start = $self->{"first value"};
180	0					0	my $step = $self->{step};
181	0					0	my $min_i = $start + ($chunk_i->{"first bin"} - 1) * $step;
182	0					0	my $max_i = $min_i + $chunk_i->{nbins} * $step;
183	0					0	my $min_j = $start + ($chunk_j->{"first bin"} - 1) * $step;
184	0					0	my $max_j = $min_j + $chunk_j->{nbins} * $step;
185	0					0	my $dist = (sort {$a <=> $b}
	0					0
186							(
187							abs($min_i - $min_j),
188							abs($min_i - $max_j),
189							abs($max_i - $min_j),
190							abs($max_i - $max_j)
191							)
192							)[0];
193	0					0	return $dist;
194							}
195
196							sub chunk_max {
197	0			0		0	my ($self, $num) = @_;
198	0					0	my $chunk = $self->{chunks}->[$num];
199	0					0	my $max = 0;
200	0					0	foreach my $i ( 0 .. $chunk->{nbins} - 1) {
201	0					0	my $intensity = $chunk->{signal}->[$i];
202	0	0				0	$max = $intensity if $intensity > $max;
203							}
204	0					0	return $max;
205							}
206
207							sub find_chunk {
208							# Use binary search to find a pair of profile chunks
209							# in the (sorted) chunk list surrounding the probe value
210
211	0			0		0	my ( $self, $value) = @_;
212	0					0	my $chunks = $self->{chunks};
213	0					0	my $first_value = $self->{"first value"};
214	0					0	my $step = $self->{step};
215	0					0	my ( $lower, $upper, $low_ix, $high_ix, $cur );
216	0					0	my $safety_count = 15;
217	0					0	my $last_match = [undef, 100000];
218	0					0	my $dist;
219
220	0					0	( $upper, $lower ) = ( $first_value, $first_value + $step * $self->{nbins} ) ;
221	0	0	0			0	if ( $value < $lower - $MAX_DIST or $value > $upper + $MAX_DIST) {
222	0					0	return (undef, undef);
223							}
224							else {
225	0					0	( $low_ix, $high_ix ) = ( 0, $self->{"peak count"} - 1 );
226	0					0	while ( $low_ix < $high_ix ) {
227	0	0				0	die "broken find_chunk algorithm" unless $safety_count--;
228	0					0	$cur = int ( ( $low_ix + $high_ix ) / 2 );
229	0					0	my $chunk = $chunks->[$cur];
230	0					0	$upper = $first_value + $chunk->{"first bin"} * $step;
231	0					0	$lower = $upper + $chunk->{nbins} * $step;
232							# say STDERR " testing: $cur [$lower .. $upper] against $value in [$low_ix .. $high_ix]";
233	0	0	0			0	if ( $value >= $lower and $value <= $upper ) {
234							# say STDERR " direct hit";
235	0					0	return ($cur, 0);
236							}
237	0	0				0	if ( $value > $upper ) {
238	0					0	$dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
	0					0
239	0	0				0	$last_match = [$cur, $dist] if $dist < $last_match->[1];
240							# say STDERR " distance = $dist, shifting up";
241	0					0	$high_ix = $cur;
242							}
243	0	0				0	if ( $value < $lower ) {
244	0					0	$dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
	0					0
245	0	0				0	$last_match = [$cur, $dist] if $dist < $last_match->[1];
246							# say STDERR " distance = $dist; shifting down";
247	0					0	$low_ix = $cur + 1;
248							}
249							# say STDERR "The remainder: $low_ix, $high_ix";
250							}
251							# say STDERR "The final remainder: $low_ix, $high_ix";
252							}
253
254	0	0				0	if ( $low_ix == $high_ix ) {
255							# this is one of possibly two closest chunks, with no direct hits found
256	0					0	my $chunk = $chunks->[$low_ix];
257	0					0	$upper = $first_value + $chunk->{"first bin"} * $step;
258	0					0	$lower = $upper + $chunk->{nbins} * $step;
259	0					0	my $dist = (sort {$a <=> $b} (abs($value - $lower), abs($value - $upper)))[0];
	0					0
260
261	0					0	my ($closest_chunk, $min_dist) = ($low_ix, $dist);
262	0	0				0	if ( $dist > $last_match->[1] ) {
263	0					0	($closest_chunk, $min_dist) = @$last_match;
264							}
265							# say STDERR " no direct hit; closest chunk is $closest_chunk; distance between $value and [$lower, $upper] is $min_dist";
266	0					0	return ($closest_chunk, $min_dist);
267							}
268
269	0					0	die "unexpected condition";
270							}
271
272							#----------------------------------------------------------------------------------------
273							package Finnigan::Scan;
274							our $VERSION = 0.0206;
275
276	2			2		16	use strict;
	2					5
	2					65
277	2			2		11	use warnings;
	2					4
	2					72
278
279	2			2		10	use Finnigan;
	2					5
	2					2782
280
281							sub decode {
282	1			1	1	901	my ($class, $stream) = @_;
283
284	1					5	my $self = {
285							addr => tell $stream
286							};
287	1					3	my $buf;
288							my $nbytes;
289	0					0	my $bytes_to_read;
290	0					0	my $current_addr;
291
292	1					12	$self->{header} = Finnigan::PacketHeader->decode($stream);
293	1					4	$self->{size} = $self->{header}->{size};
294	1					3	my $header_data = $self->{header}->{data};
295
296	1					3	$bytes_to_read = 4 * $header_data->{"profile size"}->{value};
297	1					54	$nbytes = CORE::read $stream, $self->{"raw profile"}, $bytes_to_read;
298	1	50				10	$nbytes == $bytes_to_read
299							or die "could not read all $bytes_to_read bytes of scan profile at " . ($self->{addr} + $self->{size});
300	1					5	$self->{size} += $nbytes;
301
302	1					4	$bytes_to_read = 4 * $header_data->{"peak list size"}->{value};
303	1					25	$nbytes = CORE::read $stream, $self->{"raw centroids"}, $bytes_to_read;
304	1	50				6	$nbytes == $bytes_to_read
305							or die "could not read all $bytes_to_read bytes of scan profile at " . ($self->{addr} + $self->{size});
306	1					3	$self->{size} += $nbytes;
307
308							# skip peak descriptors and the unknown streams
309	1					5	$self->{size} += 4 * (
310							$header_data->{"descriptor list size"}->{value} +
311							$header_data->{"size of unknown stream"}->{value} +
312							$header_data->{"size of triplet stream"}->{value}
313							);
314	1					11	seek $stream, $self->{addr} + $self->{size}, 0;
315
316	1					5	return bless $self, $class;
317							}
318
319							sub header {
320	1			1	1	18	return shift->{header};
321							}
322
323							sub profile {
324	1			1	1	15	new Finnigan::Scan::Profile $_[0]->{"raw profile"}, $_[0]->{header}->{data}->{layout}->{value};
325							}
326
327							sub centroids {
328	1			1	1	2256	new Finnigan::Scan::CentroidList $_[0]->{"raw centroids"};
329							}
330
331							# end Finnigan::Scan::Profile
332							# ----------------------------------------------------------------------------------------
333							package Finnigan::Scan::ProfileChunk;
334							our $VERSION = 0.0206;
335
336							sub new {
337	580			580		923	my ($class, $buf, $offset, $layout) = @_;
338	580					1000	my $self = {};
339	580	50				1093	if ( $layout > 0 ) {
340	580					11188	@{$self}{'first bin', 'nbins', 'fudge'} = unpack "x${offset} VVf<", $buf;
	580					1967
341	580					1112	$self->{size} = 12;
342							}
343							else {
344	0					0	@{$self}{'first bin', 'nbins'} = unpack "x${offset} VV", $buf;
	0					0
345	0					0	$self->{size} = 8;
346							}
347	580					789	$offset += $self->{size};
348
349	580					1843	@{$self->{signal}} = unpack "x${offset} f<$self->{nbins}", $buf;
	580					1958
350	580					1114	$self->{size} += 4 * $self->{nbins};
351
352	580					1936	return bless $self, $class;
353							}
354
355							#----------------------------------------------------------------------------------------
356							package Finnigan::Scan::CentroidList;
357							our $VERSION = 0.0206;
358
359							sub new {
360	1			1		3	my ($class, $buf) = @_;
361	1					9	my $self = {count => unpack 'V', $buf};
362	1					4	my $offset = 4; # V
363
364	1					2	my $chunk;
365	1					5	foreach my $i (0 .. $self->{count} - 1) {
366	580					579	push @{$self->{peaks}}, [unpack "x${offset} f
	580					1839
367	580					863	$offset += 8;
368							}
369
370	1					14	return bless $self, $class;
371							}
372
373							sub count {
374	1			1		1395	shift->{count};
375							}
376
377							sub list {
378	2			2		425	shift->{peaks};
379							}
380
381							sub find_peak_intensity {
382							# Finds the nearest peak in the profile for a given query value.
383
384	2			2		456	my ($self, $query) = @_;
385
386							# find the closest peak
387	2					6	my ($nearest_peak, $dist) = $self->find_peak($query);
388	2	100	66			16	if (not defined $dist or $dist > $MAX_DIST_MZ) { # undefind $dist means we're outside the full range of peaks in the scan
389	1					170	say STDERR "$self->{'dependent scan number'}: could not find a profile peak in parent scan $self->{'scan number'} within ${MAX_DIST_MZ} M/z the target value $query";
390	1					8	return 0;
391							}
392
393	1					7	return $self->{peaks}->[$nearest_peak]->[1];
394							}
395
396							sub find_peak {
397							# Use binary search to find a pair of peaks
398							# surrounding the probe value
399
400							# One possible use is to look up the precursor ion intensity, since
401							# it is not stored as a separate item anywhere in the data file.
402
403	5			5		426	my ( $self, $value) = @_;
404	5					7	my ( $lower, $upper, $low_ix, $high_ix, $mid_ix );
405	5					7	my $safety_count = 15;
406	5					8	my $dist;
407
408							sub num_equal {
409	39			39		56	my( $float1, $float2, $diff ) = @_;
410	39		50			172	abs( $float1 - $float2 ) < ($diff or 0.00001);
411							}
412
413	5					14	( $low_ix, $high_ix ) = ( 0, $self->{count} - 1 );
414	5					19	( $lower, $upper ) = ($self->{peaks}->[$low_ix]->[0], $self->{peaks}->[$high_ix]->[0]);
415	5	50	33			39	if ( $value < $lower - $MAX_DIST_MZ or $value > $upper + $MAX_DIST_MZ) {
416	0					0	return (undef, undef);
417							}
418							else {
419	5					7	my $dist;
420	5					14	while ( $low_ix <= $high_ix ) {
421	39	50				77	die "broken find_peak algorithm" unless $safety_count--;
422	39					81	$mid_ix = $low_ix + int ( ( $high_ix - $low_ix ) / 2 );
423	39					67	my $peak = $self->{peaks}->[$mid_ix];
424	39					53	$dist = abs($value - $peak->[0]);
425							# say STDERR " testing: $mid_ix [$peak->[0]] against $value";
426	39	100				74	if ( num_equal($peak->[0], $value, $MAX_DIST_MZ ) ) {
		100
427	3					18	return ($mid_ix, $dist)
428							}
429							elsif ( $peak->[0] < $value) {
430							# say STDERR " distance = $dist, shifting up";
431	29					84	$low_ix = $mid_ix + 1;
432							}
433							else { # $peak->[0] > $value
434							# say STDERR " distance = $dist; shifting down";
435	7					21	$high_ix = $mid_ix - 1;
436							}
437							}
438	2					8	return (undef, $dist);
439							}
440							}
441
442							1;
443
444							__END__