File Coverage

blib/lib/MassSpec/ViewSpectrum/RealVsHypPeptide.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package MassSpec::ViewSpectrum::RealVsHypPeptide;
2            
3 1     1   23008 use 5.006;
  1         3  
  1         31  
4 1     1   5 use strict;
  1         1  
  1         36  
5 1     1   4 use warnings;
  1         6  
  1         38  
6            
7 1     1   360 use MassSpec::ViewSpectrum;
  0            
  0            
8            
9             my $haveCUtilities;
10             if (eval 'require MassSpec::CUtilities') {
11             import MassSpec::CUtilities;
12             $haveCUtilities = 1;
13             } else {
14             $haveCUtilities = 0;
15             }
16            
17            
18             our @ISA = qw(MassSpec::ViewSpectrum);
19            
20             our $VERSION = '0.02';
21            
22             my ($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E,$add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M,$add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V);
23             my @add_aa_vals;
24            
25            
26            
27             # Preloaded methods go here.
28            
29             sub new (\$\@\@;\@\%\%) # (peptide, masses, intensities, [optional] annotations, annotations_matching, colormap)
30             {
31             my $type = shift;
32             my $peptide = shift;
33            
34             my $self = $type->SUPER::new(@_);
35            
36            
37             init_add_aa ();
38            
39             $self->{peptide} = $peptide;
40             $self->{tolerance} = 0.1;
41            
42             my $i;
43             my $maxintensity;
44            
45             for ($i = 0; $i <= $#{$self->{masses}}; $i++) {
46             my $intensity = $self->{intensities}[$i];
47            
48             $maxintensity = $intensity unless defined $maxintensity and $maxintensity > $intensity;
49             }
50            
51            
52            
53             $self->{HminusEpeakheight} = -0.1 * $maxintensity * $self->{yaxismultiplier};
54             $self->{otherPeakHeights} = -0.2 * $maxintensity * $self->{yaxismultiplier};
55             $self->{yScalingFactor} = (0.1/20.0);
56             $self->{yScalingOffset} = 3;
57            
58            
59            
60             return $self;
61             }
62            
63            
64             sub set {
65             my ($self, $key, $value) = @_;
66            
67             $self->{$key} = $value;
68             }
69            
70            
71            
72             sub plot
73             {
74             my $self = shift;
75            
76            
77             my ($EminusHRef,$HminusERef,$intersectionRef, $massintenRef);
78            
79             my $pep = $self->{peptide};
80            
81             my ($aRef_theoretical_masses,$aRef_annotations) = generate_theoretical_spectrum ($pep);
82            
83             # compare the experimental (E) and hypothetical (H) spectra, and compute
84             # their intersection as well as which peaks appear in one spectrum but
85             # not the other
86             ($EminusHRef,$HminusERef,$intersectionRef, $massintenRef) = $self->FindDiffs($aRef_theoretical_masses);
87            
88             my %inter = %$intersectionRef;
89             my @masses;
90             my @intensities;
91             my @annotations;
92             my $mass;
93             my $annot;
94             my $maxNegativeStrLen = 0;
95            
96             my ($HminusEpeakheight,$otherPeakHeights,$yScalingFactor,$yScalingOffset);
97            
98             $HminusEpeakheight = $self->{HminusEpeakheight};
99             $otherPeakHeights = $self->{otherPeakHeights};
100             $yScalingFactor = $self->{yScalingFactor};
101             $yScalingOffset = $self->{yScalingOffset};
102            
103             foreach $mass (@$EminusHRef) {
104             push (@masses,$mass);
105             push (@intensities,$$massintenRef{$mass});
106             push (@annotations,'');
107             }
108            
109            
110             foreach $mass (@$HminusERef) {
111             push (@masses,$mass);
112             push (@intensities,$HminusEpeakheight);
113             $annot = $$aRef_annotations{$mass};
114             $maxNegativeStrLen = length $annot if length $annot > $maxNegativeStrLen;
115             push (@annotations,$annot);
116            
117             }
118            
119             # plot the intersection twice; once above the axis and once below
120             foreach $mass (keys %inter) {
121             push (@masses,$mass);
122             push (@intensities,$$massintenRef{$mass});
123             $annot = $$aRef_annotations{$inter{$mass}};
124             push (@annotations,$annot);
125             push (@masses,$mass);
126             push (@intensities,$otherPeakHeights);
127             push (@annotations,'@' . $annot);
128             }
129            
130            
131             $self->{masses} = \@masses;
132             $self->{intensities} = \@intensities;
133             $self->{annotations} = \@annotations;
134             $self->{extranegativeheight} = $yScalingFactor * ($maxNegativeStrLen - $yScalingOffset);
135            
136             $self->SUPER::plot();
137             }
138            
139             # TODO(JAE):place local subroutines here, by convention using a leading underscore for
140             # these local subroutine names
141            
142            
143             my %add_aa_hsh;
144            
145             sub init_add_aa {
146             my @add_aa_syms = ("A","R","N","D","C","E","Q","G","H","X","K","M","F","P","S","T","W","Y","V");
147             my $i = 0;
148             my $sym;
149            
150             ($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E)=(71.03711,156.10111,114.04293,115.02694,103.00919,129.04259);
151             ($add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M)=(128.05858,57.02146,137.05891,113.08406,128.09496,131.04049);
152             ($add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V)=(147.06841,97.05276,87.03203,101.04768,186.07931,163.06333,99.06841);
153             @add_aa_vals = ($add_aa_A,$add_aa_R,$add_aa_N,$add_aa_D,$add_aa_C,$add_aa_E,$add_aa_Q,$add_aa_G,$add_aa_H,$add_aa_IL,$add_aa_K,$add_aa_M,$add_aa_F,$add_aa_P,$add_aa_S,$add_aa_T,$add_aa_W,$add_aa_Y,$add_aa_V);
154            
155             foreach $sym (@add_aa_syms) {
156             $add_aa_hsh{$sym} = $add_aa_vals[$i++];
157             }
158             }
159            
160             sub computePeptideMass {
161             my $comp = shift (@_);
162             return MassSpec::CUtilities::computePeptideMass($comp) if ($haveCUtilities);
163            
164             my $sum = 0;
165             do {
166             $sum = $sum + $add_aa_hsh{chop($comp)};
167             }until $comp le 0;
168             return $sum;
169             }
170            
171            
172            
173            
174             sub generate_theoretical_spectrum {
175            
176             my ($sequence) = @_;
177             my $parent_comp = aasort ( remove_brackets ($sequence) ); #generate the parent composition which is used for determining compositions of complementary ions
178             my (@family,%annotations,@masses,%ion,%conserved_labels); #essentially the @family is for y ions and the @complement_family is for b ions, although this can be made more sophisticated easily if both ions go in the same direction
179             my $length_sequence = length ($sequence); #for the for loop
180             my $chop_incrementer; #for the for loop
181            
182             $ion{'y'} = 19.01839;
183             $ion{'y-H2O'} = 1.007825;
184             $ion{'b'} = 1.007825;
185             $ion{'a'} = -26.98709;
186             #plus the proton to make a b ion, then minus CO = 27.9949
187            
188             $conserved_labels{'y'} = 1;
189             $conserved_labels{'b'} = 1;
190             $family[0] = ''; #JAE
191            
192            
193             for ($chop_incrementer = 0;$chop_incrementer < $length_sequence;$chop_incrementer++) {
194             my $chop = chop ($sequence); #get a 1 character piece of sequence
195             unless ($chop eq "") {
196             if ( $chop eq "\)" ) { #if this is the beginning of a missed fragmentation
197             my $mf = "";
198             $chop = chop ($sequence);
199             do {
200             $mf .= $chop;
201             $chop = chop ($sequence);
202             }until $chop eq "\(";
203             my $family = aasort($mf.$family[0]);
204             my $complement_family = aasort (compdiff ($family,$parent_comp));
205             unshift (@family, $family);
206             make_ion_mass ($family,'y',\%annotations,\@masses,\%ion,\%conserved_labels);
207             make_ion_mass ($family,'y-H2O',\%annotations,\@masses,\%ion,\%conserved_labels);
208             if ($complement_family eq "") {
209             make_ion_mass ($parent_comp,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
210             make_ion_mass ($parent_comp,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
211             }else{
212             make_ion_mass ($complement_family,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
213             make_ion_mass ($complement_family,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
214             }
215             }else{
216             my $family = aasort ($chop.$family[0]);
217             my $complement_family = aasort ( compdiff ($family,$parent_comp) );
218             unshift (@family, $family);
219             make_ion_mass ($family,'y',\%annotations,\@masses,\%ion,\%conserved_labels);
220             make_ion_mass ($family,'y-H2O',\%annotations,\@masses,\%ion,\%conserved_labels);
221             if ($complement_family eq "") {
222             make_ion_mass ($parent_comp,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
223             make_ion_mass ($parent_comp,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
224             }else{
225             make_ion_mass ($complement_family,'b',\%annotations,\@masses,\%ion,\%conserved_labels);
226             make_ion_mass ($complement_family,'a',\%annotations,\@masses,\%ion,\%conserved_labels);
227             }
228             }
229             }
230             }
231             # return (\@masses,\%annotations,\%conserved_labels);
232             return (\@masses,\%annotations);
233             }
234             my $debug;
235             sub make_ion_mass {
236             my ($composition,$ion,$aRef_annotations,$aRef_masses,$hRef_ion,$aRef_conserved_labels) = @_;
237            
238            
239             my $mass = computePeptideMass ($composition) + $$hRef_ion{$ion};
240             my $annotation = $ion." \(".$composition."\)";
241            
242            
243             $annotation = '@' . $annotation unless $$aRef_conserved_labels{$ion};
244            
245             #push (@$aRef_annotations,$annotation);
246             $$aRef_annotations{$mass} = $annotation;
247            
248             push (@$aRef_masses,$mass);
249            
250            
251             }
252            
253             sub remove_brackets {
254             local $_;
255             $_ = shift @_;
256             tr/()//d;
257             return $_;
258             }
259            
260             sub aasort {
261             return join ( "", sort ( split //,$_[0] ) );
262             }
263            
264             sub compdiff {
265             #returns the compositional difference between 2 amino acid compositions - the second needs to be longer than the first
266             my ($aa_one,$aa_two) = @_;
267             my ($first,$second);
268            
269             if (length ($aa_one) > length ($aa_two)) {
270             $first = $aa_two;
271             $second = $aa_one;
272             }else{
273             $first = $aa_one;
274             $second = $aa_two;
275             }
276            
277             my $diff = "";
278             do{
279             my ($faa,$saa) = (chop $first,chop $second);
280             unless ($faa eq $saa) {
281             do {
282             $diff = $saa.$diff;
283             $saa = chop $second;
284             }until $saa eq $faa;
285             }
286             }until $first le "";
287             $diff = $second.$diff if $second gt "";
288             return $diff;
289             }
290            
291             sub FindDiffs {
292             my $self = shift;
293             my($hyp_peptide) = @_;
294            
295             my @MassData = @{$self->{masses}};
296             my @inten = @{$self->{intensities}};
297             my $mass_tolerance = $self->{tolerance};
298            
299             my($EminusHRef,$HminusERef,$interRef);
300             my $exp_mass;
301             my $hyp_mass;
302             my $EminusH;
303             my $HminusE;
304             my(@EminusH,@HminusE,%intersection);
305            
306             my %hyp; # hypothetical peaks associated with $peptide
307             my %exp; # experimental peaks (filtered masses)
308             my %massinten; # experimental intensities
309            
310             my $count = 0;
311             foreach $exp_mass(@MassData) {
312             $exp{$exp_mass} = -1;
313             $massinten{$exp_mass} = $inten[$count++];
314            
315             }
316            
317             my @s = extend_spectrum(\@MassData,$mass_tolerance,0);
318            
319             foreach $hyp_mass (@$hyp_peptide) {
320             my $mid;
321             $hyp{$hyp_mass} = -1;
322             if ($haveCUtilities) {
323             $mid = MassSpec::CUtilities::binarySearchSpectrum($hyp_mass,\@s);
324             } else {
325             my $low = 0;
326             my $high = @s - 1;
327            
328             while ($low <= $high) {
329             $mid = int(($low+$high)/2);
330             if ($hyp_mass < $s[$mid]) {
331             $high = $mid - 1;
332             } else {
333             $low = $mid + 1;
334             }
335             }
336             $mid++ if ($hyp_mass > $s[$mid]);
337             }
338             #
339             # s[$mid] is now the lowest value in s just greater than $hyp_mass
340             #
341             #
342             # Suppose our spectrum consisted of the peaks 347.42, 639.31, 722.37 and 831.93
343             # and our error margin is 0.05 daltons. Then the spectrum provided by
344             # extend_spectrum() would look like:
345             # s[0] = artificial negative value
346             # s[1] = 347.37
347             # s[2] = 347.47
348             # s[3] = 639.26
349             # s[4] = 639.36
350             # s[5] = 722.32
351             # s[6] = 722.42
352             # s[7] = 831.88
353             # s[8] = 831.98
354             # s[9] = artificial huge value
355             #
356             # Suppose the input $mass is 639.27. Then after the binary search, $mid
357             # will be 4. It turns out that $mid is always even for masses within
358             # the error bands and always odd for values which fall outside the error bands.
359             #
360             unless ($mid & 1) { # unless ($mid is odd)
361             my $index = $mid/2-1;
362             my $peak = $MassData[$index];
363             my $max_intensity = $inten[$index];
364             $index--;
365             while ($index >= 0 && abs($MassData[$index] - $hyp_mass) < $mass_tolerance) {
366             if ($max_intensity < $inten[$index]) {
367             $peak = $MassData[$index];
368             $max_intensity = $inten[$index];
369             }
370             $index--;
371             }
372             $hyp{$hyp_mass} = 1;
373             $exp{$peak} = $hyp_mass;
374             }
375             }
376            
377            
378            
379             foreach $hyp_mass (sort {$a <=> $b} (keys %hyp)) {
380             if ($hyp{$hyp_mass} < 0) {
381             push (@HminusE, $hyp_mass);
382            
383             }
384             }
385            
386             # note that the hypothetical mass is logged in the intersection,
387             # since that is what we want to plot
388             foreach $exp_mass (sort {$a <=> $b} (keys %exp)) {
389             if ($exp{$exp_mass} < 0) {
390             push (@EminusH, $exp_mass)
391             } else { # note the associated experimental mass
392             $hyp_mass = $exp{$exp_mass};
393             $intersection{$exp_mass} = $hyp_mass;
394             }
395             }
396            
397             $EminusHRef = \@EminusH;
398             $HminusERef = \@HminusE;
399             $interRef = \%intersection;
400            
401             return($EminusHRef, $HminusERef, $interRef, \%massinten);
402             }
403            
404            
405             #
406             # Extend a spectrum to make it suitable for subsequent binary searching
407             #
408             sub extend_spectrum {
409             my($aRef_rms,$err,$backwards_compatability) = @_;
410             my $i;
411             my @result = ();
412             my $lastlow;
413             my $lasthigh = -1;
414            
415             # place artificial low & high values to simplify subsequent binary search
416             push (@result,-9999);
417            
418             for ($i = 0; $i < @{$aRef_rms}; $i++) {
419             my $low = $aRef_rms->[$i] - $err;
420             my $high = $aRef_rms->[$i] + $err;
421             if ($backwards_compatability) {
422             $low = sprintf("%.2f",$low);
423             if ($high =~ /(\d+\.\d\d)/) {
424             $high = $1;
425             }
426             $high = sprintf("%.2f",$high) + 0.009999;
427             }
428             if ($low < $lasthigh) { # if (two adjacent bands overlap)
429             my $middle = ($lastlow + $high) / 2;
430             if ($backwards_compatability) {
431             $middle = sprintf("%.2f",$middle);
432             }
433             pop @result;
434             push (@result,$middle-0.000001); # replace high-value of last band
435             $low = $middle;
436             }
437             push (@result,$low);
438             push (@result,$high);
439             $lastlow = $low;
440             $lasthigh = $high;
441             }
442             push (@result,999999999999);
443            
444             return @result;
445             }
446            
447            
448             1;
449             __END__