File Coverage

Bio/Search/HSP/ModelHSP.pm
Criterion Covered Total %
statement 106 113 93.8
branch 18 30 60.0
condition 6 11 54.5
subroutine 18 18 100.0
pod 14 14 100.0
total 162 186 87.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::ModelHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Chris Fields
7             #
8             # Copyright Chris Fields
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::ModelHSP - A HSP object for model-based searches
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Search::HSP::ModelHSP;
21             # us it just like a Bio::Search::HSP::ModelHSP object
22              
23             =head1 DESCRIPTION
24              
25             This object is a specialization of L and is used
26             for searches which involve a query model, such as a Hidden Markov Model (HMM),
27             covariance model (CM), descriptor, or anything else besides a sequence. Note
28             that results from any HSPI class methods which rely on the query being a
29             sequence are unreliable and have thus been overridden with warnings indicating
30             they have not been implemented at this time.
31              
32             =head1 FEEDBACK
33              
34             =head2 Mailing Lists
35              
36             User feedback is an integral part of the evolution of this and other
37             Bioperl modules. Send your comments and suggestions preferably to
38             the Bioperl mailing list. Your participation is much appreciated.
39              
40             bioperl-l@bioperl.org - General discussion
41             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42              
43             =head2 Support
44              
45             Please direct usage questions or support issues to the mailing list:
46              
47             I
48              
49             rather than to the module maintainer directly. Many experienced and
50             reponsive experts will be able look at the problem and quickly
51             address it. Please include a thorough description of the problem
52             with code and data examples if at all possible.
53              
54             =head2 Reporting Bugs
55              
56             Report bugs to the Bioperl bug tracking system to help us keep track
57             of the bugs and their resolution. Bug reports can be submitted via the
58             web:
59              
60             https://github.com/bioperl/bioperl-live/issues
61              
62             =head1 AUTHOR - Chris Fields
63              
64             Email cjfields at bioperl dot org
65              
66             =head1 APPENDIX
67              
68             The rest of the documentation details each of the object methods.
69             Internal methods are usually preceded with a _
70              
71             =cut
72              
73             # Let the code begin...
74              
75             package Bio::Search::HSP::ModelHSP;
76 3     3   16 use strict;
  3         4  
  3         77  
77 3     3   1305 use Bio::Seq::Meta;
  3         8  
  3         120  
78              
79 3     3   20 use base qw(Bio::Search::HSP::GenericHSP);
  3         4  
  3         3424  
80              
81             =head2 new
82              
83             Title : new
84             Usage : my $obj = Bio::Search::HSP::ModelHSP->new();
85             Function: Builds a new Bio::Search::HSP::ModelHSP object
86             Returns : Bio::Search::HSP::ModelHSP
87             Args :
88              
89             Plus Bio::Seach::HSP::ModelHSP methods
90              
91             -algorithm => algorithm used (Infernal, RNAMotif, ERPIN, etc)
92             -evalue => evalue
93             -pvalue => pvalue
94             -bits => bit value for HSP
95             -score => score value for HSP (typically z-score but depends on
96             analysis)
97             -hsp_length=> Length of the HSP (including gaps)
98             -identical => # of residues that that matched identically
99             -conserved => # of residues that matched conservatively
100             (only protein comparisons;
101             conserved == identical in nucleotide comparisons)
102             -hsp_gaps => # of gaps in the HSP
103             -query_gaps => # of gaps in the query in the alignment
104             -hit_gaps => # of gaps in the subject in the alignment
105             -query_name => HSP Query sequence name (if available)
106             -query_start => HSP Query start (in original query sequence coords)
107             -query_end => HSP Query end (in original query sequence coords)
108             -hit_name => HSP Hit sequence name (if available)
109             -hit_start => HSP Hit start (in original hit sequence coords)
110             -hit_end => HSP Hit end (in original hit sequence coords)
111             -hit_length => total length of the hit sequence
112             -query_length=> total length of the query sequence
113             -query_seq => query sequence portion of the HSP
114             -hit_seq => hit sequence portion of the HSP
115             -homology_seq=> homology sequence for the HSP
116             -hit_frame => hit frame (only if hit is translated protein)
117             -query_frame => query frame (only if query is translated protein)
118             -meta => optional meta data (sec structure, markup, etc)
119             -custom_score=> custom score data
120              
121             =cut
122              
123             =head2 meta
124              
125             Title : meta
126             Usage : my $meta = $hsp->meta();
127             Function: Returns meta data for this HSP or undef
128             Returns : string of meta data or undef
129             Args : [optional] string to set value
130             Note : At some point very soon this will likely be a Bio::AnnotationI.
131             Don't get used to a simple string!
132              
133             =cut
134              
135             sub meta {
136 31     31 1 95 my ($self,$value) = @_;
137 31         65 my $previous = $self->{'META'};
138 31 50       70 if( defined $value ) {
139 0         0 $self->{'META'} = $value;
140             }
141 31         142 return $previous;
142             }
143              
144             =head2 noncanonical_string
145              
146             Title : noncanonical_string
147             Usage : my $nc_seq = $hsp->noncanonical_string();
148             Function: Returns noncanonical string (NC) data for this HSP or undef
149             Returns : string of noncanonical data or undef
150             Args : [optional] string to set value
151              
152             =cut
153              
154             sub noncanonical_string {
155 1     1 1 3 my ($self,$value) = @_;
156 1         3 my $previous = $self->{'NC_SEQ'};
157 1 50       3 if( defined $value ) {
158 0         0 $self->{'NC_SEQ'} = $value;
159             }
160 1         4 return $previous;
161             }
162              
163             =head2 custom_score
164              
165             Title : custom_score
166             Usage : my $data = $hsp->custom_score();
167             Function: Returns custom_score data for this HSP, or undef
168             Returns : custom_score data or undef
169             Args : [optional] custom_score
170             Note : This is a Get/Set used to deal with odd score-like data generated
171             from RNAMotif (and other programs) where the score section
172             can be customized to include non-standard data, including sequence
173             data, user-based scores, and other values.
174              
175             =cut
176              
177             sub custom_score {
178 11     11 1 32 my ($self,$value) = @_;
179 11         25 my $previous = $self->{'CUSTOMSCORE'};
180 11 50       33 if( defined $value ) {
181 0         0 $self->{'CUSTOMSCORE'} = $value;
182             }
183 11         48 return $previous;
184             }
185              
186             =head2 Bio::Search::HSP::HSPI methods
187              
188             Implementation of Bio::Search::HSP::HSPI methods follow
189              
190             =head2 algorithm
191              
192             Title : algorithm
193             Usage : my $r_type = $hsp->algorithm
194             Function: Obtain the name of the algorithm used to obtain the HSP
195             Returns : string (e.g., BLASTP)
196             Args : [optional] scalar string to set value
197              
198             =cut
199              
200             =head2 strand
201              
202             Title : strand
203             Usage : $hsp->strand('hit')
204             Function: Retrieves the strand for the HSP component requested
205             Returns : +1 or -1 (0 if unknown)
206             Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject.
207             There is no strand available for 'query', as the query is a model
208             and not a true sequence.
209              
210             =cut
211              
212             # overrides HSPI::seq()
213              
214             =head2 seq
215              
216             Usage : $hsp->seq( [seq_type] );
217             Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
218             Example : $seqObj = $hsp->seq('sbjct');
219             Returns : Object reference for a Bio::Seq.pm object.
220             Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'sbjct').
221             : ('sbjct' is synonymous with 'hit')
222             : default is 'sbjct'
223             : Note: if there is no sequence available (eg for a model-based
224             : search), this returns a LocatableSeq object w/o a sequence
225             Throws : Propagates any exception that occurs during construction
226             : of the Bio::Seq.pm object.
227             Comments : The sequence is returned in an array of strings corresponding
228             : to the strings in the original format of the Blast alignment.
229             : (i.e., same spacing).
230              
231             See Also : L, L
232              
233             =cut
234              
235             #-------
236             sub seq {
237             #-------
238 11     11 1 29 my($self,$seqType) = @_;
239 11   50     61 $seqType ||= 'sbjct';
240 11 50       37 $seqType = 'sbjct' if $seqType eq 'hit';
241 11         58 my $str = $self->seq_str($seqType);
242 11 50       53 if( $seqType =~ /^(m|ho)/i ) {
243 0         0 $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::AlignIO and generate a consensus from that.");
244             }
245 11         79 require Bio::LocatableSeq;
246 11 50       59 my $id = $seqType =~ /^q/i ? $self->query->seq_id : $self->hit->seq_id;
247 11         40 $str =~ s{\*\[\s*(\d+)\s*\]\*}{'N' x $1}ge;
  1         7  
248 11         32 $str =~ s{\s+}{}g;
249 11         59 my $seq = Bio::LocatableSeq->new (-ID => $id,
250             -START => $self->start($seqType),
251             -END => $self->end($seqType),
252             -STRAND=> $self->strand($seqType),
253             -DESC => "$seqType sequence ",
254             );
255 11 50       50 $seq->seq($str) if $str;
256 11         59 $seq;
257             }
258              
259             =head2 pvalue
260              
261             Title : pvalue
262             Usage : my $pvalue = $hsp->pvalue();
263             Function: Returns the P-value for this HSP or undef
264             Returns : float or exponential (2e-10)
265             P-value is not defined with NCBI Blast2 reports.
266             Args : [optional] numeric to set value
267              
268             =cut
269              
270             =head2 evalue
271              
272             Title : evalue
273             Usage : my $evalue = $hsp->evalue();
274             Function: Returns the e-value for this HSP
275             Returns : float or exponential (2e-10)
276             Args : [optional] numeric to set value
277              
278             =cut
279              
280             =head2 gaps
281              
282             Title : gaps
283             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
284             Function : Get the number of gaps in the query, hit, or total alignment.
285             Returns : Integer, number of gaps or 0 if none
286             Args : arg 1: 'query' = num gaps in query seq
287             'hit' = num gaps in hit seq
288             'total' = num gaps in whole alignment
289             default = 'total'
290             arg 2: [optional] integer gap value to set for the type requested
291              
292             =cut
293              
294             =head2 query_string
295              
296             Title : query_string
297             Usage : my $qseq = $hsp->query_string;
298             Function: Retrieves the query sequence of this HSP as a string
299             Returns : string
300             Args : [optional] string to set for query sequence
301              
302             =cut
303              
304             =head2 hit_string
305              
306             Title : hit_string
307             Usage : my $hseq = $hsp->hit_string;
308             Function: Retrieves the hit sequence of this HSP as a string
309             Returns : string
310             Args : [optional] string to set for hit sequence
311              
312             =cut
313              
314             =head2 homology_string
315              
316             Title : homology_string
317             Usage : my $homo_string = $hsp->homology_string;
318             Function: Retrieves the homology sequence for this HSP as a string.
319             : The homology sequence is the string of symbols in between the
320             : query and hit sequences in the alignment indicating the degree
321             : of conservation (e.g., identical, similar, not similar).
322             Returns : string
323             Args : [optional] string to set for homology sequence
324              
325             =cut
326              
327             =head2 length
328              
329             Title : length
330             Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
331             Function : Returns the length of the query or hit in the alignment
332             (without gaps)
333             or the aggregate length of the HSP (including gaps;
334             this may be greater than either hit or query )
335             Returns : integer
336             Args : arg 1: 'query' = length of query seq (without gaps)
337             'hit' = length of hit seq (without gaps)
338             'total' = length of alignment (with gaps)
339             default = 'total'
340             arg 2: [optional] integer length value to set for specific type
341              
342             =cut
343              
344             =head2 frame
345              
346             Title : frame
347             Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
348             Function: Set the Frame for both query and subject and insure that
349             they agree.
350             This overrides the frame() method implementation in
351             FeaturePair.
352             Returns : array of query and subject frame if return type wants an array
353             or query frame if defined or subject frame if not defined
354             Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
355             'query' to retrieve the query frame
356             'list' or 'array' to retrieve both query and hit frames together
357             Note : Frames are stored in the GFF way (0-2) not 1-3
358             as they are in BLAST (negative frames are deduced by checking
359             the strand of the query or hit)
360              
361             =cut
362              
363             =head2 get_aln
364              
365             Title : get_aln
366             Usage : my $aln = $hsp->gel_aln
367             Function: Returns a Bio::SimpleAlign representing the HSP alignment
368             Returns : Bio::SimpleAlign
369             Args : none
370              
371             =cut
372              
373             sub get_aln {
374 11     11 1 980 my ($self) = @_;
375 11         76 require Bio::LocatableSeq;
376 11         2289 require Bio::SimpleAlign;
377 11         74 my $aln = Bio::SimpleAlign->new;
378 11         71 my %hsp = (hit => $self->hit_string,
379             midline => $self->homology_string,
380             query => $self->query_string,
381             meta => $self->meta);
382            
383             # this takes care of infernal issues
384 11 100 100     73 if ($hsp{meta} && $hsp{meta} =~ m{~+}) {
385 1         4 $self->_postprocess_hsp(\%hsp);
386             }
387            
388 11 100       33 if (!$hsp{query}) {
389 2         56 $self->warn("Missing query string, can't build alignment");
390 0         0 return;
391             }
392            
393 9         15 my $seqonly = $hsp{query};
394 9         40 $seqonly =~ s/[\-\s]//g;
395 9         29 my ($q_nm,$s_nm) = ($self->query->seq_id(),
396             $self->hit->seq_id());
397 9 50 33     44 unless( defined $q_nm && CORE::length ($q_nm) ) {
398 0         0 $q_nm = 'query';
399             }
400 9 50 33     28 unless( defined $s_nm && CORE::length ($s_nm) ) {
401 0         0 $s_nm = 'hit';
402             }
403             my $query = Bio::LocatableSeq->new('-seq' => $hsp{query},
404 9         34 '-id' => $q_nm,
405             '-start' => $self->query->start,
406             '-end' => $self->query->end,
407             );
408 9         23 $seqonly = $hsp{hit};
409 9         47 $seqonly =~ s/[\-\s]//g;
410             my $hit = Bio::LocatableSeq->new('-seq' => $hsp{hit},
411 9         37 '-id' => $s_nm,
412             '-start' => $self->hit->start,
413             '-end' => $self->hit->end,
414             );
415 9         42 $aln->add_seq($query);
416 9         29 $aln->add_seq($hit);
417 9 50       24 if ($hsp{meta}) {
418 9         59 my $meta_obj = Bio::Seq::Meta->new();
419 9         30 $meta_obj->named_meta('ss_cons', $hsp{meta});
420 9         25 $aln->consensus_meta($meta_obj);
421             }
422 9         61 return $aln;
423             }
424              
425             =head2 Inherited from Bio::SeqFeature::SimilarityPair
426              
427             These methods come from Bio::SeqFeature::SimilarityPair
428              
429             =head2 query
430              
431             Title : query
432             Usage : my $query = $hsp->query
433             Function: Returns a SeqFeature representing the query in the HSP
434             Returns : Bio::SeqFeature::Similarity
435             Args : [optional] new value to set
436              
437              
438             =head2 hit
439              
440             Title : hit
441             Usage : my $hit = $hsp->hit
442             Function: Returns a SeqFeature representing the hit in the HSP
443             Returns : Bio::SeqFeature::Similarity
444             Args : [optional] new value to set
445              
446              
447             =head2 significance
448              
449             Title : significance
450             Usage : $evalue = $obj->significance();
451             $obj->significance($evalue);
452             Function: Get/Set the significance value
453             Returns : numeric
454             Args : [optional] new value to set
455              
456              
457             =head2 score
458              
459             Title : score
460             Usage : my $score = $hsp->score();
461             Function: Returns the score for this HSP or undef
462             Returns : numeric
463             Args : [optional] numeric to set value
464              
465             =cut
466              
467             =head2 bits
468              
469             Title : bits
470             Usage : my $bits = $hsp->bits();
471             Function: Returns the bit value for this HSP or undef
472             Returns : numeric
473             Args : none
474              
475             =cut
476              
477             =head1 ModelHSP methods overridden in ModelHSP
478              
479             The following methods have been overridden due to their current reliance on
480             sequence-based queries. They may be implemented in future versions of this class.
481              
482             =head2 seq_inds
483              
484             =cut
485              
486             sub seq_inds {
487 1     1 1 46 my $self = shift;
488 1         14 $self->warn('$hsp->seq_inds not implemented for Model-based searches');
489 1         4 return;
490             }
491              
492             =head2 frac_identical
493              
494             =cut
495              
496             sub frac_identical {
497 1     1 1 35 my $self = shift;
498 1         4 $self->warn('$hsp->frac_identical not implemented for Model-based searches');
499 1         3 return;
500             }
501              
502             =head2 frac_conserved
503              
504             =cut
505              
506             sub frac_conserved {
507 1     1 1 36 my $self = shift;
508 1         72 $self->warn('$hsp->frac_conserved not implemented for Model-based searches');
509 1         4 return;
510             }
511              
512             =head2 matches
513              
514             =cut
515              
516             sub matches {
517 1     1 1 37 my $self = shift;
518 1         4 $self->warn('$hsp->matches not implemented for Model-based searches');
519 1         4 return;
520             }
521              
522             =head2 num_conserved
523              
524             =cut
525              
526             sub num_conserved {
527 1     1 1 35 my $self = shift;
528 1         3 $self->warn('$hsp->num_conserved not implemented for Model-based searches');
529 1         4 return;
530             }
531              
532             =head2 num_identical
533              
534             =cut
535              
536             sub num_identical {
537 1     1 1 35 my $self = shift;
538 1         4 $self->warn('$hsp->num_identical not implemented for Model-based searches');
539 1         3 return;
540             }
541              
542             =head2 cigar_string
543              
544             =cut
545              
546              
547             sub cigar_string {
548 1     1 1 35 my $self = shift;
549 1         4 $self->warn('$hsp->cigar_string not implemented for Model-based searches');
550 1         4 return;
551             }
552              
553             =head2 generate_cigar_string
554              
555             =cut
556              
557             sub generate_cigar_string {
558 1     1 1 34 my $self = shift;
559 1         4 $self->warn('$hsp->generate_cigar_string not implemented for Model-based searches');
560 1         3 return;
561             }
562              
563             =head2 percent_identity
564              
565             =cut
566              
567             sub percent_identity {
568 1     1 1 37 my $self = shift;
569 1         3 $self->warn('$hsp->percent_identity not implemented for Model-based searches');
570 1         3 return;
571             }
572              
573             ############## PRIVATE ##############
574              
575             # the following method postprocesses HSP data in cases where the sequences
576             # aren't complete (which can trigger a validation error)
577              
578             {
579             my $SEQ_REGEX = qr/\*\[\s*(\d+)\s*\]\*/;
580             my $META_REGEX = qr/(~+)/;
581              
582             sub _postprocess_hsp {
583 1     1   2 my ($self, $hsp) = @_;
584 1 50       4 $self->throw('Must pass a hash ref for HSP processing') unless ref($hsp) eq 'HASH';
585 1         1 my @ins;
586 1         3 for my $type (qw(query hit meta)) {
587 3         7 $hsp->{$type} =~ s{\s+$}{};
588 3         3 my $str = $hsp->{$type};
589 3 100       8 my $regex = $type eq 'meta' ? $META_REGEX : $SEQ_REGEX;
590 3         4 my $ind = 0;
591 3         15 while ($str =~ m{$regex}g) {
592 3         12 $ins[$ind]->{$type} = {pos => pos($str) - length($1), str => $1};
593 3         12 $ind++;
594             }
595             }
596 1         2 for my $chunk (reverse @ins) {
597 1 50       6 my ($max, $min) = ($chunk->{hit}->{str} >= $chunk->{query}->{str}) ?
598             ('hit', 'query') : ('query', 'hit');
599 1         2 my %rep;
600 1         5 $rep{$max} = 'N' x $chunk->{$max}->{str};
601             $rep{$min} = 'N' x $chunk->{$min}->{str}.
602 1         4 ('-'x($chunk->{$max}->{str}-$chunk->{$min}->{str}));
603 1         3 $rep{'meta'} = '~' x $chunk->{$max}->{str};
604 1         2 $rep{'midline'} = ' ' x $chunk->{$max}->{str};
605 1         2 for my $t (qw(hit query meta midline)) {
606 4         11 substr($hsp->{$t}, $chunk->{meta}->{pos}, length($chunk->{meta}->{str}) , $rep{$t});
607             }
608             }
609             }
610              
611             }
612              
613             1;