File Coverage

Bio/Search/Hit/ModelHit.pm
Criterion Covered Total %
statement 81 105 77.1
branch 16 38 42.1
condition 5 10 50.0
subroutine 24 26 92.3
pod 24 24 100.0
total 150 203 73.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Hit::ModelHit
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Chris Fields
7             #
8             # Copyright Chris Fields
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Hit::ModelHit - A model-based implementation of the Bio::Search::Hit::HitI interface
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Search::Hit::ModelHit;
21             my $hit = Bio::Search::Hit::ModelHit->new(-algorithm => 'rnamotif');
22              
23             # typically one gets HitI objects from a SearchIO stream via a ResultI
24             use Bio::SearchIO;
25             my $parser = Bio::SearchIO->new(-format => 'infernal', -file => 'trap.inf');
26              
27             my $result = $parser->next_result;
28             my $hit = $result->next_hit;
29              
30             =head1 DESCRIPTION
31              
32             This object handles the hit data from a database search using models or
33             descriptors instead of sequences, such as Infernal, HMMER, RNAMotif, etc.
34              
35             Unless you're writing a parser, you won't ever need to create a ModelHit or
36             any other HitI-implementing object. If you use the SearchIO system, HitI objects
37             are created automatically from a SearchIO stream which returns
38             Bio::Search::Hit::HitI objects.
39              
40             Note that several HitI-based methods have been overridden from ModelHit due to
41             their unreliability when dealing with queries that aren't sequence-based. It may
42             be possible to reimplement these at a later point, but for the time being they
43             will throw warnings and return w/o results.
44              
45             For documentation on what you can do with ModelHit (and other HitI objects),
46             please see the API documentation in
47             L.
48              
49             =head1 FEEDBACK
50              
51             =head2 Mailing Lists
52              
53             User feedback is an integral part of the evolution of this and other
54             Bioperl modules. Send your comments and suggestions preferably to
55             the Bioperl mailing list. Your participation is much appreciated.
56              
57             bioperl-l@bioperl.org - General discussion
58             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
59              
60             =head2 Support
61              
62             Please direct usage questions or support issues to the mailing list:
63              
64             I
65              
66             rather than to the module maintainer directly. Many experienced and
67             reponsive experts will be able look at the problem and quickly
68             address it. Please include a thorough description of the problem
69             with code and data examples if at all possible.
70              
71             =head2 Reporting Bugs
72              
73             Report bugs to the Bioperl bug tracking system to help us keep track
74             of the bugs and their resolution. Bug reports can be submitted via the
75             web:
76              
77             https://github.com/bioperl/bioperl-live/issues
78              
79             =head1 AUTHOR - Chris Fields
80              
81             Email cjfields at bioperl dot org
82              
83             =head1 APPENDIX
84              
85             The rest of the documentation details each of the object methods.
86             Internal methods are usually preceded with a _
87              
88             =cut
89              
90             # Let the code begin...
91              
92             package Bio::Search::Hit::ModelHit;
93              
94 3     3   10 use strict;
  3         4  
  3         77  
95              
96 3     3   9 use base qw(Bio::Search::Hit::GenericHit);
  3         3  
  3         3012  
97              
98             =head1 HitI methods implemented in parent class Bio::Search::Hit::ModelHit
99              
100             =head2 new
101              
102             Title : new
103             Usage : my $obj = Bio::Search::Hit::ModelHit->new();
104             Function: Builds a new Bio::Search::Hit::ModelHit object
105             Returns : Bio::Search::Hit::ModelHit
106             Args : -name => Name of Hit (required)
107             -description => Description (optional)
108             -accession => Accession number (optional)
109             -ncbi_gi => NCBI GI UID (optional)
110             -length => Length of the Hit (optional)
111             -score => Raw Score for the Hit (optional)
112             -bits => Bit Score for the Hit (optional)
113             -significance => Significance value for the Hit (optional)
114             -algorithm => Algorithm used (BLASTP, FASTX, etc...)
115             -hsps => Array ref of HSPs for this Hit.
116             -found_again => boolean, true if hit appears in a
117             "previously found" section of a PSI-Blast report.
118             -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
119             objects.
120              
121             =cut
122              
123             =head2 add_hsp
124              
125             Title : add_hsp
126             Usage : $hit->add_hsp($hsp)
127             Function: Add a HSP to the collection of HSPs for a Hit
128             Returns : number of HSPs in the Hit
129             Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
130             for creating a HSPI object (&hsp_factory must be set to get it back)
131              
132             =cut
133              
134             =head2 hsp_factory
135              
136             Title : hsp_factory
137             Usage : $hit->hsp_factory($hsp_factory)
138             Function: Get/set the factory used to build HSPI objects if necessary.
139             Returns : Bio::Factory::ObjectFactoryI
140             Args : Bio::Factory::ObjectFactoryI
141              
142             =cut
143              
144             =head2 Bio::Search::Hit::HitI methods
145              
146             Implementation of Bio::Search::Hit::HitI methods
147              
148             =head2 name
149              
150             Title : name
151             Usage : $hit_name = $hit->name();
152             Function: returns the name of the Hit sequence
153             Returns : a scalar string
154             Args : [optional] scalar string to set the name
155              
156             =cut
157              
158             =head2 accession
159              
160             Title : accession
161             Usage : $acc = $hit->accession();
162             Function: Retrieve the accession (if available) for the hit
163             Returns : a scalar string (empty string if not set)
164             Args : none
165              
166             =cut
167              
168             =head2 description
169              
170             Title : description
171             Usage : $desc = $hit->description();
172             Function: Retrieve the description for the hit
173             Returns : a scalar string
174             Args : [optional] scalar string to set the descrition
175              
176             =cut
177              
178             =head2 length
179              
180             Title : length
181             Usage : my $len = $hit->length
182             Function: Returns the length of the hit
183             Returns : integer
184             Args : [optional] integer to set the length
185              
186             =cut
187              
188             =head2 algorithm
189              
190             Title : algorithm
191             Usage : $alg = $hit->algorithm();
192             Function: Gets the algorithm specification that was used to obtain the hit
193             For BLAST, the algorithm denotes what type of sequence was aligned
194             against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
195             dna-prt, TBLASTN prt-translated dna, TBLASTX translated
196             dna-translated dna).
197             Returns : a scalar string
198             Args : [optional] scalar string to set the algorithm
199              
200             =cut
201              
202             =head2 raw_score
203              
204             Title : raw_score
205             Usage : $score = $hit->raw_score();
206             Function: Gets the "raw score" generated by the algorithm. What
207             this score is exactly will vary from algorithm to algorithm,
208             returning undef if unavailable.
209             Returns : a scalar value
210             Args : [optional] scalar value to set the raw score
211              
212             =cut
213              
214             =head2 score
215              
216             Equivalent to L
217              
218             =cut
219              
220             =head2 significance
221              
222             Title : significance
223             Usage : $significance = $hit->significance();
224             Function: Used to obtain the E or P value of a hit, i.e. the probability that
225             this particular hit was obtained purely by random chance. If
226             information is not available (nor calculatable from other
227             information sources), return undef.
228             Returns : a scalar value or undef if unavailable
229             Args : [optional] scalar value to set the significance
230              
231             =cut
232              
233             =head2 bits
234              
235             Usage : $hit_object->bits();
236             Purpose : Gets the bit score of the best HSP for the current hit.
237             Example : $bits = $hit_object->bits();
238             Returns : Integer or undef if bit score is not set
239             Argument : n/a
240             Comments : For BLAST1, the non-bit score is listed in the summary line.
241              
242             See Also : L
243              
244             =cut
245              
246             =head2 next_hsp
247              
248             Title : next_hsp
249             Usage : while( $hsp = $obj->next_hsp()) { ... }
250             Function : Returns the next available High Scoring Pair
251             Example :
252             Returns : Bio::Search::HSP::HSPI object or null if finished
253             Args : none
254              
255             =cut
256              
257             =head2 hsps
258              
259             Usage : $hit_object->hsps();
260             Purpose : Get a list containing all HSP objects.
261             : Get the numbers of HSPs for the current hit.
262             Example : @hsps = $hit_object->hsps();
263             : $num = $hit_object->hsps(); # alternatively, use num_hsps()
264             Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
265             : Scalar context: integer (number of HSPs).
266             : (Equivalent to num_hsps()).
267             Argument : n/a. Relies on wantarray
268             Throws : Exception if the HSPs have not been collected.
269              
270             See Also : L, L
271              
272             =cut
273              
274             =head2 num_hsps
275              
276             Usage : $hit_object->num_hsps();
277             Purpose : Get the number of HSPs for the present hit.
278             Example : $nhsps = $hit_object->num_hsps();
279             Returns : Integer or '-' if HSPs have not been callected
280             Argument : n/a
281              
282             See Also : L
283              
284             =cut
285              
286             =head2 rewind
287              
288             Title : rewind
289             Usage : $hit->rewind;
290             Function: Allow one to reset the HSP iterator to the beginning
291             Since this is an in-memory implementation
292             Returns : none
293             Args : none
294              
295             =cut
296              
297             =head2 ambiguous_aln
298              
299             Usage : $ambig_code = $hit_object->ambiguous_aln();
300             Purpose : Sets/Gets ambiguity code data member.
301             Example : (see usage)
302             Returns : String = 'q', 's', 'qs', '-'
303             : 'q' = query sequence contains overlapping sub-sequences
304             : while sbjct does not.
305             : 's' = sbjct sequence contains overlapping sub-sequences
306             : while query does not.
307             : 'qs' = query and sbjct sequence contains overlapping sub-sequences
308             : relative to each other.
309             : '-' = query and sbjct sequence do not contains multiple domains
310             : relative to each other OR both contain the same distribution
311             : of similar domains.
312             Argument : n/a
313             Throws : n/a
314             Comment : Note: "sbjct" is synonymous with "hit"
315              
316             =cut
317              
318             =head2 overlap
319              
320             See documentation in L
321              
322             =cut
323              
324             sub overlap {
325 7     7 1 18 my $self = shift;
326 7 50       22 $self->{'_overlap'} = shift if @_;
327 7 50       36 return exists $self->{'_overlap'} ? $self->{'_overlap'} : 0;
328             }
329              
330              
331             =head2 n
332              
333             Usage : $hit_object->n();
334             Purpose : Gets the N number for the current hit.
335             : This is the number of HSPs in the set which was ascribed
336             : the lowest P-value (listed on the description line).
337             : This number is not the same as the total number of HSPs.
338             : To get the total number of HSPs, use num_hsps().
339             Example : $n = $hit_object->n();
340             Returns : Integer
341             Argument : n/a
342             Throws : Exception if HSPs have not been set.
343             Comments : Calling n() on such reports will result in a call to num_hsps().
344             : The num_hsps() method will count the actual number of
345             : HSPs in the alignment listing, which may exceed N in
346             : some cases.
347              
348             See Also : L
349              
350             =cut
351              
352             sub n {
353 7     7 1 14 my $self = shift;
354 7 50       24 $self->{'_n'} = shift if @_;
355 7 50       56 return exists $self->{'_n'} ? $self->{'_n'} : $self->num_hsps;
356             }
357              
358              
359             =head2 p
360              
361             Usage : $hit_object->p( [format] );
362             Purpose : Get the P-value for the best HSP
363             Example : $p = $sbjct->p;
364             : $p = $sbjct->p('exp'); # get exponent only.
365             : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
366             Returns : Float or scientific notation number (the raw P-value, DEFAULT).
367             : Integer if format == 'exp' (the magnitude of the base 10 exponent).
368             : 2-element list (float, int) if format == 'parts' and P-value
369             : is in scientific notation (See Comments).
370             Argument : format: string of 'raw' | 'exp' | 'parts'
371             : 'raw' returns value given in report. Default. (1.2e-34)
372             : 'exp' returns exponent value only (34)
373             : 'parts' returns the decimal and exponent as a
374             : 2-element list (1.2, -34) (See Comments).
375             Throws : Warns if no P-value is defined. Uses expect instead.
376             Comments : Using the 'parts' argument is not recommended since it will not
377             : work as expected if the P-value is not in scientific notation.
378             : That is, floats are not converted into sci notation before
379             : splitting into parts.
380              
381             See Also : L, L, L
382              
383             =cut
384              
385             =head2 hsp
386              
387             Usage : $hit_object->hsp( [string] );
388             Purpose : Get a single HSPI object for the present HitI object.
389             Example : $hspObj = $hit_object->hsp; # same as 'best'
390             : $hspObj = $hit_object->hsp('best');
391             : $hspObj = $hit_object->hsp('worst');
392             Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
393             Argument : String (or no argument).
394             : No argument (default) = highest scoring HSP (same as 'best').
395             : 'best' or 'first' = highest scoring HSP.
396             : 'worst' or 'last' = lowest scoring HSP.
397             Throws : Exception if the HSPs have not been collected.
398             : Exception if an unrecognized argument is used.
399              
400             See Also : L, L()
401              
402             =cut
403              
404             sub hsp {
405 0     0 1 0 my( $self, $option ) = @_;
406 0   0     0 $option ||= 'best';
407              
408 0 0       0 if (not ref $self->{'_hsps'}) {
409 0         0 $self->throw("Can't get HSPs: data not collected.");
410             }
411              
412 0         0 my @hsps = $self->hsps;
413              
414 0 0       0 return $hsps[0] if $option =~ /best|first|1/i;
415 0 0       0 return $hsps[$#hsps] if $option =~ /worst|last/i;
416              
417 0         0 $self->throw("Can't get HSP for: $option\n" .
418             "Valid arguments: 'best', 'worst'");
419             }
420              
421             =head2 rank
422              
423             Title : rank
424             Usage : $obj->rank($newval)
425             Function: Get/Set the rank of this Hit in the Query search list
426             i.e. this is the Nth hit for a specific query
427             Returns : value of rank
428             Args : newvalue (optional)
429              
430              
431             =cut
432              
433             sub rank {
434 22     22 1 40 my $self = shift;
435 22 100       75 return $self->{'_rank'} = shift if @_;
436 9   50     48 return $self->{'_rank'} || 1;
437             }
438              
439             =head2 locus
440              
441             Title : locus
442             Usage : $locus = $hit->locus();
443             Function: Retrieve the locus (if available) for the hit
444             Returns : a scalar string (empty string if not set)
445             Args : none
446              
447             =cut
448              
449             sub locus {
450 7     7 1 16 my ($self,$value) = @_;
451 7         15 my $previous = $self->{'_locus'};
452 7 50 33     50 if( defined $value || ! defined $previous ) {
453 7 50       20 unless (defined $value) {
454 7 100       56 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
455 6         21 $value = $previous = $3;
456             } else {
457 1         2 $value = $previous = '';
458             }
459             }
460 7         19 $self->{'_locus'} = $value;
461             }
462 7         24 return $previous;
463             }
464              
465             =head2 each_accession_number
466              
467             Title : each_accession_number
468             Usage : @each_accession_number = $hit->each_accession_number();
469             Function: Get each accession number listed in the description of the hit.
470             If there are no alternatives, then only the primary accession will
471             be given
472             Returns : list of all accession numbers in the description
473             Args : none
474              
475             =cut
476              
477             sub each_accession_number {
478 0     0 1 0 my ($self,$value) = @_;
479 0         0 my $desc = $self->{'_description'};
480             #put primary accnum on the list
481 0         0 my @accnums;
482 0         0 push (@accnums,$self->{'_accession'});
483 0 0       0 if( defined $desc ) {
484 0         0 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
485 0         0 my $id = $1;
486 0         0 my ($acc, $version);
487 0 0       0 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
    0          
    0          
    0          
488 0         0 ($acc, $version) = split /\./, $2;
489             } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
490 0         0 ($acc, $version) = split /\./, $3;
491             } elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
492 0         0 $acc = $id;
493             } elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
494 0         0 ($acc,$version) = ($2);
495             } else {
496             #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
497             #Database Name Identifier Syntax
498             #============================ ========================
499             #GenBank gb|accession|locus
500             #EMBL Data Library emb|accession|locus
501             #DDBJ, DNA Database of Japan dbj|accession|locus
502             #NBRF PIR pir||entry
503             #Protein Research Foundation prf||name
504             #SWISS-PROT sp|accession|entry name
505             #Brookhaven Protein Data Bank pdb|entry|chain
506             #Patents pat|country|number
507             #GenInfo Backbone Id bbs|number
508             #General database identifier gnl|database|identifier
509             #NCBI Reference Sequence ref|accession|locus
510             #Local Sequence identifier lcl|identifier
511 0         0 $acc=$id;
512             }
513 0         0 push(@accnums, $acc);
514             }
515             }
516 0         0 return @accnums;
517             }
518              
519             =head2 tiled_hsps
520              
521             See documentation in L
522              
523             =cut
524              
525             =head2 query_length
526              
527             Title : query_length
528             Usage : $obj->query_length($newval)
529             Function: Get/Set the query_length
530             Returns : value of query_length (a scalar)
531             Args : on set, new value (a scalar or undef, optional)
532              
533              
534             =cut
535              
536             sub query_length {
537 17     17 1 27 my $self = shift;
538              
539 17 100       59 return $self->{'_query_length'} = shift if @_;
540 6         25 return $self->{'_query_length'};
541             }
542              
543             =head2 ncbi_gi
544              
545             Title : ncbi_gi
546             Usage : $acc = $hit->ncbi_gi();
547             Function: Retrieve the NCBI Unique ID (aka the GI #),
548             if available, for the hit
549             Returns : a scalar string (empty string if not set)
550             Args : none
551             Note : As of Sept. 2016 NCBI records will no longer have a
552             GI; this attributue will remain in place for older
553             records
554              
555             =cut
556              
557             sub ncbi_gi {
558 15     15 1 332 my ($self,$value) = @_;
559 15         25 my $previous = $self->{'_ncbi_gi'};
560 15 100 100     70 if( defined $value || ! defined $previous ) {
561 9 100       24 $value = $previous = '' unless defined $value;
562 9         16 $self->{'_ncbi_gi'} = $value;
563             }
564 15         38 return $previous;
565             }
566              
567             =head1 ModelHit methods overridden in ModelHit
568              
569             The following methods have been overridden due to their current reliance on
570             sequence-based queries. They may be implemented in future versions of this class.
571              
572             =head2 length_aln
573              
574             =cut
575              
576             sub length_aln {
577 1     1 1 53 my $self = shift;
578 1         15 $self->warn('$hit->length_aln not implemented for Model-based searches');
579 1         4 return;
580             }
581              
582             =head2 gaps
583              
584             =cut
585              
586             sub gaps {
587 1     1 1 27 my $self = shift;
588 1         4 $self->warn('$hit->gaps not implemented for Model-based searches');
589 1         3 return;
590             }
591              
592             =head2 matches
593              
594             =cut
595              
596             sub matches {
597 1     1 1 26 my $self = shift;
598 1         4 $self->warn('$hit->matches not implemented for Model-based searches');
599 1         3 return;
600             }
601              
602             =head2 start
603              
604             =cut
605              
606             sub start {
607 1     1 1 26 my $self = shift;
608 1         4 $self->warn('$hit->start not implemented for Model-based searches');
609 1         4 return;
610             }
611              
612              
613             =head2 end
614              
615             =cut
616              
617             sub end {
618 1     1 1 26 my $self = shift;
619 1         3 $self->warn('$hit->end not implemented for Model-based searches');
620 1         4 return;
621             }
622              
623             =head2 range
624              
625             =cut
626              
627             sub range {
628 1     1 1 38 my $self = shift;
629 1         5 $self->warn('$hit->range not implemented for Model-based searches');
630 1         3 return;
631             }
632              
633             =head2 frac_identical
634              
635             =cut
636              
637             sub frac_identical {
638 1     1 1 26 my $self = shift;
639 1         4 $self->warn('$hit->frac_identical not implemented for Model-based searches');
640 1         3 return;
641             }
642              
643             =head2 frac_conserved
644              
645             =cut
646              
647             sub frac_conserved {
648 1     1 1 26 my $self = shift;
649 1         4 $self->warn('$hit->frac_conserved not implemented for Model-based searches');
650 1         3 return;
651             }
652              
653             =head2 frac_aligned_query
654              
655             =cut
656              
657             sub frac_aligned_query {
658 1     1 1 27 my $self = shift;
659 1         4 $self->warn('$hit->frac_aligned_query not implemented for Model-based searches');
660 1         3 return;
661             }
662              
663             =head2 frac_aligned_hit
664              
665             =cut
666              
667             sub frac_aligned_hit {
668 1     1 1 27 my $self = shift;
669 1         4 $self->warn('$hit->frac_aligned_hit not implemented for Model-based searches');
670 1         3 return;
671             }
672              
673             =head2 num_unaligned_hit
674              
675             =cut
676              
677             *num_unaligned_sbjct = \&num_unaligned_hit;
678              
679             sub num_unaligned_hit {
680 2     2 1 53 my $self = shift;
681 2         7 $self->warn('$hit->num_unaligned_hit/num_unaligned_sbjct not implemented for Model-based searches');
682 2         6 return;
683             }
684              
685             =head2 num_unaligned_query
686              
687             =cut
688              
689             sub num_unaligned_query {
690 1     1 1 26 my $self = shift;
691 1         3 $self->warn('$hit->num_unaligned_query not implemented for Model-based searches');
692 1         4 return;
693             }
694              
695             =head2 seq_inds
696              
697             =cut
698              
699             sub seq_inds {
700 1     1 1 26 my $self = shift;
701 1         4 $self->warn('$hit->seq_inds not implemented for Model-based searches');
702 1         3 return;
703             }
704              
705             =head2 strand
706              
707             =cut
708              
709             sub strand {
710 1     1 1 29 my $self = shift;
711 1         5 $self->warn('$hit->strand not implemented for Model-based searches');
712 1         3 return;
713             }
714              
715             =head2 frame
716              
717             =cut
718              
719             sub frame {
720 1     1 1 27 my $self = shift;
721 1         3 $self->warn('$hit->frame not implemented for Model-based searches');
722 1         3 return;
723             }
724              
725             =head2 logical_length
726              
727             =cut
728              
729             sub logical_length {
730 1     1 1 26 my $self = shift;
731 1         4 $self->warn('$hit->logical_length not implemented for Model-based searches');
732 1         3 return;
733             }
734              
735             1;