File Coverage

Bio/Search/Hit/PullHitI.pm
Criterion Covered Total %
statement 152 187 81.2
branch 62 104 59.6
condition 28 50 56.0
subroutine 35 42 83.3
pod 37 37 100.0
total 314 420 74.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module Bio::Search::Hit::PullHitI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # You may distribute this module under the same terms as perl itself
9              
10             # POD documentation - main docs before the code
11              
12             =head1 NAME
13              
14             Bio::Search::Hit::PullHitI - Bio::Search::Hit::HitI interface for pull parsers.
15              
16             =head1 SYNOPSIS
17              
18             # This is an interface and cannot be instantiated
19              
20             # typically one gets HitI objects from a SearchIO stream via a ResultI
21             use Bio::SearchIO;
22             my $parser = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 't/data/hmmpfam.out');
24              
25             my $result = $parser->next_result;
26             my $hit = $result->next_hit;
27              
28             $hit_name = $hit->name();
29              
30             $desc = $hit->description();
31              
32             $len = $hit->length
33              
34             $alg = $hit->algorithm();
35              
36             $score = $hit->raw_score();
37              
38             $significance = $hit->significance();
39              
40             $rank = $hit->rank(); # the Nth hit for a specific query
41              
42             while( $hsp = $obj->next_hsp()) { ... } # process in iterator fashion
43              
44             for my $hsp ( $obj->hsps()()) { ... } # process in list fashion
45              
46             =head1 DESCRIPTION
47              
48             This object handles the hit data from a database sequence search.
49              
50             PullHitI is for fast implementations that only do parsing work on the hit
51             data when you actually request information by calling one of the HitI
52             methods.
53              
54             Many methods of HitI are implemented in a way suitable for inheriting classes
55             that use Bio::PullParserI. It only really makes sense for PullHit modules to be
56             created by (and have as a -parent) PullResult modules.
57              
58             In addition to the usual -chunk and -parent, -hit_data is all you should supply
59             when making a PullHit object. This will store that data and make it accessible
60             via _raw_hit_data, which you can access in your subclass. It would be best to
61             simply provide the data as the input -chunk instead, if the raw data is large
62             enough.
63              
64             =head1 FEEDBACK
65              
66             =head2 Mailing Lists
67              
68             User feedback is an integral part of the evolution of this and other
69             Bioperl modules. Send your comments and suggestions preferably to one
70             of the Bioperl mailing lists. Your participation is much appreciated.
71              
72             bioperl-l@bioperl.org - General discussion
73             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
74              
75             =head2 Support
76              
77             Please direct usage questions or support issues to the mailing list:
78              
79             I
80              
81             rather than to the module maintainer directly. Many experienced and
82             reponsive experts will be able look at the problem and quickly
83             address it. Please include a thorough description of the problem
84             with code and data examples if at all possible.
85              
86             =head2 Reporting Bugs
87              
88             Report bugs to the Bioperl bug tracking system to help us keep track
89             the bugs and their resolution. Bug reports can be submitted via the
90             web:
91              
92             https://github.com/bioperl/bioperl-live/issues
93              
94             =head1 AUTHOR - Sendu Bala
95              
96             Email bix@sendu.me.uk
97              
98             =head1 COPYRIGHT
99              
100             Copyright (c) 2006 Sendu Bala. All Rights Reserved.
101              
102             =head1 DISCLAIMER
103              
104             This software is provided "as is" without warranty of any kind.
105              
106             =head1 APPENDIX
107              
108             The rest of the documentation details each of the object
109             methods. Internal methods are usually preceded with a _
110              
111             =cut
112              
113             # Let the code begin...
114              
115             package Bio::Search::Hit::PullHitI;
116              
117 2     2   1000 use Bio::Search::SearchUtils;
  2         3  
  2         52  
118              
119 2     2   8 use strict;
  2         3  
  2         37  
120              
121 2     2   7 use base qw(Bio::PullParserI Bio::Search::Hit::HitI);
  2         2  
  2         1018  
122              
123             =head2 _setup
124              
125             Title : _setup
126             Usage : $self->_setup(@args)
127             Function: Implementers should call this to setup common fields and deal with
128             common arguments to new().
129             Returns : n/a
130             Args : @args received in new().
131              
132             =cut
133              
134             sub _setup {
135 55     55   91 my ($self, @args) = @_;
136            
137             # fields most subclasses probably will want
138 55         360 $self->_fields( { ( next_hsp => undef,
139             num_hsps => undef,
140             hsps => undef,
141             hit_start => undef,
142             query_start => undef,
143             hit_end => undef,
144             query_end => undef,
145             length => undef,
146             name => undef ,
147             accession => undef ) } );
148            
149 55         176 my ($parent, $chunk, $hit_data) = $self->_rearrange([qw(PARENT
150             CHUNK
151             HIT_DATA)], @args);
152 55 50 33     152 $self->throw("Need -parent or -chunk to be defined") unless $parent || $chunk;
153            
154 55 50       144 $self->parent($parent) if $parent;
155            
156 55 100       84 if ($chunk) {
157 39         47 my ($io, $start, $end) = (undef, 0, undef);
158 39 50       82 if (ref($chunk) eq 'ARRAY') {
159 39         30 ($io, $start, $end) = @{$chunk};
  39         68  
160             }
161             else {
162 0         0 $io = $chunk;
163             }
164 39         117 $self->chunk($io, -start => $start, -end => $end);
165             }
166            
167 55 50       146 $self->_raw_hit_data($hit_data) if $hit_data;
168             }
169              
170             sub _raw_hit_data {
171 110     110   94 my $self = shift;
172 110 100       150 if (@_) {
173 55         101 $self->{_raw_hit_data} = shift;
174             }
175 110         167 return $self->{_raw_hit_data};
176             }
177              
178             #
179             # Some of these methods are written explitely to avoid HitI throwing not
180             # implemented; if it didn't do that then PullParserI AUTOLOAD would have
181             # cought them.
182             #
183              
184             =head2 name
185              
186             Title : name
187             Usage : $hit_name = $hit->name();
188             Function: returns the name of the Hit sequence
189             Returns : a scalar string
190             Args : none
191              
192             The B of a hit is unique within a Result or within an Iteration.
193              
194             =cut
195              
196             sub name {
197 25     25 1 2907 return shift->get_field('name');
198             }
199              
200             =head2 description
201              
202             Title : description
203             Usage : $desc = $hit->description();
204             Function: Retrieve the description for the hit
205             Returns : a scalar string
206             Args : none
207              
208             =cut
209              
210             sub description {
211 5     5 1 1084 return shift->get_field('description');
212             }
213              
214             =head2 accession
215              
216             Title : accession
217             Usage : $acc = $hit->accession();
218             Function: Retrieve the accession (if available) for the hit
219             Returns : a scalar string (empty string if not set)
220             Args : none
221              
222             =cut
223              
224             sub accession {
225 19     19 1 1035 return shift->get_field('accession');
226             }
227              
228             =head2 locus
229              
230             Title : locus
231             Usage : $acc = $hit->locus();
232             Function: Retrieve the locus(if available) for the hit
233             Returns : a scalar string (empty string if not set)
234             Args : none
235              
236             =cut
237              
238             sub locus {
239 3     3 1 9 return shift->get_field('locus');
240             }
241              
242             =head2 length
243              
244             Title : length
245             Usage : my $len = $hit->length
246             Function: Returns the length of the hit
247             Returns : integer
248             Args : none
249              
250             =cut
251              
252             sub length {
253 19     19 1 48 return shift->get_field('length');
254             }
255              
256             =head2 algorithm
257              
258             Title : algorithm
259             Usage : $alg = $hit->algorithm();
260             Function: Gets the algorithm specification that was used to obtain the hit
261             For BLAST, the algorithm denotes what type of sequence was aligned
262             against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
263             dna-prt, TBLASTN prt-translated dna, TBLASTX translated
264             dna-translated dna).
265             Returns : a scalar string
266             Args : none
267              
268             =cut
269              
270             sub algorithm {
271 3     3 1 8 return shift->get_field('algorithm');
272             }
273              
274             =head2 raw_score
275              
276             Title : raw_score
277             Usage : $score = $hit->raw_score();
278             Function: Gets the "raw score" generated by the algorithm. What
279             this score is exactly will vary from algorithm to algorithm,
280             returning undef if unavailable.
281             Returns : a scalar value
282             Args : none
283              
284             =cut
285              
286             sub raw_score {
287 19     19 1 1140 return shift->get_field('score');
288             }
289              
290             =head2 score
291              
292             Equivalent to L
293              
294             =cut
295              
296             sub score {
297 5     5 1 1075 return shift->get_field('score');
298             }
299              
300             =head2 significance
301              
302             Title : significance
303             Usage : $significance = $hit->significance();
304             Function: Used to obtain the E or P value of a hit, i.e. the probability that
305             this particular hit was obtained purely by random chance. If
306             information is not available (nor calculatable from other
307             information sources), return undef.
308             Returns : a scalar value or undef if unavailable
309             Args : none
310              
311             =cut
312              
313             sub significance {
314 24     24 1 68 return shift->get_field('significance');
315             }
316              
317             =head2 bits
318              
319             Usage : $hit_object->bits();
320             Purpose : Gets the bit score of the best HSP for the current hit.
321             Example : $bits = $hit_object->bits();
322             Returns : Integer or double for FASTA reports
323             Argument : n/a
324             Comments : For BLAST1, the non-bit score is listed in the summary line.
325              
326             See Also : L
327              
328             =cut
329              
330             sub bits {
331 3     3 1 7 return shift->get_field('bits');
332             }
333              
334             =head2 next_hsp
335              
336             Title : next_hsp
337             Usage : while( $hsp = $obj->next_hsp()) { ... }
338             Function : Returns the next available High Scoring Pair
339             Example :
340             Returns : L object or null if finished
341             Args : none
342              
343             =cut
344              
345             sub next_hsp {
346 0     0 1 0 return shift->get_field('next_hsp');
347             }
348              
349             =head2 hsps
350              
351             Usage : $hit_object->hsps();
352             Purpose : Get a list containing all HSP objects.
353             : Get the numbers of HSPs for the current hit.
354             Example : @hsps = $hit_object->hsps();
355             : $num = $hit_object->hsps(); # alternatively, use num_hsps()
356             Returns : Array context : list of L objects.
357             : Scalar context: integer (number of HSPs).
358             : (Equivalent to num_hsps()).
359             Argument : n/a. Relies on wantarray
360             Throws : Exception if the HSPs have not been collected.
361              
362             See Also : L, L
363              
364             =cut
365              
366             sub hsps {
367 0     0 1 0 return shift->get_field('hsps');
368             }
369              
370             =head2 num_hsps
371              
372             Usage : $hit_object->num_hsps();
373             Purpose : Get the number of HSPs for the present Blast hit.
374             Example : $nhsps = $hit_object->num_hsps();
375             Returns : Integer
376             Argument : n/a
377             Throws : Exception if the HSPs have not been collected.
378              
379             See Also : L
380              
381             =cut
382              
383             sub num_hsps {
384 10     10 1 21 return shift->get_field('num_hsps');
385             }
386              
387             #
388             # HitI/ GenericHit methods that are unrelated to simply parsing information
389             # directly out of a file, but need more complex calculation; mostly not
390             # implemented here.
391             #
392              
393             =head2 seq_inds
394              
395             Usage : $hit->seq_inds( seq_type, class, collapse );
396             Purpose : Get a list of residue positions (indices) across all HSPs
397             : for identical or conserved residues in the query or sbjct sequence.
398             Example : @s_ind = $hit->seq_inds('query', 'identical');
399             : @h_ind = $hit->seq_inds('hit', 'conserved');
400             : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
401             Returns : Array of integers
402             : May include ranges if collapse is non-zero.
403             Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
404             : ('sbjct' is synonymous with 'hit')
405             : [1] class = 'identical' or 'conserved' or 'nomatch' or 'gap'
406             : (default = 'identical')
407             : (can be shortened to 'id' or 'cons')
408             : Note that 'conserved' includes identical unless you use
409             : 'conserved-not-identical'
410             : [2] collapse = boolean, if non-zero, consecutive positions are
411             : merged using a range notation, e.g.,
412             : "1 2 3 4 5 7 9 10 11" collapses to "1-5 7 9-11". This
413             : is useful for consolidating long lists. Default = no
414             : collapse.
415             Throws : n/a.
416              
417             See Also : L
418              
419             =cut
420              
421             sub seq_inds {
422 52     52 1 1666 my ($self, $seqType, $class, $collapse) = @_;
423            
424 52   50     126 $seqType ||= 'query';
425 52   50     85 $class ||= 'identical';
426 52   100     158 $collapse ||= 0;
427            
428 52 50       103 $seqType = 'hit' if $seqType eq 'sbjct';
429            
430 52         142 my $storage_name = '_seq_inds_'.$seqType.'_'.$class;
431 52 100       118 unless (defined $self->{$storage_name}) {
432 33         33 my @inds;
433 33         126 foreach my $hsp ($self->hsps) {
434             # This will merge data for all HSPs together.
435 142         459 push @inds, $hsp->seq_inds($seqType, $class);
436             }
437            
438             # Need to remove duplicates and sort the merged positions, unless gaps.
439 33 100 100     149 if (@inds && $class ne 'gap') {
440 23         160 my %tmp = map { $_, 1 } @inds;
  12012         12866  
441 23         1656 @inds = sort {$a <=> $b} keys %tmp;
  96783         58898  
442             }
443            
444 33         386 $self->{$storage_name} = \@inds;
445             }
446            
447 52         59 my @inds = @{$self->{$storage_name}};
  52         1590  
448 52 100       1278 $collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
449             }
450              
451             =head2 rewind
452              
453             Title : rewind
454             Usage : $hit->rewind;
455             Function: Allow one to reset the HSP iterator to the beginning if possible
456             Returns : none
457             Args : none
458              
459             =cut
460              
461             sub rewind {
462 0     0 1 0 shift->throw_not_implemented();
463             }
464              
465             =head2 overlap
466              
467             Usage : $hit_object->overlap( [integer] );
468             Purpose : Gets/Sets the allowable amount overlap between different HSP
469             sequences.
470             Example : $hit_object->overlap(5);
471             : $overlap = $hit_object->overlap;
472             Returns : Integer.
473             Argument : integer.
474             Throws : n/a
475             Status : Deprecated
476             Comments : This value isn't used for anything
477              
478             =cut
479              
480             sub overlap {
481 3     3 1 5 my $self = shift;
482 3 50       8 if (@_) { $self->{_overlap} = shift }
  0         0  
483 3   50     17 return $self->{_overlap} || 0;
484             }
485              
486             =head2 n
487              
488             Usage : $hit_object->n();
489             Purpose : Gets the N number for the current Blast hit.
490             : This is the number of HSPs in the set which was ascribed
491             : the lowest P-value (listed on the description line).
492             : This number is not the same as the total number of HSPs.
493             : To get the total number of HSPs, use num_hsps().
494             Example : $n = $hit_object->n();
495             Returns : Integer
496             Argument : n/a
497             Throws : Exception if HSPs have not been set (BLAST2 reports).
498             Comments : Note that the N parameter is not reported in gapped BLAST2.
499             : Calling n() on such reports will result in a call to num_hsps().
500             : The num_hsps() method will count the actual number of
501             : HSPs in the alignment listing, which may exceed N in
502             : some cases.
503              
504             See Also : L
505              
506             =cut
507              
508             sub n {
509 3     3 1 1052 return shift->get_field('num_hsps');
510             }
511              
512             =head2 p
513              
514             Usage : $hit_object->p( [format] );
515             Purpose : Get the P-value for the best HSP of the given BLAST hit.
516             : (Note that P-values are not provided with NCBI Blast2 reports).
517             Example : $p = $sbjct->p;
518             : $p = $sbjct->p('exp'); # get exponent only.
519             : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
520             Returns : Float or scientific notation number (the raw P-value, DEFAULT).
521             : Integer if format == 'exp' (the magnitude of the base 10 exponent).
522             : 2-element list (float, int) if format == 'parts' and P-value
523             : is in scientific notation (See Comments).
524             Argument : format: string of 'raw' | 'exp' | 'parts'
525             : 'raw' returns value given in report. Default. (1.2e-34)
526             : 'exp' returns exponent value only (34)
527             : 'parts' returns the decimal and exponent as a
528             : 2-element list (1.2, -34) (See Comments).
529             Throws : Warns if no P-value is defined. Uses expect instead.
530             Comments : Using the 'parts' argument is not recommended since it will not
531             : work as expected if the P-value is not in scientific notation.
532             : That is, floats are not converted into sci notation before
533             : splitting into parts.
534              
535             See Also : L, L,
536             L
537              
538             =cut
539              
540             sub p {
541 0     0 1 0 shift->throw_not_implemented;
542             }
543              
544             =head2 hsp
545              
546             Usage : $hit_object->hsp( [string] );
547             Purpose : Get a single HSPI object for the present HitI object.
548             Example : $hspObj = $hit_object->hsp; # same as 'best'
549             : $hspObj = $hit_object->hsp('best');
550             : $hspObj = $hit_object->hsp('worst');
551             Returns : Object reference for a L object.
552             Argument : String (or no argument).
553             : No argument (default) = highest scoring HSP (same as 'best').
554             : 'best' = highest scoring HSP.
555             : 'worst' = lowest scoring HSP.
556             Throws : Exception if an unrecognized argument is used.
557              
558             See Also : L, L()
559              
560             =cut
561              
562             sub hsp {
563 0     0 1 0 shift->throw_not_implemented;
564             }
565              
566             =head2 logical_length
567              
568             Usage : $hit_object->logical_length( [seq_type] );
569             : (mostly intended for internal use).
570             Purpose : Get the logical length of the hit sequence.
571             : If the Blast is a TBLASTN or TBLASTX, the returned length
572             : is the length of the would-be amino acid sequence (length/3).
573             : For all other BLAST flavors, this function is the same as length().
574             Example : $len = $hit_object->logical_length();
575             Returns : Integer
576             Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
577             ('sbjct' is synonymous with 'hit')
578             Throws : n/a
579             Comments : This is important for functions like frac_aligned_query()
580             : which need to operate in amino acid coordinate space when dealing
581             : with [T]BLAST[NX] type reports.
582              
583             See Also : L, L,
584             L
585              
586             =cut
587              
588             sub logical_length {
589 8     8 1 970 my ($self, $type) = @_;
590 8   50     23 $type ||= 'query';
591 8         17 $type = lc($type);
592 8 100       22 $type = 'hit' if $type eq 'sbjct';
593 8 100       28 if ($type eq 'query') {
    50          
594 4         14 return $self->get_field('query_length');
595             }
596             elsif ($type eq 'hit') {
597 4         10 return $self->get_field('length');
598             }
599             }
600              
601             =head2 rank
602              
603             Title : rank
604             Usage : $obj->rank($newval)
605             Function: Get/Set the rank of this Hit in the Query search list
606             i.e. this is the Nth hit for a specific query
607             Returns : value of rank
608             Args : newvalue (optional)
609              
610             =cut
611              
612             sub rank {
613 3     3 1 8 return shift->get_field('rank');
614             }
615              
616             =head2 each_accession_number
617              
618             Title : each_accession_number
619             Usage : $obj->each_accession_number
620             Function: Get each accession number listed in the description of the hit.
621             If there are no alternatives, then only the primary accession will
622             be given (if there is one).
623             Returns : list of all accession numbers in the description
624             Args : none
625              
626             =cut
627              
628             sub each_accession_number {
629 3     3 1 5 my $self = shift;
630 3 50       10 my $accession = $self->get_field('accession') if $self->has_field('accession');
631 3 50       5 my $desc = $self->get_field('description') if $self->has_field('description');
632 3 50 33     20 return unless $accession || $desc;
633            
634 0         0 my @accnums;
635 0 0       0 push (@accnums, $accession) if $accession;
636            
637 0 0       0 if (defined $desc) {
638 0         0 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
639 0         0 my $id = $1;
640 0         0 my $acc;
641 0 0       0 if ($id =~ /(?:gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(?:.*)/) {
    0          
    0          
    0          
642 0         0 ($acc) = split /\./, $1;
643             }
644             elsif ($id =~ /(?:pir|prf|pat|gnl)\|(?:.*)\|(.*)/) {
645 0         0 ($acc) = split /\./, $1;
646             }
647             elsif ($id =~ /(?:gim|gi|bbm|bbs|lcl)\|(?:\d*)/) {
648 0         0 $acc = $id;
649             }
650             elsif ($id =~ /(?:oth)\|(.*)\|(?:.*)\|(?:.*)/ ) { # discontinued...
651 0         0 $acc = $1;
652             }
653             else {
654 0         0 $acc = $id;
655             }
656 0         0 push(@accnums, $acc);
657             }
658             }
659 0         0 return @accnums;
660             }
661              
662             =head2 tiled_hsps
663              
664             Usage : $hit_object->tiled_hsps( [integer] );
665             Purpose : Gets/Sets an indicator for whether or not the HSPs in this Hit
666             : have been tiled.
667             Example : $hit_object->tiled_hsps(1);
668             : if( $hit_object->tiled_hsps ) { # do something }
669             Returns : Boolean (1 or 0)
670             Argument : integer (optional)
671             Throws : n/a
672             Status : Deprecated
673             Notes : This value is not used for anything
674              
675             =cut
676              
677             sub tiled_hsps {
678 3     3 1 1059 my $self = shift;
679 3 50       9 if (@_) { $self->{_hsps_are_tiled} = shift }
  0         0  
680 3   50     17 return $self->{_hsps_are_tiled} || 0;
681             }
682              
683             =head2 strand
684              
685             Usage : $sbjct->strand( [seq_type] );
686             Purpose : Gets the strand(s) for the query, sbjct, or both sequences
687             : in the best HSP of the BlastHit object after HSP tiling.
688             : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
689             Example : $qstrand = $sbjct->strand('query');
690             : $sstrand = $sbjct->strand('hit');
691             : ($qstrand, $sstrand) = $sbjct->strand();
692             Returns : scalar context: integer '1', '-1', or '0'
693             : array context without args: list of two strings (queryStrand, sbjctStrand)
694             : Array context can be "induced" by providing an argument of 'list' or 'array'.
695             Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
696             ('sbjct' is synonymous with 'hit')
697             Throws : n/a
698             Comments : This method requires that all HSPs be tiled. If they have not
699             : already been tiled, they will be tiled first automatically..
700             : If you don't want the tiled data, iterate through each HSP
701             : calling strand() on each (use hsps() to get all HSPs).
702             :
703             : Formerly (prior to 10/21/02), this method would return the
704             : string "-1/1" for hits with HSPs on both strands.
705             : However, now that strand and frame is properly being accounted
706             : for during HSP tiling, it makes more sense for strand()
707             : to return the strand data for the best HSP after tiling.
708             :
709             : If you really want to know about hits on opposite strands,
710             : you should be iterating through the HSPs using methods on the
711             : HSP objects.
712             :
713             : A possible use case where knowing whether a hit has HSPs
714             : on both strands would be when filtering via SearchIO for hits with
715             : this property. However, in this case it would be better to have a
716             : dedicated method such as $hit->hsps_on_both_strands(). Similarly
717             : for frame. This could be provided if there is interest.
718              
719             See Also : L()
720              
721             =cut
722              
723             sub strand {
724 0     0 1 0 shift->throw_not_implemented;
725             }
726              
727             =head2 frame
728              
729             Usage : $hit_object->frame();
730             Purpose : Gets the reading frame for the best HSP after HSP tiling.
731             : This is only valid for BLASTX and TBLASTN/X type reports.
732             Example : $frame = $hit_object->frame();
733             Returns : Integer (-2 .. +2)
734             Argument : n/a
735             Throws : Exception if HSPs have not been set.
736             Comments : This method requires that all HSPs be tiled. If they have not
737             : already been tiled, they will be tiled first automatically..
738             : If you don't want the tiled data, iterate through each HSP
739             : calling frame() on each (use hsps() to get all HSPs).
740              
741             See Also : L
742              
743             =cut
744              
745             sub frame {
746 0     0 1 0 shift->throw_not_implemented;
747             }
748              
749             =head2 length_aln
750              
751             Usage : $hit_object->length_aln( [seq_type] );
752             Purpose : Get the total length of the aligned region for query or sbjct seq.
753             : This number will include all HSPs, and excludes gaps.
754             Example : $len = $hit_object->length_aln(); # default = query
755             : $lenAln = $hit_object->length_aln('query');
756             Returns : Integer
757             Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
758             ('sbjct' is synonymous with 'hit')
759             Throws : Exception if the argument is not recognized.
760             Comments : This method will report the logical length of the alignment,
761             : meaning that for TBLAST[NX] reports, the length is reported
762             : using amino acid coordinate space (i.e., nucleotides / 3).
763              
764             See Also : L, L,
765             L, L,
766             L,
767             L
768              
769             =cut
770              
771             sub length_aln {
772 11     11 1 25 my ($self, $seqType) = @_;
773 11   50     26 $seqType ||= 'query';
774 11 100       32 $seqType = 'hit' if $seqType eq 'sbjct';
775            
776 11         28 my %non_gaps = map { $_, 1 } $self->seq_inds($seqType, 'conserved'),
  4665         4465  
777             $self->seq_inds($seqType, 'no_match');
778 11         571 return scalar(keys %non_gaps);
779             }
780              
781             =head2 gaps
782              
783             Usage : $hit_object->gaps( [seq_type] );
784             Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
785             : Data is summed across all HSPs.
786             Example : $qgaps = $hit_object->gaps('query');
787             : $hgaps = $hit_object->gaps('hit');
788             : $tgaps = $hit_object->gaps(); # default = total (query + hit)
789             Returns : scalar context: integer
790             : array context without args: two-element list of integers
791             : (queryGaps, hitGaps)
792             : Array context can be forced by providing an argument of 'list' or
793             : 'array'.
794             :
795             : CAUTION: Calling this method within printf or sprintf is arrray
796             : context.
797             : So this function may not give you what you expect. For example:
798             : printf "Total gaps: %d", $hit->gaps();
799             : Actually returns a two-element array, so what gets printed
800             : is the number of gaps in the query, not the total
801             :
802             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list'
803             : (default = 'total') ('sbjct' is synonymous with 'hit')
804             Comments : If you need data for each HSP, use hsps() and then interate
805             : through each HSP object.
806              
807             =cut
808              
809             sub gaps {
810 5     5 1 6 my ($self, $seqType) = @_;
811            
812 5 0 33     12 $seqType ||= (wantarray ? 'list' : 'total');
813 5 50       10 $seqType = 'hit' if $seqType eq 'sbjct';
814            
815 5 50       25 if ($seqType =~ /list|array/i) {
    100          
816 0         0 return (scalar($self->seq_inds('query', 'gap')), scalar($self->seq_inds('hit', 'gap')));
817             }
818             elsif ($seqType eq 'total') {
819 1   50     3 return (scalar($self->seq_inds('query', 'gap')) + scalar($self->seq_inds('hit', 'gap'))) || 0;
820             }
821             else {
822 4   50     8 return scalar($self->seq_inds($seqType, 'gap')) || 0;
823             }
824             }
825              
826             =head2 matches
827              
828             Usage : $hit_object->matches( [class] );
829             Purpose : Get the total number of identical or conserved matches
830             : (or both) across all HSPs.
831             : (Note: 'conservative' matches are indicated as 'positives'
832             : in BLAST reports.)
833             Example : ($id,$cons) = $hit_object->matches(); # no argument
834             : $id = $hit_object->matches('id');
835             : $cons = $hit_object->matches('cons');
836             Returns : Integer or a 2-element array of integers
837             Argument : [0] class = 'id' | 'cons' OR none.
838             : [1] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
839             : ('sbjct' is synonymous with 'hit')
840             : If no argument is provided, both identical and conservative
841             : numbers are returned in a two element list.
842             : (Other terms can be used to refer to the conservative
843             : matches, e.g., 'positive'. All that is checked is whether or
844             : not the supplied string starts with 'id'. If not, the
845             : conservative matches are returned.)
846              
847             =cut
848              
849             sub matches {
850 6     6 1 8 my ($self, $class, $seqType) = @_;
851            
852             # no query/hit choice? The answer differs depending on sequence, since
853             # hsps could overlap on one sequence and not the other. Added an option,
854             # but otherwise will assume 'hit'
855 6   100     19 $seqType ||= 'hit';
856 6 50       11 $seqType = 'hit' if $seqType eq 'sbjct';
857            
858 6 100       14 unless (exists $self->{_id_matches}) {
859 3         10 $self->{_id_matches}->{hit} = scalar($self->seq_inds('hit', 'identical'));
860 3         16 $self->{_id_matches}->{query} = scalar($self->seq_inds('query', 'identical'));
861             }
862 6 100       23 unless (exists $self->{_con_matches}) {
863 3         11 foreach my $type ('hit', 'query') {
864             # 'conserved-not-identical' can give us 'identical' matches if hsps
865             # overlapped so have to get the difference
866 6         23 my %identicals = map { $_ => 1 } $self->seq_inds($type, 'identical');
  5970         6254  
867 6         582 my @conserved = $self->seq_inds($type, 'conserved-not-identical');
868            
869 6         10 my $real_conserved;
870 6         15 foreach (@conserved) {
871 100 100       105 unless (exists $identicals{$_}) {
872 80         51 $real_conserved++;
873             }
874             }
875 6         629 $self->{_con_matches}->{$type} = $real_conserved;
876             }
877             }
878            
879            
880 6 50       14 unless ($class) {
881 0         0 return ($self->{_id_matches}->{$seqType}, $self->{_con_matches}->{$seqType});
882             }
883             else {
884 6 100       27 if ($class =~ /^id/i) {
885 4         18 return $self->{_id_matches}->{$seqType};
886             }
887             else {
888 2         6 return $self->{_con_matches}->{$seqType};
889             }
890             }
891 0         0 return;
892             }
893              
894             =head2 start
895              
896             Usage : $sbjct->start( [seq_type] );
897             Purpose : Gets the start coordinate for the query, sbjct, or both sequences
898             : in the object. If there is more than one HSP, the lowest start
899             : value of all HSPs is returned.
900             Example : $qbeg = $sbjct->start('query');
901             : $sbeg = $sbjct->start('hit');
902             : ($qbeg, $sbeg) = $sbjct->start();
903             Returns : scalar context: integer
904             : array context without args: list of two integers (queryStart,
905             : sbjctStart)
906             : Array context can be "induced" by providing an argument of 'list'
907             : or 'array'.
908             Argument : 'query' or 'hit' or 'sbjct' (default = 'query') ('sbjct' is
909             synonymous with 'hit')
910              
911             =cut
912              
913             sub start {
914 12     12 1 19 my ($self, $seqType) = @_;
915            
916 12 50       30 unless ($self->get_field('num_hsps')) {
917 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
918 0         0 return;
919             }
920            
921 12 50 66     34 $seqType ||= (wantarray ? 'list' : 'query');
922 12 100       24 $seqType = 'hit' if $seqType eq 'sbjct';
923            
924 12 50       78 if ($seqType =~ /list|array/i) {
    100          
    50          
925 0         0 return ($self->get_field('query_start'), $self->get_field('hit_start'));
926             }
927             elsif ($seqType eq 'hit') {
928 5         11 return $self->get_field('hit_start');
929             }
930             elsif ($seqType eq 'query') {
931 7         18 return $self->get_field('query_start');
932             }
933             else {
934 0         0 $self->throw("Unknown sequence type '$seqType'");
935             }
936             }
937              
938             =head2 end
939              
940             Usage : $sbjct->end( [seq_type] );
941             Purpose : Gets the end coordinate for the query, sbjct, or both sequences
942             : in the object. If there is more than one HSP, the largest end
943             : value of all HSPs is returned.
944             Example : $qend = $sbjct->end('query');
945             : $send = $sbjct->end('hit');
946             : ($qend, $send) = $sbjct->end();
947             Returns : scalar context: integer
948             : array context without args: list of two integers
949             : (queryEnd, sbjctEnd)
950             : Array context can be "induced" by providing an argument
951             : of 'list' or 'array'.
952             Argument : 'query' or 'hit' or 'sbjct' (default = 'query') ('sbjct' is
953             synonymous with 'hit')
954              
955             =cut
956              
957             sub end {
958 12     12 1 16 my ($self, $seqType) = @_;
959            
960 12 50       26 unless ($self->get_field('num_hsps')) {
961 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
962 0         0 return;
963             }
964            
965 12 50 66     31 $seqType ||= (wantarray ? 'list' : 'query');
966 12 100       27 $seqType = 'hit' if $seqType eq 'sbjct';
967            
968 12 50       62 if ($seqType =~ /list|array/i) {
    100          
    50          
969 0         0 return ($self->get_field('query_end'), $self->get_field('hit_end'));
970             }
971             elsif ($seqType eq 'hit') {
972 5         9 return $self->get_field('hit_end');
973             }
974             elsif ($seqType eq 'query') {
975 7         14 return $self->get_field('query_end');
976             }
977             else {
978 0         0 $self->throw("Unknown sequence type '$seqType'");
979             }
980             }
981              
982             =head2 range
983              
984             Usage : $sbjct->range( [seq_type] );
985             Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
986             : in the HSP alignment.
987             Example : ($qbeg, $qend) = $sbjct->range('query');
988             : ($sbeg, $send) = $sbjct->range('hit');
989             Returns : Two-element array of integers
990             Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
991             ('sbjct' is synonymous with 'hit')
992             Throws : n/a
993              
994             See Also : L, L
995              
996             =cut
997              
998             sub range {
999 4     4 1 749 my ($self, $seqType) = @_;
1000 4   50     11 $seqType ||= 'query';
1001 4 50       9 $seqType = 'hit' if $seqType eq 'sbjct';
1002 4         7 return ($self->start($seqType), $self->end($seqType));
1003             }
1004              
1005             =head2 frac_identical
1006              
1007             Usage : $hit_object->frac_identical( [seq_type] );
1008             Purpose : Get the overall fraction of identical positions across all HSPs.
1009             : The number refers to only the aligned regions and does not
1010             : account for unaligned regions in between the HSPs, if any.
1011             Example : $frac_iden = $hit_object->frac_identical('query');
1012             Returns : Float (2-decimal precision, e.g., 0.75).
1013             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1014             : default = 'query' (but see comments below).
1015             : ('sbjct' is synonymous with 'hit')
1016              
1017             =cut
1018              
1019             sub frac_identical {
1020 3     3 1 23 my ($self, $seqType) = @_;
1021 3   50     14 $seqType ||= 'query';
1022 3         7 $seqType = lc($seqType);
1023 3 50       9 $seqType = 'hit' if $seqType eq 'sbjct';
1024            
1025 3         15 my $ident = $self->matches('id', $seqType);
1026 3         12 my $total = $self->length_aln($seqType);
1027 3         11 my $ratio = $ident / $total;
1028 3         60 my $ratio_rounded = sprintf( "%.3f", $ratio);
1029            
1030             # Round down if normal rounding yields 1 (just like blast)
1031 3 50 66     292 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1032 3         23 return $ratio_rounded;
1033             }
1034              
1035             =head2 frac_conserved
1036              
1037             Usage : $hit_object->frac_conserved( [seq_type] );
1038             Purpose : Get the overall fraction of conserved positions across all HSPs.
1039             : The number refers to only the aligned regions and does not
1040             : account for unaligned regions in between the HSPs, if any.
1041             Example : $frac_cons = $hit_object->frac_conserved('hit');
1042             Returns : Float (2-decimal precision, e.g., 0.75).
1043             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1044             : default = 'query' (but see comments below).
1045             : ('sbjct' is synonymous with 'hit')
1046              
1047             =cut
1048              
1049             sub frac_conserved {
1050 1     1 1 2 my ($self, $seqType) = @_;
1051 1   50     5 $seqType ||= 'query';
1052 1         2 $seqType = lc($seqType);
1053 1 50       3 $seqType = 'hit' if $seqType eq 'sbjct';
1054            
1055 1         4 my $consv = $self->matches('cons');
1056 1         2 my $total = $self->length_aln($seqType);
1057 1         2 my $ratio = $consv / $total;
1058 1         7 my $ratio_rounded = sprintf( "%.3f", $ratio);
1059            
1060             # Round down iff normal rounding yields 1 (just like blast)
1061 1 50 33     6 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1062 1         4 return $ratio_rounded;
1063             }
1064              
1065             =head2 frac_aligned_query
1066              
1067             Usage : $hit_object->frac_aligned_query();
1068             Purpose : Get the fraction of the query sequence which has been aligned
1069             : across all HSPs (not including intervals between non-overlapping
1070             : HSPs).
1071             Example : $frac_alnq = $hit_object->frac_aligned_query();
1072             Returns : Float (2-decimal precision, e.g., 0.75).
1073             Argument : none
1074             Comments : If you need data for each HSP, use hsps() and then interate
1075             : through the HSP objects.
1076              
1077             =cut
1078              
1079             sub frac_aligned_query {
1080 4     4 1 8 my $self = shift;
1081 4         17 return sprintf("%.2f", $self->length_aln('query') / $self->logical_length('query'));
1082             }
1083              
1084             =head2 frac_aligned_hit
1085              
1086             Usage : $hit_object->frac_aligned_hit();
1087             Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1088             : across all HSPs (not including intervals between non-overlapping
1089             : HSPs).
1090             Example : $frac_alnq = $hit_object->frac_aligned_hit();
1091             Returns : Float (2-decimal precision, e.g., 0.75).
1092             Argument : none
1093             Comments : If you need data for each HSP, use hsps() and then interate
1094             : through the HSP objects.
1095              
1096             =cut
1097              
1098             sub frac_aligned_hit {
1099 1     1 1 2 my $self = shift;
1100 1         3 return sprintf( "%.2f", $self->length_aln('sbjct') / $self->logical_length('sbjct'));
1101             }
1102              
1103             =head2 num_unaligned_hit
1104              
1105             Usage : $hit_object->num_unaligned_hit();
1106             Purpose : Get the number of the unaligned residues in the hit sequence.
1107             : Sums across all all HSPs.
1108             Example : $num_unaln = $hit_object->num_unaligned_hit();
1109             Returns : Integer
1110             Argument : none
1111             Comments : If you need data for each HSP, use hsps() and then interate
1112             : through the HSP objects.
1113              
1114             =cut
1115              
1116             sub num_unaligned_hit {
1117 1     1 1 2 my $self = shift;
1118             # why does this method even exist?!
1119 1         3 return $self->gaps('hit');
1120             }
1121              
1122             =head2 num_unaligned_query
1123              
1124             Usage : $hit_object->num_unaligned_query();
1125             Purpose : Get the number of the unaligned residues in the query sequence.
1126             : Sums across all all HSPs.
1127             Example : $num_unaln = $hit_object->num_unaligned_query();
1128             Returns : Integer
1129             Argument : none
1130             Comments : If you need data for each HSP, use hsps() and then interate
1131             : through the HSP objects.
1132              
1133             =cut
1134              
1135             sub num_unaligned_query {
1136 1     1 1 1 my $self = shift;
1137             # why does this method even exist?!
1138 1         3 return $self->gaps('query');
1139             }
1140              
1141             # aliasing for Steve's method names
1142             *hit_description = \&description;
1143             *hit_length = \&length;
1144              
1145             1;