File Coverage

Bio/Search/Hit/BlastPullHit.pm
Criterion Covered Total %
statement 88 92 95.6
branch 22 24 91.6
condition 21 29 72.4
subroutine 12 14 85.7
pod 6 6 100.0
total 149 165 90.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Hit::BlastPullHit
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Hit::BlastPullHit - A parser and hit object for BLASTN hits
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'blast_pull',
23             -file => 'result.blast');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for BLASTN hit output.
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to
47             the Bioperl mailing list. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             of the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Sendu Bala
72              
73             Email bix@sendu.me.uk
74              
75             =head1 CONTRIBUTORS
76              
77             Additional contributors names and emails here
78              
79             =head1 APPENDIX
80              
81             The rest of the documentation details each of the object methods.
82             Internal methods are usually preceded with a _
83              
84             =cut
85              
86             # Let the code begin...
87              
88             package Bio::Search::Hit::BlastPullHit;
89              
90 1     1   5 use strict;
  1         1  
  1         23  
91              
92 1     1   396 use Bio::Search::HSP::BlastPullHSP;
  1         4  
  1         52  
93              
94 1     1   9 use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI);
  1         2  
  1         588  
95              
96             =head2 new
97              
98             Title : new
99             Usage : my $obj = Bio::Search::Hit::BlastNHit->new();
100             Function: Builds a new Bio::Search::Hit::BlastNHit object.
101             Returns : Bio::Search::Hit::BlastNHit
102             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
103             -parent => Bio::PullParserI object (required if no -chunk)
104             -hit_data => array ref with [name description score significance]
105              
106             where the array ref provided to -chunk contains an IO object
107             for a filehandle to something representing the raw data of the
108             hit, and $start and $end define the tell() position within the
109             filehandle that the hit data starts and ends (optional; defaults
110             to start and end of the entire thing described by the filehandle)
111              
112             =cut
113              
114             sub new {
115 39     39 1 109 my ($class, @args) = @_;
116 39         131 my $self = $class->SUPER::new(@args);
117            
118 39         144 $self->_setup(@args);
119            
120 39         77 my $fields = $self->_fields;
121 39         69 foreach my $field (qw( header start_end )) {
122 78         145 $fields->{$field} = undef;
123             }
124            
125 39         85 my $hit_data = $self->_raw_hit_data;
126 39 50 33     165 if ($hit_data && ref($hit_data) eq 'ARRAY') {
127 39         61 foreach my $field (qw(name description score significance)) {
128 156         143 $fields->{$field} = shift(@{$hit_data});
  156         313  
129             }
130             }
131            
132 39         248 $self->_dependencies( { ( name => 'header',
133             length => 'header',
134             description => 'header',
135             accession => 'header',
136             next_hsp => 'header',
137             query_start => 'start_end',
138             query_end => 'start_end',
139             hit_start => 'start_end',
140             hit_end => 'start_end' ) } );
141            
142 39         150 return $self;
143             }
144              
145             #
146             # PullParserI discovery methods so we can answer all HitI questions
147             #
148              
149             sub _discover_header {
150 20     20   29 my $self = shift;
151 20         65 $self->_chunk_seek(0);
152 20         53 my $header = $self->_get_chunk_by_end("\n Score = ");
153            
154 20 100       50 unless ($header) {
155             # no alignment or other data; all information was in the hit table of
156             # the result
157 3         9 $self->_calculate_accession_from_name;
158            
159 3         7 $self->_fields->{header} = 1;
160 3         6 return;
161             }
162            
163 17         38 $self->{_after_header} = $self->_chunk_tell;
164            
165 17         278 ($self->_fields->{name}, $self->_fields->{description}, $self->_fields->{length}) = $header =~ /^(\S+)\s+(\S.+?)?\s+Length\s*=\s*(\d+)/sm;
166 17 100       41 if ($self->_fields->{description}) {
167 15         30 $self->_fields->{description} =~ s/\n//g;
168             }
169             else {
170 2         6 $self->_fields->{description} = '';
171             }
172            
173 17         58 $self->_calculate_accession_from_name;
174            
175 17         41 $self->_fields->{header} = 1;
176             }
177              
178             sub _calculate_accession_from_name {
179 20     20   29 my $self = shift;
180 20         47 my $name = $self->get_field('name');
181 20 100       116 if ($name =~ /.+?\|.+?\|.+?\|(\w+)/) {
    100          
182 9         18 $self->_fields->{accession} = $1;
183             }
184             elsif ($self->_fields->{name} =~ /.+?\|(\w+)?\./) {
185             # old form?
186 6         18 $self->_fields->{accession} = $1;
187             }
188             else {
189 5         14 $self->_fields->{accession} = $name;
190             }
191             }
192              
193             sub _discover_start_end {
194 4     4   9 my $self = shift;
195            
196 4         10 my ($q_start, $q_end, $h_start, $h_end);
197 4         15 foreach my $hsp ($self->hsps) {
198 9         41 my ($this_q_start, $this_h_start) = $hsp->start;
199 9         38 my ($this_q_end, $this_h_end) = $hsp->end;
200            
201 9 100 100     40 if (! defined $q_start || $this_q_start < $q_start) {
202 6         12 $q_start = $this_q_start;
203             }
204 9 100 100     29 if (! defined $h_start || $this_h_start < $h_start) {
205 6         14 $h_start = $this_h_start;
206             }
207            
208 9 100 100     32 if (! defined $q_end || $this_q_end > $q_end) {
209 7         14 $q_end = $this_q_end;
210             }
211 9 100 100     35 if (! defined $h_end || $this_h_end > $h_end) {
212 5         17 $h_end = $this_h_end;
213             }
214             }
215            
216 4         35 $self->_fields->{query_start} = $q_start;
217 4         16 $self->_fields->{query_end} = $q_end;
218 4         16 $self->_fields->{hit_start} = $h_start;
219 4         12 $self->_fields->{hit_end} = $h_end;
220             }
221              
222             sub _discover_next_hsp {
223 177     177   251 my $self = shift;
224 177   66     429 my $pos = $self->{_end_of_previous_hsp} || $self->{_after_header};
225 177 50       299 return unless $pos;
226 177         474 $self->_chunk_seek($pos);
227            
228 177         437 my ($start, $end) = $self->_find_chunk_by_end("\n Score = ");
229 177 100 66     609 if ((defined $end && ($end + $self->_chunk_true_start) > $self->_chunk_true_end) || ! $end) {
      66        
230 45   66     176 $start = $self->{_end_of_previous_hsp} || $self->{_after_header};
231 45         131 $end = $self->_chunk_true_end;
232             }
233             else {
234 132         223 $end += $self->_chunk_true_start;
235             }
236 177         311 $start += $self->_chunk_true_start;
237            
238 177 100       291 return if $start >= $self->_chunk_true_end;
239            
240 145         256 $self->{_end_of_previous_hsp} = $end - $self->_chunk_true_start;
241            
242             #*** needs to inherit piped_behaviour, and we need to deal with _sequential
243             # ourselves
244 145         273 $self->_fields->{next_hsp} = Bio::Search::HSP::BlastPullHSP->new(-parent => $self,
245             -chunk => [$self->chunk, $start, $end]);
246             }
247              
248             sub _discover_num_hsps {
249 4     4   11 my $self = shift;
250 4         19 $self->_fields->{num_hsps} = $self->hsps;
251             }
252              
253             =head2 next_hsp
254              
255             Title : next_hsp
256             Usage : while( $hsp = $obj->next_hsp()) { ... }
257             Function : Returns the next available High Scoring Pair
258             Example :
259             Returns : L object or null if finished
260             Args : none
261              
262             =cut
263              
264             sub next_hsp {
265 177     177 1 271 my $self = shift;
266 177         423 my $hsp = $self->get_field('next_hsp');
267 177         321 undef $self->_fields->{next_hsp};
268 177         429 return $hsp;
269             }
270              
271             =head2 hsps
272              
273             Usage : $hit_object->hsps();
274             Purpose : Get a list containing all HSP objects.
275             Example : @hsps = $hit_object->hsps();
276             Returns : list of L objects.
277             Argument : none
278              
279             =cut
280              
281             sub hsps {
282 28     28 1 59 my $self = shift;
283 28         79 my $old = $self->{_end_of_previous_hsp};
284 28         109 $self->rewind;
285 28         65 my @hsps;
286 28         89 while (defined(my $hsp = $self->next_hsp)) {
287 140         337 push(@hsps, $hsp);
288             }
289 28         80 $self->{_end_of_previous_hsp} = $old;
290 28         137 return @hsps;
291             }
292              
293             =head2 hsp
294              
295             Usage : $hit_object->hsp( [string] );
296             Purpose : Get a single HSPI object for the present HitI object.
297             Example : $hspObj = $hit_object->hsp; # same as 'best'
298             : $hspObj = $hit_object->hsp('best');
299             : $hspObj = $hit_object->hsp('worst');
300             Returns : Object reference for a L object.
301             Argument : String (or no argument).
302             : No argument (default) = highest scoring HSP (same as 'best').
303             : 'best' = highest scoring HSP.
304             : 'worst' = lowest scoring HSP.
305             Throws : Exception if an unrecognized argument is used.
306              
307             See Also : L, L()
308              
309             =cut
310              
311             sub hsp {
312 0     0 1 0 my ($self, $type) = @_;
313 0   0     0 $type ||= 'best';
314 0         0 $self->throw_not_implemented;
315             }
316              
317             =head2 rewind
318              
319             Title : rewind
320             Usage : $result->rewind;
321             Function: Allow one to reset the HSP iterator to the beginning, so that
322             next_hsp() will subsequently return the first hsp and so on.
323             Returns : n/a
324             Args : none
325              
326             =cut
327              
328             sub rewind {
329 28     28 1 49 my $self = shift;
330 28         83 delete $self->{_end_of_previous_hsp};
331             }
332              
333             # have p() a synonym of significance()
334             sub p {
335 0     0 1   return shift->significance;
336             }
337              
338             1;