File Coverage

Bio/Search/Result/BlastPullResult.pm
Criterion Covered Total %
statement 132 137 96.3
branch 62 74 83.7
condition 12 21 57.1
subroutine 13 14 92.8
pod 7 7 100.0
total 226 253 89.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Result::BlastPullResult
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Result::BlastPullResult - A parser and result object for BLASTN
17             results
18              
19             =head1 SYNOPSIS
20              
21             # generally we use Bio::SearchIO to build these objects
22             use Bio::SearchIO;
23             my $in = Bio::SearchIO->new(-format => 'blast_pull',
24             -file => 'result.blast');
25              
26             while (my $result = $in->next_result) {
27             print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
28             }
29              
30             =head1 DESCRIPTION
31              
32             This object implements a parser for NCBI BLASTN result output.
33              
34             =head1 FEEDBACK
35              
36             =head2 Mailing Lists
37              
38             User feedback is an integral part of the evolution of this and other
39             Bioperl modules. Send your comments and suggestions preferably to
40             the Bioperl mailing list. Your participation is much appreciated.
41              
42             bioperl-l@bioperl.org - General discussion
43             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44              
45             =head2 Support
46              
47             Please direct usage questions or support issues to the mailing list:
48              
49             I
50              
51             rather than to the module maintainer directly. Many experienced and
52             reponsive experts will be able look at the problem and quickly
53             address it. Please include a thorough description of the problem
54             with code and data examples if at all possible.
55              
56             =head2 Reporting Bugs
57              
58             Report bugs to the Bioperl bug tracking system to help us keep track
59             of the bugs and their resolution. Bug reports can be submitted via the
60             web:
61              
62             https://github.com/bioperl/bioperl-live/issues
63              
64             =head1 AUTHOR - Sendu Bala
65              
66             Email bix@sendu.me.uk
67              
68             =head1 CONTRIBUTORS
69              
70             Additional contributors names and emails here
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object methods.
75             Internal methods are usually preceded with a _
76              
77             =cut
78              
79             # Let the code begin...
80              
81             package Bio::Search::Result::BlastPullResult;
82              
83 1     1   5 use strict;
  1         2  
  1         24  
84              
85 1     1   380 use Bio::Search::Hit::BlastPullHit;
  1         110  
  1         35  
86              
87 1     1   59 use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
  1         2  
  1         514  
88              
89             =head2 new
90              
91             Title : new
92             Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
93             Function: Builds a new Bio::SearchIO::Result::hmmpfam object
94             Returns : Bio::SearchIO::Result::hmmpfam
95             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
96             -parent => Bio::PullParserI object (required if no -chunk)
97             -parameters => hash ref of search parameters (key => value), optional
98             -statistics => hash ref of search statistics (key => value), optional
99              
100             where the array ref provided to -chunk contains an IO object
101             for a filehandle to something representing the raw data of the
102             result, and $start and $end define the tell() position within the
103             filehandle that the result data starts and ends (optional; defaults
104             to start and end of the entire thing described by the filehandle)
105              
106             =cut
107              
108             sub new {
109 17     17 1 46 my ($class, @args) = @_;
110 17         53 my $self = $class->SUPER::new(@args);
111            
112 17         72 $self->_setup(@args);
113            
114 17         34 foreach my $field (qw( header hit_table hsp_table alignments next_model
115             models query_length stats_and_params)) {
116 136         178 $self->_fields->{$field} = undef;
117             }
118            
119 17         96 $self->_dependencies( { ( query_name => 'header',
120             query_accession => 'header',
121             query_description => 'header',
122             query_length => 'header',
123             hit_table => 'header',
124             num_hits => 'hit_table',
125             no_hits_found => 'hit_table' ) } );
126            
127 17         42 return $self;
128             }
129              
130             #
131             # PullParserI discovery methods so we can answer all ResultI questions
132             #
133              
134             sub _discover_header {
135 17     17   27 my $self = shift;
136 17         59 $self->_chunk_seek(0);
137 17         38 my $header = $self->_get_chunk_by_end("Value\n");
138 17 50       72 if (!$header) {
139 0         0 $header = $self->_get_chunk_by_end("***** No hits found ******\n");
140 0         0 $self->{_no_hits_found} = 1;
141             }
142 17 50       36 $self->throw("Invalid header returned") unless $header;
143 17         32 $self->{_after_header} = $self->_chunk_tell;
144            
145 17         89 ($self->_fields->{query_name}) = $header =~ /^\s*(\S+)/;
146 17         33 $self->_fields->{query_accession} = '';
147 17         29 $self->_fields->{query_description} = '';
148            
149 17 100       92 if ($header =~ /^Length=(\d+)/m) {
    100          
150 5         11 $self->_fields->{query_length} = $1;
151             }
152             elsif ($header =~ /\((\d+) letters\)/) {
153             # older form?
154 11         33 $self->_fields->{query_length} = $1;
155            
156 11 100       52 if ($header =~ /^\s*\(\d+ letters/) {
157             # there wasn't a query sequence name
158 1         4 $self->_fields->{query_name} = '';
159             }
160             }
161            
162 17         35 $self->_fields->{header} = 1;
163             }
164              
165             sub _discover_hit_table {
166 13     13   19 my $self = shift;
167 13         37 $self->_chunk_seek($self->{_after_header});
168            
169 13         39 my $table = $self->_get_chunk_by_end("\n>");
170            
171 13 50 66     37 if (!$table && !$self->{_no_hits_found}) {
172             # no alignments, presumably; hit table comprises the remainder of this
173             # result
174 2         5 while (my $line = $self->_get_chunk_by_nol(1)) {
175 22         50 $table .= $line;
176             }
177             }
178            
179 13   50     24 $table ||= '';
180            
181 13         23 $self->{_after_hit_table} = $self->_chunk_tell;
182            
183 13         26 my $evalue_cutoff = $self->get_field('evalue_cutoff');
184 13 50       44 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
185 13         28 my $score_cutoff = $self->get_field('score_cutoff');
186 13 50       39 undef $score_cutoff if $score_cutoff eq '[unset]';
187            
188 13         16 my @table;
189 13         25 my $no_hit = 1;
190 13         181 while ($table =~ /^(\S+)\s+(\S.*?)?\s+(\S+)\s+([\de]\S*)\s*\n/gm) {
191 93         110 $no_hit = 0;
192 93         242 my ($name, $desc, $score, $evalue) = ($1, $2, $3, $4);
193 93   100     129 $desc ||= '';
194 93 50       129 if ($evalue =~ /^e/) {
195 0         0 $evalue = '1'.$evalue;
196             }
197 93 50 33     134 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
198 93 50 33     124 next if ($score_cutoff && $score < $score_cutoff);
199 93         780 push(@table, [$name, $desc, $score, $evalue]);
200             }
201 13         32 $self->_fields->{hit_table} = \@table;
202 13 50       39 $self->{_next_hit_index} = @table > 0 ? 0 : -1;
203            
204 13         30 $self->_fields->{no_hits_found} = $no_hit;
205 13         34 $self->_fields->{num_hits} = @table;
206             }
207              
208             sub _discover_next_hit {
209 41     41   74 my $self = shift;
210 41         81 my $hit_table = $self->get_field('hit_table');
211 41 100       93 return if $self->{_next_hit_index} == -1;
212 39         50 my $hit_row = ${$hit_table}[$self->{_next_hit_index}];
  39         67  
213            
214 39   66     136 $self->_chunk_seek($self->{_end_of_previous_hit} || $self->{_after_hit_table});
215            
216 39         92 my ($start, $end) = $self->_find_chunk_by_end("\n>");
217 39 100       72 unless ($end) {
218 21   66     45 $start = $self->{_end_of_previous_hit} || $self->{_after_hit_table};
219 21         34 $end = $self->_chunk_true_end;
220             }
221             else {
222 18         36 $end += $self->_chunk_true_start;
223             }
224 39         65 $start += $self->_chunk_true_start;
225            
226 39         66 $self->{_end_of_previous_hit} = $end - $self->_chunk_true_start;
227            
228             #*** needs to inherit piped_behaviour, and we need to deal with _sequential
229             # ourselves
230 39         88 $self->_fields->{next_hit} = Bio::Search::Hit::BlastPullHit->new(-parent => $self,
231             -chunk => [$self->chunk, $start, $end],
232             -hit_data => $hit_row);
233            
234 39         91 $self->{_next_hit_index}++;
235            
236 39 100       47 if ($self->{_next_hit_index} > $#{$hit_table}) {
  39         97  
237 9         21 $self->{_next_hit_index} = -1;
238             }
239             }
240              
241             sub _discover_stats_and_params {
242 4     4   11 my $self = shift;
243 4         16 $self->_chunk_seek(0);
244 4         19 my ($start, $end) = $self->_find_chunk_by_end("\n Database: ");
245 4         12 $self->_chunk_seek($end);
246            
247 4         8 my $gapped = 0;
248 4         13 while ($self->_get_chunk_by_nol(1)) {
249 135 100       851 if (/Number of letters in database:\s+(\S+)/) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
250 4         9 my $stat = $1;
251 4         16 $stat =~ s/,//g;
252 4         16 $self->add_statistic('dbletters', $stat);
253             }
254             elsif (/Number of sequences in database:\s+(\S+)/) {
255 4         9 my $stat = $1;
256 4         9 $stat =~ s/,//g;
257 4         10 $self->add_statistic('dbentries', $stat);
258             }
259             elsif (/^Lambda/) {
260 8         16 my $line = $self->_get_chunk_by_nol(1);
261 8         31 $line =~ /\s+(\S+)\s+(\S+)\s+(\S+)/;
262 8 100       26 $self->add_statistic($gapped ? 'lambda_gapped' : 'lambda', $1);
263 8 100       21 $self->add_statistic($gapped ? 'kappa_gapped' : 'kappa', $2);
264 8 100       24 $self->add_statistic($gapped ? 'entropy_gapped' : 'entropy', $3);
265 8         15 $gapped = 1;
266             }
267             elsif (/^Matrix: (.+)\n/) {
268 4         22 $self->add_parameter('matrix', $1);
269             }
270             elsif (/^Gap Penalties: Existence: (\d+), Extension: (\d+)/) {
271 4         13 $self->add_parameter('gapopen', $1);
272 4         9 $self->add_parameter('gapext', $2);
273             }
274             elsif (/^Number of Hits to DB: (\d+)/) {
275 4         14 $self->add_statistic('Hits_to_DB', $1);
276             }
277             elsif (/^Number of extensions: (\d+)/) {
278 4         9 $self->add_statistic('num_extensions', $1);
279             }
280             elsif (/^Number of successful extensions: (\d+)/) {
281 4         11 $self->add_statistic('num_successful_extensions', $1);
282             }
283             elsif (/^Number of sequences better than (\S+):/) {
284 4         10 $self->add_parameter('expect', $1);
285             }
286             elsif (/^[Ll]ength of query: (\d+)/) {
287 4         10 $self->add_statistic('querylength', $1);
288             }
289             elsif (/^[Ee]ffective HSP length: (\d+)/) {
290 2         7 $self->add_statistic('effective_hsplength', $1);
291             }
292             elsif (/^[Ee]ffective length of database: (\d+)/) {
293 4         10 $self->add_statistic('effectivedblength', $1);
294             }
295             elsif (/^[Ee]ffective search space: (\d+)/) {
296 4         11 $self->add_statistic('effectivespace', $1);
297             }
298             elsif (/^[Ee]ffective search space used: (\d+)/) {
299 4         13 $self->add_statistic('effectivespaceused', $1);
300             }
301             elsif (/^([TAXS]\d?): (\d+)(?: \((\S+))?/) {
302 25         58 $self->add_statistic($1, $2);
303 25 100       73 $self->add_statistic($1.'_bits', $3) if $3;
304             }
305             }
306            
307 4         14 $self->_fields->{stats_and_params} = 1;
308             }
309              
310             =head2 next_hit
311              
312             Title : next_hit
313             Usage : while( $hit = $result->next_hit()) { ... }
314             Function: Returns the next available Hit object, representing potential
315             matches between the query and various entities from the database.
316             Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
317             Args : none
318              
319             =cut
320              
321             sub next_hit {
322 41     41 1 661 my $self = shift;
323 41         131 my $hit = $self->get_field('next_hit');
324 41         81 undef $self->_fields->{next_hit};
325 41         88 return $hit;
326             }
327              
328             =head2 hits
329              
330             Title : hits
331             Usage : my @hits = $result->hits
332             Function: Returns the HitI objects contained within this Result
333             Returns : Array of Bio::Search::Hit::HitI objects
334             Args : none
335              
336             See Also: L
337              
338             =cut
339              
340             sub hits {
341 2     2 1 3 my $self = shift;
342 2   50     9 my $old = $self->{_next_hit_index} || 0;
343 2         6 $self->rewind;
344 2         4 my @hits;
345 2         3 while (defined(my $hit = $self->next_hit)) {
346 20         47 push(@hits, $hit);
347             }
348 2 50       6 $self->{_next_hit_index} = @hits > 0 ? $old : -1;
349 2         9 return @hits;
350             }
351              
352             =head2 sort_hits
353              
354             Title : sort_hits
355             Usage : $result->sort_hits('
356             Function : Sorts the hits so that they come out in the desired order when
357             hits() or next_hit() is called.
358             Returns : n/a
359             Args : A coderef for the sort function. See the documentation on the Perl
360             sort() function for guidelines on writing sort functions.
361             By default the sort order is ascending significance value (ie.
362             most significant hits first).
363             *** example
364              
365             =cut
366              
367             sub sort_hits {
368 0     0 1 0 my ($self, $code_ref) = @_;
369 0         0 $self->throw("Not implemented yet");
370             }
371              
372             =head2 rewind
373              
374             Title : rewind
375             Usage : $result->rewind;
376             Function: Allow one to reset the Hit iterator to the beginning, so that
377             next_hit() will subsequently return the first hit and so on.
378             Returns : n/a
379             Args : none
380              
381             =cut
382              
383             sub rewind {
384 2     2 1 3 my $self = shift;
385 2 50       5 unless ($self->_fields->{hit_table}) {
386 2         5 $self->get_field('hit_table');
387             }
388 2 50       2 $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1;
  2         5  
389             }
390              
391             =head2 get_statistic
392              
393             Title : get_statistic
394             Usage : my $gap_ext = $result->get_statistic('kappa')
395             Function: Returns the value for a specific statistic available
396             from this result
397             Returns : string
398             Args : name of statistic (string)
399              
400             =cut
401              
402             sub get_statistic {
403 52     52 1 127 my $self = shift;
404 52         223 $self->get_field('stats_and_params');
405 52         204 return $self->SUPER::get_statistic(@_);
406             }
407              
408             =head2 get_parameter
409              
410             Title : get_parameter
411             Usage : my $gap_ext = $result->get_parameter('gapext')
412             Function: Returns the value for a specific parameter used
413             when running this result
414             Returns : string
415             Args : name of parameter (string)
416              
417             =cut
418              
419             sub get_parameter {
420 15     15 1 40 my $self = shift;
421 15         65 $self->get_field('stats_and_params');
422 15         65 return $self->SUPER::get_parameter(@_);
423             }
424              
425             1;