File Coverage

Bio/SearchIO/blast_pull.pm
Criterion Covered Total %
statement 54 65 83.0
branch 11 20 55.0
condition 8 16 50.0
subroutine 7 9 77.7
pod 3 3 100.0
total 83 113 73.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SearchIO::blast_pull
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SearchIO::blast_pull - A parser for BLAST output
17              
18             =head1 SYNOPSIS
19              
20             # do not use this class directly it is available through Bio::SearchIO
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'blast_pull',
23             -file => 't/data/new_blastn.txt');
24             while (my $result = $in->next_result) {
25             # this is a Bio::Search::Result::BlastPullResult object
26             print "Results for ", $result->query_name(), "\n";
27             while (my $hit = $result->next_hit) {
28             print $hit->name(), "\n";
29             while (my $hsp = $hit->next_hsp) {
30             print "length is ", $hsp->length(), "\n";
31             }
32             }
33             }
34              
35             =head1 DESCRIPTION
36              
37             This object implements a pull-parser for BLAST output. It is fast since it
38             only does work on request (hence 'pull').
39              
40             Currently only NCBI BLASTN and BLASTP are supported.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I
58              
59             rather than to the module maintainer directly. Many experienced and
60             reponsive experts will be able look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Sendu Bala
73              
74             Email bix@sendu.me.uk
75              
76             =head1 APPENDIX
77              
78             The rest of the documentation details each of the object methods.
79             Internal methods are usually preceded with a _
80              
81             =cut
82              
83             # Let the code begin...
84              
85             package Bio::SearchIO::blast_pull;
86              
87 1     1   6 use strict;
  1         1  
  1         26  
88 1     1   395 use Bio::Search::Result::BlastPullResult;
  1         3  
  1         31  
89              
90 1     1   6 use base qw(Bio::SearchIO Bio::PullParserI);
  1         1  
  1         631  
91              
92             =head2 new
93              
94             Title : new
95             Usage : my $obj = Bio::SearchIO::blast_pull->new();
96             Function: Builds a new Bio::SearchIO::blast_pull object
97             Returns : Bio::SearchIO::blast_pull
98             Args : -fh/-file => BLAST output filename
99             -format => 'blast_pull'
100             -evalue => float or scientific notation number to be used
101             as an evalue cutoff for hits
102             -score => integer or scientific notation number to be used
103             as a score value cutoff for hits
104             -piped_behaviour => 'temp_file'|'memory'|'sequential_read'
105              
106             -piped_behaviour defines what the parser should do if the input is
107             an unseekable filehandle (eg. piped input), see
108             Bio::PullParserI::chunk for details. Default is 'memory'.
109              
110             =cut
111              
112             sub _initialize {
113 12     12   60 my ($self, @args) = @_;
114            
115             # don't do normal SearchIO initialization
116            
117 12         53 my ($writer, $file, $fh, $piped_behaviour, $evalue, $score) =
118             $self->_rearrange([qw(WRITER
119             FILE FH
120             PIPED_BEHAVIOUR
121             EVALUE
122             SCORE)], @args);
123 12 50       30 $self->writer($writer) if $writer;
124            
125 12         128 $self->_fields( { ( header => undef,
126             algorithm => undef,
127             algorithm_version => undef,
128             algorithm_reference => '',
129             database_name => undef,
130             database_letters => undef,
131             database_entries => undef,
132             next_result => undef,
133             evalue_cutoff => '[unset]',
134             score_cutoff => '[unset]' ) } );
135            
136 12 50       22 $self->_fields->{evalue_cutoff} = $evalue if $evalue;
137 12 50       23 $self->_fields->{score_cutoff} = $score if $score;
138            
139 12         64 $self->_dependencies( { ( algorithm => 'header',
140             algorithm_version => 'header',
141             database_name => 'header',
142             database_letters => 'header',
143             database_entries => 'header' ) } );
144            
145 12   33     75 $self->chunk($file || $fh || $self->throw("-file or -fh must be supplied"),
      50        
146             -piped_behaviour => $piped_behaviour || 'memory');
147             }
148              
149             sub _discover_header {
150 12     12   18 my $self = shift;
151 12         25 $self->_chunk_seek(0);
152 12         28 my $header = $self->_get_chunk_by_end("\nQuery=");
153 12         25 $self->{_after_header} = $self->_chunk_tell;
154            
155             #*** won't catch all types? only support blastn/p now anyway
156 12         55 $header =~ /^(\S+) (\S+\s+\S+)/;
157 12         25 $self->_fields->{algorithm} = $1;
158 12         25 $self->_fields->{algorithm_version} = $2;
159            
160 12         39 my ($database) = $header =~ /^Database: (.+)/sm;
161            
162 12 100       23 unless ($database) {
163             # earlier versions put query before database?
164 7         17 my $header2 = $self->_get_chunk_by_end(".done\n");
165 7         72 ($database) = $header2 =~ /^Database: (.+)/sm;
166             }
167            
168 12         100 $database =~ s/\s+(\d\S+) sequences; (\d\S+) total letters.*//s;
169 12         25 my $entries = $1;
170 12         23 my $letters = $2;
171 12         31 $database =~ s/\n//g;
172 12         24 $entries =~ s/,//g;
173 12         30 $letters =~ s/,//g;
174 12         25 $self->_fields->{database_name} = $database;
175 12         21 $self->_fields->{database_entries} = $entries;
176 12         21 $self->_fields->{database_letters} = $letters;
177            
178 12         17 $self->_fields->{header} = 1;
179             }
180              
181             sub _discover_next_result {
182 19     19   33 my $self = shift;
183 19 50       34 return if $self->{_after_results};
184 19         40 my $type = $self->get_field('algorithm'); # also sets _after_header if not set
185            
186 19 50 66     69 if ($type eq 'BLASTN' || $type eq 'BLASTP') {
187 19 50       38 unless ($self->_sequential) {
188 19   66     79 $self->_chunk_seek($self->{_end_of_previous_result} || $self->{_after_header});
189            
190 19         55 my ($start, $end) = $self->_find_chunk_by_end("\nQuery=");
191 19 100       44 return if ($start == $end);
192            
193 17 50       33 unless ($end) {
194 0   0     0 $start = $self->{_end_of_previous_result} || $self->{_after_header};
195 0         0 $end = undef;
196             }
197            
198 17         34 $self->_fields->{next_result} = Bio::Search::Result::BlastPullResult->new(-chunk => [($self->chunk, $start, $end)],
199             -parent => $self);
200            
201 17         46 $self->{_end_of_previous_result} = $end;
202             }
203             else {
204             #*** doesn't work for the last result, needs fixing - try getting the database end chunk on failure?...
205 0         0 $self->throw("sequential mode not yet implemented");
206 0         0 my $chunk = $self->_get_chunk_by_end("\nQuery=");
207 0 0       0 $chunk || return;
208 0         0 $self->_fields->{next_result} = Bio::Search::Result::BlastPullResult->new(-chunk => [$chunk],
209             -parent => $self);
210             }
211             }
212             else {
213 0         0 $self->throw("Can only handle NCBI BLASTN and BLASTP right now");
214             }
215             }
216              
217             =head2 next_result
218              
219             Title : next_result
220             Usage : my $hit = $searchio->next_result;
221             Function: Returns the next Result from a search
222             Returns : Bio::Search::Result::ResultI object
223             Args : none
224              
225             =cut
226              
227             sub next_result {
228 19     19 1 67 my $self = shift;
229 19   100     44 my $result = $self->get_field('next_result') || return;
230            
231 17         23 undef $self->_fields->{next_result};
232            
233 17         38 $self->{'_result_count'}++;
234 17         45 return $result;
235             }
236              
237             =head2 result_count
238              
239             Title : result_count
240             Usage : my $count = $searchio->result_count
241             Function: Returns the number of results we have processed.
242             Returns : integer
243             Args : none
244              
245             =cut
246              
247             sub result_count {
248 0     0 1   my $self = shift;
249 0           return $self->{'_result_count'};
250             }
251              
252             =head2 rewind
253              
254             Title : rewind
255             Usage : $searchio->rewind;
256             Function: Allow one to reset the Result iterator to the beginning, so that
257             next_result() will subsequently return the first result and so on.
258              
259             NB: result objects are not cached, so you will get new result objects
260             each time you rewind. Also, note that result_count() counts the
261             number of times you have called next_result(), so will not be able
262             tell you how many results there were in the file if you use rewind().
263              
264             Returns : n/a
265             Args : none
266              
267             =cut
268              
269             sub rewind {
270 0     0 1   my $self = shift;
271 0           delete $self->{_end_of_previous_result};
272             }
273              
274             1;