File Coverage

blib/lib/Bio/FastParsers/Blast/Table.pm
Criterion Covered Total %
statement 52 53 98.1
branch 15 16 93.7
condition 5 6 83.3
subroutine 11 11 100.0
pod 3 3 100.0
total 86 89 96.6


line stmt bran cond sub pod time code
1             package Bio::FastParsers::Blast::Table;
2             # ABSTRACT: Front-end class for tabular BLAST parser
3             $Bio::FastParsers::Blast::Table::VERSION = '0.213510';
4 7     7   3868 use Moose;
  7         3533933  
  7         48  
5 7     7   60573 use namespace::autoclean;
  7         61818  
  7         36  
6              
7 7     7   596 use autodie;
  7         15  
  7         80  
8              
9 7     7   42135 use List::AllUtils qw(mesh);
  7         34379  
  7         976  
10              
11             extends 'Bio::FastParsers::Base';
12              
13 7     7   3873 use Bio::FastParsers::Constants qw(:files);
  7         20  
  7         1149  
14 7     7   3629 use aliased 'Bio::FastParsers::Blast::Table::Hsp';
  7         5471  
  7         41  
15              
16             # TODO: recreate Table classes and internal classes through Templating
17             # TODO: check API consistency with Hmmer::Table and DomTable through synonyms
18             # TODO: document Hsp/Hit methods
19              
20             # public attributes (inherited)
21              
22              
23             # private attributes
24              
25             has '_line_iterator' => (
26             traits => ['Code'],
27             is => 'ro',
28             isa => 'CodeRef',
29             init_arg => undef,
30             lazy => 1,
31             builder => '_build_line_iterator',
32             handles => {
33             _next_line => 'execute',
34             },
35             );
36              
37             has '_last_' . $_ => (
38             is => 'ro',
39             isa => 'Str',
40             init_arg => undef,
41             default => q{},
42             writer => '_set_last_' . $_,
43             ) for qw(query hit);
44              
45              
46             ## no critic (ProhibitUnusedPrivateSubroutines)
47              
48             sub _build_line_iterator {
49 19     19   41 my $self = shift;
50              
51 19         567 open my $fh, '<', $self->file; # autodie
52 19     182   7892 return sub { <$fh> }; # return closure
  182         1535  
53             }
54              
55             ## use critic
56              
57             my @attrs = qw(
58             query_id hit_id
59             percent_identity hsp_length mismatches gaps
60             query_from query_to
61             hit_from hit_to
62             evalue bit_score
63             query_strand
64             hit_strand
65             query_start query_end
66             hit_start hit_end
67             ); # DID try to use MOP to get HSP attrs but order was not preserved
68              
69              
70             sub next_hsp {
71 89     89 1 414 my $self = shift;
72              
73             # optional args for next_hit/next_query mode
74 89         160 my $curr_query = shift;
75 89         150 my $curr_hit = shift;
76              
77             LINE:
78 89         3523 while (my $line = $self->_next_line) {
79              
80             # skip header/comments and empty lines
81 182         405 chomp $line;
82 182 100 66     4724 next LINE if $line =~ $COMMENT_LINE
83             || $line =~ $EMPTY_LINE;
84              
85             # process HSP line
86 92         575 my @fields = ( split(/\t/xms, $line), +1, +1 );
87              
88             # Fields for m8/m9 (now 6/7) format:
89             # 0. query id
90             # 1. subject id
91             # 2. % identity
92             # 3. alignment length
93             # 4. mismatches
94             # 5. gap opens
95             # 6. q. start => query_from
96             # 7. q. end => query_to
97             # 8. s. start => hit_from
98             # 9. s. end => hit_to
99             # 10. evalue
100             # 11. bit score
101             # [12.] query_strand
102             # [13.] hit_strand
103             # [14.] query_start
104             # [15.] query_end
105             # [16.] hit_start
106             # [17.] hit_end
107              
108             # optionally skip current line in next_hit/next_query mode
109 92 100       248 if (defined $curr_query) {
110 41 100       101 if (defined $curr_hit) {
111 34 100 100     297 next LINE if $fields[0] eq $curr_query
112             && $fields[1] eq $curr_hit;
113             }
114             else {
115 7 50       24 next LINE if $fields[0] eq $curr_query;
116             }
117             }
118              
119             # coerce numeric fields to numbers...
120             # ... and handle missing bitscores and evalues from USEARCH reports
121 89         276 @fields[2..9] = map { 0 + $_ } @fields[2..9];
  712         1623  
122 89 100       220 @fields[10..11] = map { $_ ne '*' ? 0 + $_ : undef } @fields[10..11];
  178         589  
123              
124             # add default strands and coordinates
125 89         223 push @fields, @fields[6..9];
126              
127             # fix query strand and coordinates based on query orientation
128 89 100       238 if ($fields[14] > $fields[15]) {
129 8         20 @fields[14,15] = @fields[15,14];
130 8         17 $fields[12] = -1;
131             }
132              
133             # fix hit strand and coordinates based on hit orientation
134 89 100       198 if ($fields[16] > $fields[17]) {
135 11         24 @fields[16,17] = @fields[17,16];
136 11         25 $fields[13] = -1;
137             }
138              
139             # build HSP object
140 89         5454 my $hsp = Hsp->new( { mesh @attrs, @fields } );
141              
142             # update last query and last hit
143             # Note: this allows mixing calls to next_hsp / next_hit / next_query
144 89         3794 $self->_set_last_query($hsp->query_id);
145 89         2806 $self->_set_last_hit( $hsp->hit_id );
146              
147             # return HSP object
148 89         472 return $hsp;
149             }
150              
151 0         0 return; # no more line to read
152             }
153              
154              
155              
156             sub next_hit {
157 31     31 1 241 my $self = shift;
158 31         1082 return $self->next_hsp( $self->_last_query, $self->_last_hit );
159             }
160              
161              
162              
163             sub next_query {
164 7     7 1 71 my $self = shift;
165 7         237 return $self->next_hsp( $self->_last_query );
166             }
167              
168              
169             __PACKAGE__->meta->make_immutable;
170             1;
171              
172             __END__
173              
174             =pod
175              
176             =head1 NAME
177              
178             Bio::FastParsers::Blast::Table - Front-end class for tabular BLAST parser
179              
180             =head1 VERSION
181              
182             version 0.213510
183              
184             =head1 SYNOPSIS
185              
186             use aliased 'Bio::FastParsers::Blast::Table';
187              
188             # open and parse BLAST report in tabular format
189             my $infile = 'test/blastp.m9';
190             my $report = Table->new( file => $infile );
191              
192             # loop through hsps
193             while (my $hsp = $report->next_hsp) {
194             my ($hit_id, $evalue) = ($hsp->hit_id, $hsp->evalue);
195             # ...
196             }
197              
198             # ...
199              
200             my $infile = 'test/multiquery-blastp.m9';
201             my $report = Table->new( file => $infile );
202              
203             # loop through first hits for each query
204             while (my $first_hit = $report->next_query) {
205             my ($hit_id, $evalue) = ($hsp->hit_id, $hsp->evalue);
206             # ...
207             }
208              
209             =head1 DESCRIPTION
210              
211             This module implements a parser for the standard tabular output format of the
212             BLAST program (e.g., C<-outfmt 7>). It provides methods for iterating over
213             queries, hits and HSPs (e.g., C<next_hsp>). Individual HSPs can then be
214             queried using methods described in L<Bio::FastParsers::Blast::Table::Hsp>.
215              
216             =head1 ATTRIBUTES
217              
218             =head2 file
219              
220             Path to BLAST report file in tabular format (m8/m9 or now 6/7) to be parsed
221              
222             =head1 METHODS
223              
224             =head2 next_hsp
225              
226             Shifts the first HSP of the report off and returns it, shortening the report
227             by 1 and moving everything down. If there are no more HSPs in the report,
228             returns C<undef>.
229              
230             # $report is a Bio::FastParsers::Blast::Table
231             while (my $hsp = $report->next_hsp) {
232             # process $hsp
233             # ...
234             }
235              
236             This method does not accept any arguments.
237              
238             =head2 next_hit
239              
240             Directly returns the first HSP of the next hit, skipping any remaining HSPs
241             for the current hit in the process. If there are no more hits in the report,
242             returns C<undef>. Useful for processing only the first HSP of each hit.
243              
244             # $report is a Bio::FastParsers::Blast::Table
245             while (my $first_hsp = $report->next_hit) {
246             # process $first_hsp
247             # ...
248             }
249              
250             This method does not accept any arguments.
251              
252             =head2 next_query
253              
254             Directly returns the first HSP of the next query, skipping any remaining
255             HSPs for the current query in the process. If there are no more queries in
256             the report, returns C<undef>. Useful for processing only the first hit of
257             each query.
258              
259             # $report is a Bio::FastParsers::Blast::Table
260             while (my $first_hit = $report->next_query) {
261             # process $first_hit
262             # ...
263             }
264              
265             This method does not accept any arguments.
266              
267             =head1 AUTHOR
268              
269             Denis BAURAIN <denis.baurain@uliege.be>
270              
271             =head1 COPYRIGHT AND LICENSE
272              
273             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
274              
275             This is free software; you can redistribute it and/or modify it under
276             the same terms as the Perl 5 programming language system itself.
277              
278             =cut