line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SearchIO::blasttable |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SearchIO::blasttable - Driver module for SearchIO for parsing NCBI -m 8/9 format |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# do not use this module directly |
21
|
|
|
|
|
|
|
use Bio::SearchIO; |
22
|
|
|
|
|
|
|
my $parser = Bio::SearchIO->new(-file => $file, |
23
|
|
|
|
|
|
|
-format => 'blasttable'); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
while( my $result = $parser->next_result ) { |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 DESCRIPTION |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This module will support parsing NCBI -m 8 or -m 9 tabular output |
31
|
|
|
|
|
|
|
and WU-BLAST -mformat 2 or -mformat 3 tabular output. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 FEEDBACK |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head2 Mailing Lists |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
38
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
39
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
42
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 Support |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
I |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
51
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
52
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
53
|
|
|
|
|
|
|
with code and data examples if at all possible. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 Reporting Bugs |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
58
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
59
|
|
|
|
|
|
|
the web: |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Email jason-at-bioperl-dot-org |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 APPENDIX |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
70
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=cut |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Let the code begin... |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
package Bio::SearchIO::blasttable; |
78
|
2
|
|
|
2
|
|
8
|
use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $DefaultProgramName); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
145
|
|
79
|
2
|
|
|
2
|
|
9
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
36
|
|
80
|
2
|
|
|
2
|
|
563
|
use Bio::Search::Result::ResultFactory; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
45
|
|
81
|
2
|
|
|
2
|
|
553
|
use Bio::Search::Hit::HitFactory; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
41
|
|
82
|
2
|
|
|
2
|
|
566
|
use Bio::Search::HSP::HSPFactory; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
177
|
|
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
$DefaultProgramName = 'BLASTN'; |
85
|
|
|
|
|
|
|
$DEFAULT_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter'; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# mapping of terms to Bioperl hash keys |
88
|
|
|
|
|
|
|
%MODEMAP = ( |
89
|
|
|
|
|
|
|
'Result' => 'result', |
90
|
|
|
|
|
|
|
'Hit' => 'hit', |
91
|
|
|
|
|
|
|
'Hsp' => 'hsp' |
92
|
|
|
|
|
|
|
); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
%MAPPING = ( |
95
|
|
|
|
|
|
|
'Hsp_bit-score' => 'HSP-bits', |
96
|
|
|
|
|
|
|
'Hsp_score' => 'HSP-score', |
97
|
|
|
|
|
|
|
'Hsp_evalue' => 'HSP-evalue', |
98
|
|
|
|
|
|
|
'Hsp_query-from' => 'HSP-query_start', |
99
|
|
|
|
|
|
|
'Hsp_query-to' => 'HSP-query_end', |
100
|
|
|
|
|
|
|
'Hsp_hit-from' => 'HSP-hit_start', |
101
|
|
|
|
|
|
|
'Hsp_hit-to' => 'HSP-hit_end', |
102
|
|
|
|
|
|
|
'Hsp_positive' => 'HSP-conserved', |
103
|
|
|
|
|
|
|
'Hsp_identity' => 'HSP-identical', |
104
|
|
|
|
|
|
|
'Hsp_mismatches' => 'HSP-mismatches', |
105
|
|
|
|
|
|
|
'Hsp_qgapblocks' => 'HSP-query_gapblocks', |
106
|
|
|
|
|
|
|
'Hsp_hgapblocks' => 'HSP-hit_gapblocks', |
107
|
|
|
|
|
|
|
'Hsp_gaps' => 'HSP-hsp_gaps', |
108
|
|
|
|
|
|
|
'Hsp_hitgaps' => 'HSP-hit_gaps', |
109
|
|
|
|
|
|
|
'Hsp_querygaps' => 'HSP-query_gaps', |
110
|
|
|
|
|
|
|
'Hsp_align-len' => 'HSP-hsp_length', |
111
|
|
|
|
|
|
|
'Hsp_query-frame'=> 'HSP-query_frame', |
112
|
|
|
|
|
|
|
'Hsp_hit-frame' => 'HSP-hit_frame', |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
'Hit_id' => 'HIT-name', |
115
|
|
|
|
|
|
|
'Hit_len' => 'HIT-length', |
116
|
|
|
|
|
|
|
'Hit_accession' => 'HIT-accession', |
117
|
|
|
|
|
|
|
'Hit_def' => 'HIT-description', |
118
|
|
|
|
|
|
|
'Hit_signif' => 'HIT-significance', |
119
|
|
|
|
|
|
|
'Hit_score' => 'HIT-score', |
120
|
|
|
|
|
|
|
'Hit_bits' => 'HIT-bits', |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
'Result_program' => 'RESULT-algorithm_name', |
123
|
|
|
|
|
|
|
'Result_version' => 'RESULT-algorithm_version', |
124
|
|
|
|
|
|
|
'Result_query-def'=> 'RESULT-query_name', |
125
|
|
|
|
|
|
|
'Result_query-len'=> 'RESULT-query_length', |
126
|
|
|
|
|
|
|
'Result_query-acc'=> 'RESULT-query_accession', |
127
|
|
|
|
|
|
|
'Result_querydesc'=> 'RESULT-query_description', |
128
|
|
|
|
|
|
|
'Result_db' => 'RESULT-database_name', |
129
|
|
|
|
|
|
|
'Result_db-len' => 'RESULT-database_entries', |
130
|
|
|
|
|
|
|
'Result_db-let' => 'RESULT-database_letters', |
131
|
|
|
|
|
|
|
); |
132
|
|
|
|
|
|
|
|
133
|
2
|
|
|
2
|
|
12
|
use base qw(Bio::SearchIO); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
2928
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 new |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Title : new |
138
|
|
|
|
|
|
|
Usage : my $obj = Bio::SearchIO::blasttable->new(); |
139
|
|
|
|
|
|
|
Function: Builds a new Bio::SearchIO::blasttable object |
140
|
|
|
|
|
|
|
Returns : an instance of Bio::SearchIO::blasttable |
141
|
|
|
|
|
|
|
Args : |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=cut |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub _initialize { |
147
|
11
|
|
|
11
|
|
22
|
my ($self,@args) = @_; |
148
|
11
|
|
|
|
|
32
|
$self->SUPER::_initialize(@args); |
149
|
|
|
|
|
|
|
|
150
|
11
|
|
|
|
|
35
|
my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)], |
151
|
|
|
|
|
|
|
@args); |
152
|
11
|
|
66
|
|
|
57
|
$self->program_name($pname || $DefaultProgramName); |
153
|
11
|
|
|
|
|
32
|
$self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult')); |
154
|
11
|
|
|
|
|
19
|
$self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit')); |
155
|
11
|
|
|
|
|
15
|
$self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::GenericHSP')); |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 next_result |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Title : next_result |
162
|
|
|
|
|
|
|
Usage : my $result = $parser->next_result |
163
|
|
|
|
|
|
|
Function: Parse the next result from the data stream |
164
|
|
|
|
|
|
|
Returns : L |
165
|
|
|
|
|
|
|
Args : none |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub next_result{ |
171
|
12
|
|
|
12
|
1
|
518
|
my ($self) = @_; |
172
|
12
|
|
|
|
|
21
|
my ($lastquery,$lasthit); |
173
|
12
|
|
|
|
|
48
|
local $/ = "\n"; |
174
|
12
|
|
|
|
|
14
|
local $_; |
175
|
12
|
|
|
|
|
15
|
my ($alg, $ver); |
176
|
12
|
|
|
|
|
36
|
while( defined ($_ = $self->_readline) ) { |
177
|
|
|
|
|
|
|
# WU-BLAST -mformat 3 only |
178
|
766
|
100
|
|
|
|
1988
|
if(m{^#\s((?:\S+?)?BLAST[NPX])\s(\d+\.\d+.+\d{4}\])}) { |
|
|
100
|
|
|
|
|
|
179
|
1
|
|
|
|
|
3
|
($alg, $ver) = ($1, $2); |
180
|
|
|
|
|
|
|
# only one header for whole file with WU-BLAST |
181
|
|
|
|
|
|
|
# so $alg and $ver won't get set properly for |
182
|
|
|
|
|
|
|
# each result |
183
|
1
|
50
|
|
|
|
6
|
$self->program_name($alg) if $alg; |
184
|
1
|
50
|
|
|
|
6
|
$self->element({'Name' => 'Result_version', |
185
|
|
|
|
|
|
|
'Data' => $ver}) if $ver; |
186
|
1
|
|
|
|
|
3
|
next; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
# -m 9 only |
189
|
|
|
|
|
|
|
elsif(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) { |
190
|
1
|
|
|
|
|
3
|
($alg, $ver) = ($1, $2); |
191
|
1
|
|
|
|
|
4
|
next; |
192
|
|
|
|
|
|
|
} |
193
|
764
|
100
|
100
|
|
|
2801
|
next if /^#/ || /^\s*$/; |
194
|
|
|
|
|
|
|
|
195
|
686
|
|
|
|
|
2130
|
my @fields = split; |
196
|
686
|
50
|
|
|
|
997
|
next if @fields == 1; |
197
|
686
|
|
|
|
|
564
|
my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm, |
198
|
|
|
|
|
|
|
$qstart,$qend,$hstart,$hend,$evalue,$bits); |
199
|
|
|
|
|
|
|
# WU-BLAST-specific |
200
|
0
|
|
|
|
|
0
|
my ($num_scores, $raw_score, $identities, $positives, $percent_pos, |
201
|
|
|
|
|
|
|
$qgap_blocks,$qgaps, $sgap_blocks, $sgaps, $qframe, |
202
|
|
|
|
|
|
|
$sframe); |
203
|
|
|
|
|
|
|
# NCBI -m8 and -m9 |
204
|
686
|
100
|
33
|
|
|
1107
|
if (@fields == 12) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
205
|
8
|
|
|
|
|
12
|
($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm, |
206
|
|
|
|
|
|
|
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields; |
207
|
|
|
|
|
|
|
# NCBI -m8 and -m9, v 2.2.18+ |
208
|
|
|
|
|
|
|
} elsif (@fields == 13) { |
209
|
675
|
|
|
|
|
1274
|
($qname, $hname, $percent_id, $percent_pos, $hsp_len, $mismatches, $gapsm, |
210
|
|
|
|
|
|
|
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
# WU-BLAST -mformat 2 and 3 |
213
|
|
|
|
|
|
|
elsif ((@fields == 22) or (@fields == 24)) { |
214
|
3
|
|
|
|
|
11
|
($qname,$hname,$evalue,$num_scores, $bits, $raw_score, $hsp_len, |
215
|
|
|
|
|
|
|
$identities, $positives,$mismatches, $percent_id, $percent_pos, |
216
|
|
|
|
|
|
|
$qgap_blocks, $qgaps, $sgap_blocks, $sgaps, $qframe, $qstart, |
217
|
|
|
|
|
|
|
$qend, $sframe, $hstart,$hend,) = @fields; |
218
|
|
|
|
|
|
|
# we need total gaps in the alignment |
219
|
3
|
|
|
|
|
4
|
$gapsm=$qgaps+$sgaps; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
686
|
100
|
100
|
|
|
2129
|
if (@fields == 12 || @fields == 13) { |
223
|
|
|
|
|
|
|
# need to determine total gaps in the alignment for NCBI output |
224
|
|
|
|
|
|
|
# since NCBI reports number of gapopens and NOT total gaps |
225
|
683
|
|
|
|
|
1202
|
my $qlen = abs($qstart - $qend) + 1; |
226
|
683
|
|
|
|
|
504
|
my $querygaps = $hsp_len - $qlen; |
227
|
683
|
|
|
|
|
558
|
my $hlen = abs($hstart - $hend) + 1; |
228
|
683
|
|
|
|
|
476
|
my $hitgaps = $hsp_len - $hlen; |
229
|
683
|
|
|
|
|
638
|
$gapsm = $querygaps + $hitgaps; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Remember Jim's code is 0 based |
233
|
686
|
100
|
100
|
|
|
2679
|
if( defined $lastquery && |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
234
|
|
|
|
|
|
|
$lastquery ne $qname ) { |
235
|
4
|
|
|
|
|
13
|
$self->end_element({'Name' => 'Hit'}); |
236
|
4
|
|
|
|
|
16
|
$self->end_element({'Name' => 'Result'}); |
237
|
4
|
|
|
|
|
18
|
$self->_pushback($_); |
238
|
4
|
|
|
|
|
11
|
return $self->end_document; |
239
|
|
|
|
|
|
|
} elsif( ! defined $lastquery ) { |
240
|
11
|
|
|
|
|
23
|
$self->{'_result_count'}++; |
241
|
11
|
|
|
|
|
47
|
$self->start_element({'Name' => 'Result'}); |
242
|
11
|
|
66
|
|
|
49
|
$self->element({'Name' => 'Result_program', |
243
|
|
|
|
|
|
|
'Data' => $alg || $self->program_name}); |
244
|
11
|
100
|
|
|
|
33
|
$self->element({'Name' => 'Result_version', |
245
|
|
|
|
|
|
|
'Data' => $ver}) if $ver; |
246
|
11
|
|
|
|
|
38
|
$self->element({'Name' => 'Result_query-def', |
247
|
|
|
|
|
|
|
'Data' => $qname}); |
248
|
11
|
|
|
|
|
32
|
$self->start_element({'Name' => 'Hit'}); |
249
|
11
|
|
|
|
|
36
|
$self->element({'Name' => 'Hit_id', |
250
|
|
|
|
|
|
|
'Data' => $hname}); |
251
|
|
|
|
|
|
|
# we'll store the 1st hsp bits as the hit bits |
252
|
11
|
|
|
|
|
37
|
$self->element({'Name' => 'Hit_bits', |
253
|
|
|
|
|
|
|
'Data' => $bits}); |
254
|
|
|
|
|
|
|
# we'll store the 1st hsp value as the hit evalue |
255
|
11
|
|
|
|
|
36
|
$self->element({'Name' => 'Hit_signif', |
256
|
|
|
|
|
|
|
'Data' => $evalue}); |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
} elsif( $lasthit ne $hname ) { |
259
|
640
|
50
|
|
|
|
957
|
if( $self->in_element('hit') ) { |
260
|
640
|
|
|
|
|
1272
|
$self->end_element({'Name' => 'Hit'}); |
261
|
|
|
|
|
|
|
} |
262
|
640
|
|
|
|
|
1491
|
$self->start_element({'Name' => 'Hit'}); |
263
|
640
|
|
|
|
|
1496
|
$self->element({'Name' => 'Hit_id', |
264
|
|
|
|
|
|
|
'Data' => $hname}); |
265
|
|
|
|
|
|
|
# we'll store the 1st hsp bits as the hit bits |
266
|
640
|
|
|
|
|
1452
|
$self->element({'Name' => 'Hit_bits', |
267
|
|
|
|
|
|
|
'Data' => $bits}); |
268
|
|
|
|
|
|
|
# we'll store the 1st hsp value as the hit evalue |
269
|
640
|
|
|
|
|
1358
|
$self->element({'Name' => 'Hit_signif', |
270
|
|
|
|
|
|
|
'Data' => $evalue}); |
271
|
|
|
|
|
|
|
} |
272
|
682
|
|
|
|
|
1131
|
my $identical = $hsp_len - $mismatches - $gapsm; |
273
|
|
|
|
|
|
|
# If $positives value is absent, try to recover it from $percent_pos, |
274
|
|
|
|
|
|
|
# this is better than letting the program to assume "conserved == identical" |
275
|
682
|
100
|
100
|
|
|
2417
|
if (not defined $positives and defined $percent_pos) { |
276
|
671
|
|
|
|
|
2013
|
$positives = sprintf "%d", ($percent_pos * $hsp_len / 100); |
277
|
|
|
|
|
|
|
} |
278
|
682
|
|
|
|
|
1200
|
$self->start_element({'Name' => 'Hsp'}); |
279
|
682
|
|
|
|
|
1428
|
$self->element({'Name' => 'Hsp_evalue', |
280
|
|
|
|
|
|
|
'Data' => $evalue}); |
281
|
682
|
|
|
|
|
1581
|
$self->element({'Name' => 'Hsp_bit-score', |
282
|
|
|
|
|
|
|
'Data' => $bits}); |
283
|
682
|
|
|
|
|
1530
|
$self->element({'Name' => 'Hsp_identity', |
284
|
|
|
|
|
|
|
'Data' => $identical}); |
285
|
682
|
|
|
|
|
1546
|
$self->element({'Name' => 'Hsp_positive', |
286
|
|
|
|
|
|
|
'Data' => $positives}); |
287
|
682
|
|
|
|
|
1462
|
$self->element({'Name' => 'Hsp_gaps', |
288
|
|
|
|
|
|
|
'Data' => $gapsm}); |
289
|
682
|
|
|
|
|
1696
|
$self->element({'Name' => 'Hsp_query-from', |
290
|
|
|
|
|
|
|
'Data' => $qstart}); |
291
|
682
|
|
|
|
|
1550
|
$self->element({'Name' => 'Hsp_query-to', |
292
|
|
|
|
|
|
|
'Data' => $qend}); |
293
|
|
|
|
|
|
|
|
294
|
682
|
|
|
|
|
1499
|
$self->element({'Name' => 'Hsp_hit-from', |
295
|
|
|
|
|
|
|
'Data' => $hstart }); |
296
|
682
|
|
|
|
|
1493
|
$self->element({'Name' => 'Hsp_hit-to', |
297
|
|
|
|
|
|
|
'Data' => $hend }); |
298
|
682
|
|
|
|
|
1608
|
$self->element({'Name' => 'Hsp_align-len', |
299
|
|
|
|
|
|
|
'Data' => $hsp_len}); |
300
|
682
|
|
|
|
|
1382
|
$self->end_element({'Name' => 'Hsp'}); |
301
|
682
|
|
|
|
|
854
|
$lastquery = $qname; |
302
|
682
|
|
|
|
|
2393
|
$lasthit = $hname; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
# fencepost |
305
|
8
|
100
|
66
|
|
|
50
|
if( defined $lasthit && defined $lastquery ) { |
306
|
7
|
50
|
|
|
|
19
|
if( $self->in_element('hit') ) { |
307
|
7
|
|
|
|
|
20
|
$self->end_element({'Name' => 'Hit'}); |
308
|
|
|
|
|
|
|
} |
309
|
7
|
|
|
|
|
26
|
$self->end_element({'Name' => 'Result'}); |
310
|
7
|
|
|
|
|
25
|
return $self->end_document; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head2 start_element |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Title : start_element |
317
|
|
|
|
|
|
|
Usage : $eventgenerator->start_element |
318
|
|
|
|
|
|
|
Function: Handles a start element event |
319
|
|
|
|
|
|
|
Returns : none |
320
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub start_element{ |
326
|
10142
|
|
|
10142
|
1
|
6828
|
my ($self,$data) = @_; |
327
|
|
|
|
|
|
|
# we currently don't care about attributes |
328
|
10142
|
|
|
|
|
7753
|
my $nm = $data->{'Name'}; |
329
|
10142
|
100
|
|
|
|
13566
|
if( my $type = $MODEMAP{$nm} ) { |
330
|
1344
|
|
|
|
|
1558
|
$self->_mode($type); |
331
|
1344
|
50
|
|
|
|
1531
|
if( $self->_will_handle($type) ) { |
332
|
1344
|
|
|
|
|
2296
|
my $func = sprintf("start_%s",lc $type); |
333
|
1344
|
|
|
|
|
2073
|
$self->_eventHandler->$func($data->{'Attributes'}); |
334
|
|
|
|
|
|
|
} |
335
|
1344
|
|
|
|
|
1436
|
unshift @{$self->{'_elements'}}, $type; |
|
1344
|
|
|
|
|
1842
|
|
336
|
|
|
|
|
|
|
} |
337
|
10142
|
100
|
|
|
|
14369
|
if($nm eq 'Result') { |
338
|
11
|
|
|
|
|
16
|
$self->{'_values'} = {}; |
339
|
11
|
|
|
|
|
22
|
$self->{'_result'}= undef; |
340
|
11
|
|
|
|
|
16
|
$self->{'_mode'} = ''; |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=head2 end_element |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Title : start_element |
348
|
|
|
|
|
|
|
Usage : $eventgenerator->end_element |
349
|
|
|
|
|
|
|
Function: Handles an end element event |
350
|
|
|
|
|
|
|
Returns : none |
351
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=cut |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub end_element { |
357
|
10142
|
|
|
10142
|
1
|
7059
|
my ($self,$data) = @_; |
358
|
10142
|
|
|
|
|
7511
|
my $nm = $data->{'Name'}; |
359
|
10142
|
|
|
|
|
6547
|
my $rc; |
360
|
|
|
|
|
|
|
# Hsp are sort of weird, in that they end when another |
361
|
|
|
|
|
|
|
# object begins so have to detect this in end_element for now |
362
|
|
|
|
|
|
|
|
363
|
10142
|
100
|
|
|
|
16035
|
if( my $type = $MODEMAP{$nm} ) { |
|
|
50
|
|
|
|
|
|
364
|
1344
|
50
|
|
|
|
1786
|
if( $self->_will_handle($type) ) { |
365
|
1344
|
|
|
|
|
2819
|
my $func = sprintf("end_%s",lc $type); |
366
|
|
|
|
|
|
|
$rc = $self->_eventHandler->$func($self->{'_reporttype'}, |
367
|
1344
|
|
|
|
|
2267
|
$self->{'_values'}); |
368
|
|
|
|
|
|
|
} |
369
|
1344
|
|
|
|
|
1130
|
shift @{$self->{'_elements'}}; |
|
1344
|
|
|
|
|
1488
|
|
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
} elsif( $MAPPING{$nm} ) { |
372
|
8798
|
50
|
|
|
|
9135
|
if ( ref($MAPPING{$nm}) =~ /hash/i ) { |
373
|
0
|
|
|
|
|
0
|
my $key = (keys %{$MAPPING{$nm}})[0]; |
|
0
|
|
|
|
|
0
|
|
374
|
0
|
|
|
|
|
0
|
$self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'}; |
375
|
|
|
|
|
|
|
} else { |
376
|
8798
|
|
|
|
|
10801
|
$self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'}; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
} else { |
379
|
0
|
|
|
|
|
0
|
$self->warn( "unknown nm $nm ignoring\n"); |
380
|
|
|
|
|
|
|
} |
381
|
10142
|
|
|
|
|
7276
|
$self->{'_last_data'} = ''; # remove read data if we are at |
382
|
|
|
|
|
|
|
# end of an element |
383
|
10142
|
100
|
|
|
|
12325
|
$self->{'_result'} = $rc if( $nm eq 'Result' ); |
384
|
10142
|
|
|
|
|
9117
|
return $rc; |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 element |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Title : element |
391
|
|
|
|
|
|
|
Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); |
392
|
|
|
|
|
|
|
Function: Convience method that calls start_element, characters, end_element |
393
|
|
|
|
|
|
|
Returns : none |
394
|
|
|
|
|
|
|
Args : Hash ref with the keys 'Name' and 'Data' |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
sub element{ |
400
|
8798
|
|
|
8798
|
1
|
6632
|
my ($self,$data) = @_; |
401
|
8798
|
|
|
|
|
8284
|
$self->start_element($data); |
402
|
8798
|
|
|
|
|
8563
|
$self->characters($data); |
403
|
8798
|
|
|
|
|
8976
|
$self->end_element($data); |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=head2 characters |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Title : characters |
410
|
|
|
|
|
|
|
Usage : $eventgenerator->characters($str) |
411
|
|
|
|
|
|
|
Function: Send a character events |
412
|
|
|
|
|
|
|
Returns : none |
413
|
|
|
|
|
|
|
Args : string |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=cut |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub characters{ |
419
|
8798
|
|
|
8798
|
1
|
5960
|
my ($self,$data) = @_; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
# deep bug fix: set $self->{'_last_data'} to undef if $$data{Data} is |
422
|
|
|
|
|
|
|
# a valid slot, whose value is undef -- |
423
|
|
|
|
|
|
|
# allows an undef to be propagated to object constructors and |
424
|
|
|
|
|
|
|
# handled there as desired; in particular, when Hsp_postive => -conserved |
425
|
|
|
|
|
|
|
# is not defined (in BLASTN, e.g.), the value of hsp's {CONSERVED} property is |
426
|
|
|
|
|
|
|
# set to the value of {IDENTICAL}. |
427
|
|
|
|
|
|
|
#/maj |
428
|
|
|
|
|
|
|
# return unless ( defined $data->{'Data'} ); |
429
|
8798
|
50
|
|
|
|
23033
|
return unless ( grep /Data/, keys %$data ); |
430
|
8798
|
100
|
|
|
|
11737
|
if ( !defined $data->{'Data'} ) { |
431
|
8
|
|
|
|
|
8
|
$self->{'_last_data'} = undef; |
432
|
8
|
|
|
|
|
8
|
return; |
433
|
|
|
|
|
|
|
} |
434
|
8790
|
50
|
|
|
|
13979
|
if( $data->{'Data'} =~ /^\s+$/ ) { |
435
|
0
|
0
|
|
|
|
0
|
return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
|
438
|
8790
|
50
|
66
|
|
|
8786
|
if( $self->in_element('hsp') && |
439
|
|
|
|
|
|
|
$data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) { |
440
|
|
|
|
|
|
|
|
441
|
0
|
|
|
|
|
0
|
$self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'}; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
8790
|
|
|
|
|
9778
|
$self->{'_last_data'} = $data->{'Data'}; |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=head2 _mode |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Title : _mode |
450
|
|
|
|
|
|
|
Usage : $obj->_mode($newval) |
451
|
|
|
|
|
|
|
Function: |
452
|
|
|
|
|
|
|
Example : |
453
|
|
|
|
|
|
|
Returns : value of _mode |
454
|
|
|
|
|
|
|
Args : newvalue (optional) |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=cut |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub _mode{ |
460
|
1344
|
|
|
1344
|
|
1115
|
my ($self,$value) = @_; |
461
|
1344
|
50
|
|
|
|
1786
|
if( defined $value) { |
462
|
1344
|
|
|
|
|
1190
|
$self->{'_mode'} = $value; |
463
|
|
|
|
|
|
|
} |
464
|
1344
|
|
|
|
|
1236
|
return $self->{'_mode'}; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head2 within_element |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Title : within_element |
470
|
|
|
|
|
|
|
Usage : if( $eventgenerator->within_element($element) ) {} |
471
|
|
|
|
|
|
|
Function: Test if we are within a particular element |
472
|
|
|
|
|
|
|
This is different than 'in' because within can be tested |
473
|
|
|
|
|
|
|
for a whole block. |
474
|
|
|
|
|
|
|
Returns : boolean |
475
|
|
|
|
|
|
|
Args : string element name |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub within_element{ |
481
|
0
|
|
|
0
|
1
|
0
|
my ($self,$name) = @_; |
482
|
|
|
|
|
|
|
return 0 if ( ! defined $name && |
483
|
|
|
|
|
|
|
! defined $self->{'_elements'} || |
484
|
0
|
0
|
0
|
|
|
0
|
scalar @{$self->{'_elements'}} == 0) ; |
|
0
|
|
0
|
|
|
0
|
|
485
|
0
|
|
|
|
|
0
|
foreach ( @{$self->{'_elements'}} ) { |
|
0
|
|
|
|
|
0
|
|
486
|
0
|
0
|
|
|
|
0
|
if( $_ eq $name ) { |
487
|
0
|
|
|
|
|
0
|
return 1; |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
} |
490
|
0
|
|
|
|
|
0
|
return 0; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head2 in_element |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
Title : in_element |
496
|
|
|
|
|
|
|
Usage : if( $eventgenerator->in_element($element) ) {} |
497
|
|
|
|
|
|
|
Function: Test if we are in a particular element |
498
|
|
|
|
|
|
|
This is different than 'in' because within can be tested |
499
|
|
|
|
|
|
|
for a whole block. |
500
|
|
|
|
|
|
|
Returns : boolean |
501
|
|
|
|
|
|
|
Args : string element name |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=cut |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub in_element{ |
507
|
9437
|
|
|
9437
|
1
|
7026
|
my ($self,$name) = @_; |
508
|
9437
|
100
|
|
|
|
11927
|
return 0 if ! defined $self->{'_elements'}->[0]; |
509
|
9436
|
|
|
|
|
31375
|
return ( $self->{'_elements'}->[0] eq $name) |
510
|
|
|
|
|
|
|
} |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=head2 start_document |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Title : start_document |
516
|
|
|
|
|
|
|
Usage : $eventgenerator->start_document |
517
|
|
|
|
|
|
|
Function: Handles a start document event |
518
|
|
|
|
|
|
|
Returns : none |
519
|
|
|
|
|
|
|
Args : none |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=cut |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
sub start_document{ |
525
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
526
|
0
|
|
|
|
|
0
|
$self->{'_lasttype'} = ''; |
527
|
0
|
|
|
|
|
0
|
$self->{'_values'} = {}; |
528
|
0
|
|
|
|
|
0
|
$self->{'_result'}= undef; |
529
|
0
|
|
|
|
|
0
|
$self->{'_mode'} = ''; |
530
|
0
|
|
|
|
|
0
|
$self->{'_elements'} = []; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 end_document |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Title : end_document |
537
|
|
|
|
|
|
|
Usage : $eventgenerator->end_document |
538
|
|
|
|
|
|
|
Function: Handles an end document event |
539
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
540
|
|
|
|
|
|
|
Args : none |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=cut |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
sub end_document{ |
546
|
11
|
|
|
11
|
1
|
15
|
my ($self,@args) = @_; |
547
|
11
|
|
|
|
|
90
|
return $self->{'_result'}; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head2 result_count |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
Title : result_count |
553
|
|
|
|
|
|
|
Usage : my $count = $searchio->result_count |
554
|
|
|
|
|
|
|
Function: Returns the number of results we have processed |
555
|
|
|
|
|
|
|
Returns : integer |
556
|
|
|
|
|
|
|
Args : none |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=cut |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
sub result_count { |
562
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
563
|
0
|
|
|
|
|
0
|
return $self->{'_result_count'}; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
0
|
|
|
0
|
0
|
0
|
sub report_count { shift->result_count } |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=head2 program_name |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
Title : program_name |
572
|
|
|
|
|
|
|
Usage : $obj->program_name($newval) |
573
|
|
|
|
|
|
|
Function: Get/Set the program name |
574
|
|
|
|
|
|
|
Returns : value of program_name (a scalar) |
575
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=cut |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
sub program_name{ |
581
|
21
|
|
|
21
|
1
|
23
|
my $self = shift; |
582
|
|
|
|
|
|
|
|
583
|
21
|
100
|
|
|
|
48
|
$self->{'program_name'} = shift if @_; |
584
|
21
|
|
33
|
|
|
74
|
return $self->{'program_name'} || $DefaultProgramName; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=head2 _will_handle |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
Title : _will_handle |
591
|
|
|
|
|
|
|
Usage : Private method. For internal use only. |
592
|
|
|
|
|
|
|
if( $self->_will_handle($type) ) { ... } |
593
|
|
|
|
|
|
|
Function: Provides an optimized way to check whether or not an element of a |
594
|
|
|
|
|
|
|
given type is to be handled. |
595
|
|
|
|
|
|
|
Returns : Reference to EventHandler object if the element type is to be handled. |
596
|
|
|
|
|
|
|
undef if the element type is not to be handled. |
597
|
|
|
|
|
|
|
Args : string containing type of element. |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
Optimizations: |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=over 2 |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=item 1 |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
Using the cached pointer to the EventHandler to minimize repeated |
606
|
|
|
|
|
|
|
lookups. |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=item 2 |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
Caching the will_handle status for each type that is encountered so |
611
|
|
|
|
|
|
|
that it only need be checked by calling |
612
|
|
|
|
|
|
|
handler-Ewill_handle($type) once. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=back |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
This does not lead to a major savings by itself (only 5-10%). In |
617
|
|
|
|
|
|
|
combination with other optimizations, or for large parse jobs, the |
618
|
|
|
|
|
|
|
savings good be significant. |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
To test against the unoptimized version, remove the parentheses from |
621
|
|
|
|
|
|
|
around the third term in the ternary " ? : " operator and add two |
622
|
|
|
|
|
|
|
calls to $self-E_eventHandler(). |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=cut |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
sub _will_handle { |
627
|
2688
|
|
|
2688
|
|
1966
|
my ($self,$type) = @_; |
628
|
2688
|
|
|
|
|
2083
|
my $handler = $self->{'_handler'}; |
629
|
|
|
|
|
|
|
my $will_handle = defined($self->{'_will_handle_cache'}->{$type}) |
630
|
|
|
|
|
|
|
? $self->{'_will_handle_cache'}->{$type} |
631
|
2688
|
100
|
|
|
|
3912
|
: ($self->{'_will_handle_cache'}->{$type} = |
632
|
|
|
|
|
|
|
$handler->will_handle($type)); |
633
|
|
|
|
|
|
|
|
634
|
2688
|
50
|
|
|
|
5345
|
return $will_handle ? $handler : undef; |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
1; |