line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SearchIO::blast |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# 20030409 - sac |
15
|
|
|
|
|
|
|
# PSI-BLAST full parsing support. Rollout of new |
16
|
|
|
|
|
|
|
# model which will remove Steve's old psiblast driver |
17
|
|
|
|
|
|
|
# 20030424 - jason |
18
|
|
|
|
|
|
|
# Megablast parsing fix as reported by Neil Saunders |
19
|
|
|
|
|
|
|
# 20030427 - jason |
20
|
|
|
|
|
|
|
# Support bl2seq parsing |
21
|
|
|
|
|
|
|
# 20031124 - jason |
22
|
|
|
|
|
|
|
# Parse more blast statistics, lambda, entropy, etc |
23
|
|
|
|
|
|
|
# from WU-BLAST in frame-specific manner |
24
|
|
|
|
|
|
|
# 20060216 - cjf - fixed blast parsing for BLAST v2.2.13 output |
25
|
|
|
|
|
|
|
# 20071104 - dmessina - added support for WUBLAST -echofilter |
26
|
|
|
|
|
|
|
# 20071121 - cjf - fixed several bugs (bugs 2391, 2399, 2409) |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 NAME |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Bio::SearchIO::blast - Event generator for event based parsing of |
31
|
|
|
|
|
|
|
blast reports |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 SYNOPSIS |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Do not use this object directly - it is used as part of the |
36
|
|
|
|
|
|
|
# Bio::SearchIO system. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use Bio::SearchIO; |
39
|
|
|
|
|
|
|
my $searchio = Bio::SearchIO->new(-format => 'blast', |
40
|
|
|
|
|
|
|
-file => 't/data/ecolitst.bls'); |
41
|
|
|
|
|
|
|
while( my $result = $searchio->next_result ) { |
42
|
|
|
|
|
|
|
while( my $hit = $result->next_hit ) { |
43
|
|
|
|
|
|
|
while( my $hsp = $hit->next_hsp ) { |
44
|
|
|
|
|
|
|
# ... |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
This object encapsulated the necessary methods for generating events |
52
|
|
|
|
|
|
|
suitable for building Bio::Search objects from a BLAST report file. |
53
|
|
|
|
|
|
|
Read the L for more information about how to use this. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This driver can parse: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over 4 |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
NCBI produced plain text BLAST reports from blastall, this also |
62
|
|
|
|
|
|
|
includes PSIBLAST, PSITBLASTN, RPSBLAST, and bl2seq reports. NCBI XML |
63
|
|
|
|
|
|
|
BLAST output is parsed with the blastxml SearchIO driver |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item * |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
WU-BLAST all reports |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item * |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Jim Kent's BLAST-like output from his programs (BLASTZ, BLAT) |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item * |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
BLAST-like output from Paracel BTK output |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 bl2seq parsing |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Since I cannot differentiate between BLASTX and TBLASTN since bl2seq |
82
|
|
|
|
|
|
|
doesn't report the algorithm used - I assume it is BLASTX by default - |
83
|
|
|
|
|
|
|
you can supply the program type with -report_type in the SearchIO |
84
|
|
|
|
|
|
|
constructor i.e. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
my $parser = Bio::SearchIO->new(-format => 'blast', |
87
|
|
|
|
|
|
|
-file => 'bl2seq.tblastn.report', |
88
|
|
|
|
|
|
|
-report_type => 'tblastn'); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
This only really affects where the frame and strand information are |
91
|
|
|
|
|
|
|
put - they will always be on the $hsp-Equery instead of on the |
92
|
|
|
|
|
|
|
$hsp-Ehit part of the feature pair for blastx and tblastn bl2seq |
93
|
|
|
|
|
|
|
produced reports. Hope that's clear... |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head1 FEEDBACK |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Mailing Lists |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
100
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
101
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
104
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head2 Support |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
I |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
113
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
114
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
115
|
|
|
|
|
|
|
with code and data examples if at all possible. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 Reporting Bugs |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
120
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
121
|
|
|
|
|
|
|
web: |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
Email Jason Stajich jason-at-bioperl.org |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Steve Chervitz sac-at-bioperl.org |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head1 APPENDIX |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
136
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Let the code begin...' |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
package Bio::SearchIO::blast; |
143
|
|
|
|
|
|
|
|
144
|
12
|
|
|
12
|
|
4266
|
use Bio::SearchIO::IteratedSearchResultEventBuilder; |
|
12
|
|
|
|
|
18
|
|
|
12
|
|
|
|
|
327
|
|
145
|
12
|
|
|
12
|
|
51
|
use strict; |
|
12
|
|
|
|
|
13
|
|
|
12
|
|
|
|
|
255
|
|
146
|
12
|
|
|
|
|
849
|
use vars qw(%MAPPING %MODEMAP |
147
|
|
|
|
|
|
|
$DEFAULT_BLAST_WRITER_CLASS |
148
|
|
|
|
|
|
|
$MAX_HSP_OVERLAP |
149
|
|
|
|
|
|
|
$DEFAULT_SIGNIF |
150
|
|
|
|
|
|
|
$DEFAULT_SCORE |
151
|
|
|
|
|
|
|
$DEFAULTREPORTTYPE |
152
|
12
|
|
|
12
|
|
36
|
); |
|
12
|
|
|
|
|
15
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
155
|
12
|
|
|
12
|
|
37
|
use base qw(Bio::SearchIO); |
|
12
|
|
|
|
|
16
|
|
|
12
|
|
|
|
|
700
|
|
156
|
12
|
|
|
12
|
|
44
|
use Data::Dumper; |
|
12
|
|
|
|
|
12
|
|
|
12
|
|
|
|
|
5042
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
BEGIN { |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# mapping of NCBI Blast terms to Bioperl hash keys |
161
|
12
|
|
|
12
|
|
57
|
%MODEMAP = ( |
162
|
|
|
|
|
|
|
'BlastOutput' => 'result', |
163
|
|
|
|
|
|
|
'Iteration' => 'iteration', |
164
|
|
|
|
|
|
|
'Hit' => 'hit', |
165
|
|
|
|
|
|
|
'Hsp' => 'hsp' |
166
|
|
|
|
|
|
|
); |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# This should really be done more intelligently, like with |
169
|
|
|
|
|
|
|
# XSLT |
170
|
|
|
|
|
|
|
|
171
|
12
|
|
|
|
|
538
|
%MAPPING = ( |
172
|
|
|
|
|
|
|
'Hsp_bit-score' => 'HSP-bits', |
173
|
|
|
|
|
|
|
'Hsp_score' => 'HSP-score', |
174
|
|
|
|
|
|
|
'Hsp_evalue' => 'HSP-evalue', |
175
|
|
|
|
|
|
|
'Hsp_n', => 'HSP-n', |
176
|
|
|
|
|
|
|
'Hsp_pvalue' => 'HSP-pvalue', |
177
|
|
|
|
|
|
|
'Hsp_query-from' => 'HSP-query_start', |
178
|
|
|
|
|
|
|
'Hsp_query-to' => 'HSP-query_end', |
179
|
|
|
|
|
|
|
'Hsp_hit-from' => 'HSP-hit_start', |
180
|
|
|
|
|
|
|
'Hsp_hit-to' => 'HSP-hit_end', |
181
|
|
|
|
|
|
|
'Hsp_positive' => 'HSP-conserved', |
182
|
|
|
|
|
|
|
'Hsp_identity' => 'HSP-identical', |
183
|
|
|
|
|
|
|
'Hsp_gaps' => 'HSP-hsp_gaps', |
184
|
|
|
|
|
|
|
'Hsp_hitgaps' => 'HSP-hit_gaps', |
185
|
|
|
|
|
|
|
'Hsp_querygaps' => 'HSP-query_gaps', |
186
|
|
|
|
|
|
|
'Hsp_qseq' => 'HSP-query_seq', |
187
|
|
|
|
|
|
|
'Hsp_hseq' => 'HSP-hit_seq', |
188
|
|
|
|
|
|
|
'Hsp_midline' => 'HSP-homology_seq', |
189
|
|
|
|
|
|
|
'Hsp_align-len' => 'HSP-hsp_length', |
190
|
|
|
|
|
|
|
'Hsp_query-frame' => 'HSP-query_frame', |
191
|
|
|
|
|
|
|
'Hsp_hit-frame' => 'HSP-hit_frame', |
192
|
|
|
|
|
|
|
'Hsp_links' => 'HSP-links', |
193
|
|
|
|
|
|
|
'Hsp_group' => 'HSP-hsp_group', |
194
|
|
|
|
|
|
|
'Hsp_features' => 'HSP-hit_features', |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
'Hit_id' => 'HIT-name', |
197
|
|
|
|
|
|
|
'Hit_len' => 'HIT-length', |
198
|
|
|
|
|
|
|
'Hit_accession' => 'HIT-accession', |
199
|
|
|
|
|
|
|
'Hit_def' => 'HIT-description', |
200
|
|
|
|
|
|
|
'Hit_signif' => 'HIT-significance', |
201
|
|
|
|
|
|
|
# For NCBI blast, the description line contains bits. |
202
|
|
|
|
|
|
|
# For WU-blast, the description line contains score. |
203
|
|
|
|
|
|
|
'Hit_score' => 'HIT-score', |
204
|
|
|
|
|
|
|
'Hit_bits' => 'HIT-bits', |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
'Iteration_iter-num' => 'ITERATION-number', |
207
|
|
|
|
|
|
|
'Iteration_converged' => 'ITERATION-converged', |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
'BlastOutput_program' => 'RESULT-algorithm_name', |
210
|
|
|
|
|
|
|
'BlastOutput_version' => 'RESULT-algorithm_version', |
211
|
|
|
|
|
|
|
'BlastOutput_algorithm-reference' => 'RESULT-algorithm_reference', |
212
|
|
|
|
|
|
|
'BlastOutput_rid' => 'RESULT-rid', |
213
|
|
|
|
|
|
|
'BlastOutput_query-def' => 'RESULT-query_name', |
214
|
|
|
|
|
|
|
'BlastOutput_query-len' => 'RESULT-query_length', |
215
|
|
|
|
|
|
|
'BlastOutput_query-acc' => 'RESULT-query_accession', |
216
|
|
|
|
|
|
|
'BlastOutput_query-gi' => 'RESULT-query_gi', |
217
|
|
|
|
|
|
|
'BlastOutput_querydesc' => 'RESULT-query_description', |
218
|
|
|
|
|
|
|
'BlastOutput_db' => 'RESULT-database_name', |
219
|
|
|
|
|
|
|
'BlastOutput_db-len' => 'RESULT-database_entries', |
220
|
|
|
|
|
|
|
'BlastOutput_db-let' => 'RESULT-database_letters', |
221
|
|
|
|
|
|
|
'BlastOutput_inclusion-threshold' => 'RESULT-inclusion_threshold', |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
'Parameters_matrix' => { 'RESULT-parameters' => 'matrix' }, |
224
|
|
|
|
|
|
|
'Parameters_expect' => { 'RESULT-parameters' => 'expect' }, |
225
|
|
|
|
|
|
|
'Parameters_include' => { 'RESULT-parameters' => 'include' }, |
226
|
|
|
|
|
|
|
'Parameters_sc-match' => { 'RESULT-parameters' => 'match' }, |
227
|
|
|
|
|
|
|
'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' }, |
228
|
|
|
|
|
|
|
'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen' }, |
229
|
|
|
|
|
|
|
'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext' }, |
230
|
|
|
|
|
|
|
'Parameters_filter' => { 'RESULT-parameters' => 'filter' }, |
231
|
|
|
|
|
|
|
'Parameters_allowgaps' => { 'RESULT-parameters' => 'allowgaps' }, |
232
|
|
|
|
|
|
|
'Parameters_full_dbpath' => { 'RESULT-parameters' => 'full_dbpath' }, |
233
|
|
|
|
|
|
|
'Statistics_db-len' => { 'RESULT-statistics' => 'dbentries' }, |
234
|
|
|
|
|
|
|
'Statistics_db-let' => { 'RESULT-statistics' => 'dbletters' }, |
235
|
|
|
|
|
|
|
'Statistics_hsp-len' => |
236
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'effective_hsplength' }, |
237
|
|
|
|
|
|
|
'Statistics_query-len' => { 'RESULT-statistics' => 'querylength' }, |
238
|
|
|
|
|
|
|
'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace' }, |
239
|
|
|
|
|
|
|
'Statistics_eff-spaceused' => |
240
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'effectivespaceused' }, |
241
|
|
|
|
|
|
|
'Statistics_eff-dblen' => |
242
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'effectivedblength' }, |
243
|
|
|
|
|
|
|
'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' }, |
244
|
|
|
|
|
|
|
'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' }, |
245
|
|
|
|
|
|
|
'Statistics_entropy' => { 'RESULT-statistics' => 'entropy' }, |
246
|
|
|
|
|
|
|
'Statistics_gapped_kappa' => { 'RESULT-statistics' => 'kappa_gapped' }, |
247
|
|
|
|
|
|
|
'Statistics_gapped_lambda' => |
248
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'lambda_gapped' }, |
249
|
|
|
|
|
|
|
'Statistics_gapped_entropy' => |
250
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'entropy_gapped' }, |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
'Statistics_framewindow' => |
253
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'frameshiftwindow' }, |
254
|
|
|
|
|
|
|
'Statistics_decay' => { 'RESULT-statistics' => 'decayconst' }, |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
'Statistics_hit_to_db' => { 'RESULT-statistics' => 'Hits_to_DB' }, |
257
|
|
|
|
|
|
|
'Statistics_num_suc_extensions' => |
258
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'num_successful_extensions' }, |
259
|
|
|
|
|
|
|
'Statistics_length_adjustment' => { 'RESULT-statistics' => 'length_adjustment' }, |
260
|
|
|
|
|
|
|
'Statistics_number_of_hsps_better_than_expect_value_cutoff_without_gapping' => |
261
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'number_of_hsps_better_than_expect_value_cutoff_without_gapping' }, |
262
|
|
|
|
|
|
|
'Statistics_number_of_hsps_gapped' => { 'RESULT-statistics' => 'number_of_hsps_gapped' }, |
263
|
|
|
|
|
|
|
'Statistics_number_of_hsps_successfully_gapped' => { 'RESULT-statistics' => 'number_of_hsps_successfully_gapped' }, |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# WU-BLAST stats |
266
|
|
|
|
|
|
|
'Statistics_DFA_states' => { 'RESULT-statistics' => 'num_dfa_states' }, |
267
|
|
|
|
|
|
|
'Statistics_DFA_size' => { 'RESULT-statistics' => 'dfa_size' }, |
268
|
|
|
|
|
|
|
'Statistics_noprocessors' => |
269
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'no_of_processors' }, |
270
|
|
|
|
|
|
|
'Statistics_neighbortime' => |
271
|
|
|
|
|
|
|
{ 'RESULT-statistics' => 'neighborhood_generate_time' }, |
272
|
|
|
|
|
|
|
'Statistics_starttime' => { 'RESULT-statistics' => 'start_time' }, |
273
|
|
|
|
|
|
|
'Statistics_endtime' => { 'RESULT-statistics' => 'end_time' }, |
274
|
|
|
|
|
|
|
); |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# add WU-BLAST Frame-Based Statistics |
277
|
12
|
|
|
|
|
47
|
for my $frame ( 0 .. 3 ) { |
278
|
48
|
|
|
|
|
44
|
for my $strand ( '+', '-' ) { |
279
|
96
|
|
|
|
|
84
|
for my $ind ( |
280
|
|
|
|
|
|
|
qw(length efflength E S W T X X_gapped E2 |
281
|
|
|
|
|
|
|
E2_gapped S2) |
282
|
|
|
|
|
|
|
) |
283
|
|
|
|
|
|
|
{ |
284
|
1056
|
|
|
|
|
2080
|
$MAPPING{"Statistics_frame$strand$frame\_$ind"} = |
285
|
|
|
|
|
|
|
{ 'RESULT-statistics' => "Frame$strand$frame\_$ind" }; |
286
|
|
|
|
|
|
|
} |
287
|
96
|
|
|
|
|
81
|
for my $val (qw(lambda kappa entropy )) { |
288
|
288
|
|
|
|
|
211
|
for my $type (qw(used computed gapped)) { |
289
|
864
|
|
|
|
|
802
|
my $key = "Statistics_frame$strand$frame\_$val\_$type"; |
290
|
864
|
|
|
|
|
1148
|
my $val = |
291
|
|
|
|
|
|
|
{ 'RESULT-statistics' => |
292
|
|
|
|
|
|
|
"Frame$strand$frame\_$val\_$type" }; |
293
|
864
|
|
|
|
|
1051
|
$MAPPING{$key} = $val; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# add Statistics |
300
|
12
|
|
|
|
|
19
|
for my $stats ( |
301
|
|
|
|
|
|
|
qw(T A X1 X2 X3 S1 S2 X1_bits X2_bits X3_bits |
302
|
|
|
|
|
|
|
S1_bits S2_bits num_extensions |
303
|
|
|
|
|
|
|
num_successful_extensions |
304
|
|
|
|
|
|
|
seqs_better_than_cutoff |
305
|
|
|
|
|
|
|
posted_date |
306
|
|
|
|
|
|
|
search_cputime total_cputime |
307
|
|
|
|
|
|
|
search_actualtime total_actualtime |
308
|
|
|
|
|
|
|
no_of_processors ctxfactor) |
309
|
|
|
|
|
|
|
) |
310
|
|
|
|
|
|
|
{ |
311
|
264
|
|
|
|
|
203
|
my $key = "Statistics_$stats"; |
312
|
264
|
|
|
|
|
236
|
my $val = { 'RESULT-statistics' => $stats }; |
313
|
264
|
|
|
|
|
366
|
$MAPPING{$key} = $val; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# add WU-BLAST Parameters |
317
|
12
|
|
|
|
|
17
|
for my $param ( |
318
|
|
|
|
|
|
|
qw(span span1 span2 links warnings notes hspsepsmax |
319
|
|
|
|
|
|
|
hspsepqmax topcomboN topcomboE postsw cpus wordmask |
320
|
|
|
|
|
|
|
filter sort_by_pvalue sort_by_count sort_by_highscore |
321
|
|
|
|
|
|
|
sort_by_totalscore sort_by_subjectlength noseqs gi qtype |
322
|
|
|
|
|
|
|
qres V B Z Y M N) |
323
|
|
|
|
|
|
|
) |
324
|
|
|
|
|
|
|
{ |
325
|
348
|
|
|
|
|
256
|
my $key = "Parameters_$param"; |
326
|
348
|
|
|
|
|
356
|
my $val = { 'RESULT-parameters' => $param }; |
327
|
348
|
|
|
|
|
398
|
$MAPPING{$key} = $val; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
12
|
|
|
|
|
15
|
$DEFAULT_BLAST_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter'; |
331
|
12
|
|
|
|
|
15
|
$MAX_HSP_OVERLAP = 2; # Used when tiling multiple HSPs. |
332
|
12
|
|
|
|
|
35647
|
$DEFAULTREPORTTYPE = 'BLASTP'; # for bl2seq |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 new |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Title : new |
338
|
|
|
|
|
|
|
Usage : my $obj = Bio::SearchIO::blast->new(%args); |
339
|
|
|
|
|
|
|
Function: Builds a new Bio::SearchIO::blast object |
340
|
|
|
|
|
|
|
Returns : Bio::SearchIO::blast |
341
|
|
|
|
|
|
|
Args : Key-value pairs: |
342
|
|
|
|
|
|
|
-fh/-file => filehandle/filename to BLAST file |
343
|
|
|
|
|
|
|
-format => 'blast' |
344
|
|
|
|
|
|
|
-report_type => 'blastx', 'tblastn', etc -- only for bl2seq |
345
|
|
|
|
|
|
|
reports when you want to distinguish between |
346
|
|
|
|
|
|
|
tblastn and blastx reports (this only controls |
347
|
|
|
|
|
|
|
where the frame information is put - on the query |
348
|
|
|
|
|
|
|
or subject object. |
349
|
|
|
|
|
|
|
-inclusion_threshold => e-value threshold for inclusion in the |
350
|
|
|
|
|
|
|
PSI-BLAST score matrix model (blastpgp) |
351
|
|
|
|
|
|
|
-signif => float or scientific notation number to be used |
352
|
|
|
|
|
|
|
as a P- or Expect value cutoff |
353
|
|
|
|
|
|
|
-score => integer or scientific notation number to be used |
354
|
|
|
|
|
|
|
as a blast score value cutoff |
355
|
|
|
|
|
|
|
-bits => integer or scientific notation number to be used |
356
|
|
|
|
|
|
|
as a bit score value cutoff |
357
|
|
|
|
|
|
|
-hit_filter => reference to a function to be used for |
358
|
|
|
|
|
|
|
filtering hits based on arbitrary criteria. |
359
|
|
|
|
|
|
|
All hits of each BLAST report must satisfy |
360
|
|
|
|
|
|
|
this criteria to be retained. |
361
|
|
|
|
|
|
|
If a hit fails this test, it is ignored. |
362
|
|
|
|
|
|
|
This function should take a |
363
|
|
|
|
|
|
|
Bio::Search::Hit::BlastHit.pm object as its first |
364
|
|
|
|
|
|
|
argument and return true |
365
|
|
|
|
|
|
|
if the hit should be retained. |
366
|
|
|
|
|
|
|
Sample filter function: |
367
|
|
|
|
|
|
|
-hit_filter => sub { $hit = shift; |
368
|
|
|
|
|
|
|
$hit->gaps == 0; }, |
369
|
|
|
|
|
|
|
(Note: -filt_func is synonymous with -hit_filter) |
370
|
|
|
|
|
|
|
-overlap => integer. The amount of overlap to permit between |
371
|
|
|
|
|
|
|
adjacent HSPs when tiling HSPs. A reasonable value is 2. |
372
|
|
|
|
|
|
|
Default = $Bio::SearchIO::blast::MAX_HSP_OVERLAP. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
The following criteria are not yet supported: |
375
|
|
|
|
|
|
|
(these are probably best applied within this module rather than in the |
376
|
|
|
|
|
|
|
event handler since they would permit the parser to take some shortcuts.) |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
-check_all_hits => boolean. Check all hits for significance against |
379
|
|
|
|
|
|
|
significance criteria. Default = false. |
380
|
|
|
|
|
|
|
If false, stops processing hits after the first |
381
|
|
|
|
|
|
|
non-significant hit or the first hit that fails |
382
|
|
|
|
|
|
|
the hit_filter call. This speeds parsing, |
383
|
|
|
|
|
|
|
taking advantage of the fact that the hits |
384
|
|
|
|
|
|
|
are processed in the order they appear in the report. |
385
|
|
|
|
|
|
|
-min_query_len => integer to be used as a minimum for query sequence length. |
386
|
|
|
|
|
|
|
Reports with query sequences below this length will |
387
|
|
|
|
|
|
|
not be processed. Default = no minimum length. |
388
|
|
|
|
|
|
|
-best => boolean. Only process the best hit of each report; |
389
|
|
|
|
|
|
|
default = false. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=cut |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub _initialize { |
394
|
92
|
|
|
92
|
|
240
|
my ( $self, @args ) = @_; |
395
|
92
|
|
|
|
|
352
|
$self->SUPER::_initialize(@args); |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# Blast reports require a specialized version of the SREB due to the |
398
|
|
|
|
|
|
|
# possibility of iterations (PSI-BLAST). Forwarding all arguments to it. An |
399
|
|
|
|
|
|
|
# issue here is that we want to set new default object factories if none are |
400
|
|
|
|
|
|
|
# supplied. |
401
|
|
|
|
|
|
|
|
402
|
92
|
|
|
|
|
690
|
my $handler = Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args); |
403
|
92
|
|
|
|
|
221
|
$self->attach_EventHandler($handler); |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# 2006-04-26 move this to the attach_handler function in this module so we |
406
|
|
|
|
|
|
|
# can really reset the handler |
407
|
|
|
|
|
|
|
# Optimization: caching |
408
|
|
|
|
|
|
|
# the EventHandler since it is used a lot during the parse. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# $self->{'_handler_cache'} = $handler; |
411
|
|
|
|
|
|
|
|
412
|
92
|
|
|
|
|
333
|
my ($rpttype ) = $self->_rearrange( |
413
|
|
|
|
|
|
|
[ |
414
|
|
|
|
|
|
|
qw( |
415
|
|
|
|
|
|
|
REPORT_TYPE) |
416
|
|
|
|
|
|
|
], |
417
|
|
|
|
|
|
|
@args |
418
|
|
|
|
|
|
|
); |
419
|
92
|
100
|
|
|
|
406
|
defined $rpttype && ( $self->{'_reporttype'} = $rpttype ); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub attach_EventHandler { |
423
|
184
|
|
|
184
|
1
|
209
|
my ($self,$handler) = @_; |
424
|
|
|
|
|
|
|
|
425
|
184
|
|
|
|
|
595
|
$self->SUPER::attach_EventHandler($handler); |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# Optimization: caching the EventHandler since it is used a lot |
428
|
|
|
|
|
|
|
# during the parse. |
429
|
|
|
|
|
|
|
|
430
|
184
|
|
|
|
|
278
|
$self->{'_handler_cache'} = $handler; |
431
|
184
|
|
|
|
|
282
|
return; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head2 next_result |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Title : next_result |
437
|
|
|
|
|
|
|
Usage : my $hit = $searchio->next_result; |
438
|
|
|
|
|
|
|
Function: Returns the next Result from a search |
439
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
440
|
|
|
|
|
|
|
Args : none |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=cut |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
sub next_result { |
445
|
102
|
|
|
102
|
1
|
2271
|
my ($self) = @_; |
446
|
102
|
|
|
|
|
289
|
my $v = $self->verbose; |
447
|
102
|
|
|
|
|
186
|
my $data = ''; |
448
|
102
|
|
|
|
|
140
|
my $flavor = ''; |
449
|
102
|
|
|
|
|
194
|
$self->{'_seentop'} = 0; # start next report at top |
450
|
|
|
|
|
|
|
|
451
|
102
|
|
|
|
|
126
|
my ( $reporttype, $seenquery, $reportline, $reportversion ); |
452
|
0
|
|
|
|
|
0
|
my ( $seeniteration, $found_again ); |
453
|
102
|
|
|
|
|
385
|
my $incl_threshold = $self->inclusion_threshold; |
454
|
102
|
|
|
|
|
131
|
my $bl2seq_fix; |
455
|
102
|
|
|
|
|
291
|
$self->start_document(); # let the fun begin... |
456
|
102
|
|
|
|
|
118
|
my (@hit_signifs); |
457
|
102
|
|
|
|
|
182
|
my $gapped_stats = 0; # for switching between gapped/ungapped |
458
|
|
|
|
|
|
|
# lambda, K, H |
459
|
102
|
|
|
|
|
204
|
local $_ = "\n"; #consistency |
460
|
|
|
|
|
|
|
PARSER: |
461
|
102
|
|
|
|
|
418
|
while ( defined( $_ = $self->_readline ) ) { |
462
|
20670
|
100
|
|
|
|
45889
|
next if (/^\s+$/); # skip empty lines |
463
|
15331
|
100
|
|
|
|
22472
|
next if (/CPU time:/); |
464
|
15329
|
50
|
|
|
|
18904
|
next if (/^>\s*$/); |
465
|
15329
|
100
|
|
|
|
19636
|
next if (/[*]+\s+No hits found\s+[*]+/); |
466
|
15325
|
100
|
66
|
|
|
186180
|
if ( |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
467
|
|
|
|
|
|
|
/^((?:\S+?)?BLAST[NPX]?)\s+(.+)$/i # NCBI BLAST, PSIBLAST |
468
|
|
|
|
|
|
|
# RPSBLAST, MEGABLAST |
469
|
|
|
|
|
|
|
|| /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i #Paracel BTK |
470
|
|
|
|
|
|
|
) |
471
|
|
|
|
|
|
|
{ |
472
|
85
|
|
|
|
|
332
|
($reporttype, $reportversion) = ($1, $2); |
473
|
|
|
|
|
|
|
# need to keep track of whether this is WU-BLAST |
474
|
85
|
50
|
33
|
|
|
449
|
if ($reportversion && $reportversion =~ m{WashU$}) { |
475
|
0
|
|
|
|
|
0
|
$self->{'_wublast'}++; |
476
|
|
|
|
|
|
|
} |
477
|
85
|
|
|
|
|
568
|
$self->debug("blast.pm: Start of new report: $reporttype, $reportversion\n"); |
478
|
85
|
100
|
|
|
|
264
|
if ( $self->{'_seentop'} ) { |
479
|
|
|
|
|
|
|
# This handles multi-result input streams |
480
|
5
|
|
|
|
|
15
|
$self->_pushback($_); |
481
|
5
|
|
|
|
|
12
|
last PARSER; |
482
|
|
|
|
|
|
|
} |
483
|
80
|
|
|
|
|
278
|
$self->_start_blastoutput; |
484
|
80
|
100
|
|
|
|
232
|
if ($reporttype =~ /RPS-BLAST/) { |
485
|
4
|
|
|
|
|
10
|
$reporttype .= '(BLASTP)'; # default RPS-BLAST type |
486
|
|
|
|
|
|
|
} |
487
|
80
|
|
|
|
|
119
|
$reportline = $_; # to fix the fact that RPS-BLAST output is wrong |
488
|
80
|
|
|
|
|
397
|
$self->element( |
489
|
|
|
|
|
|
|
{ |
490
|
|
|
|
|
|
|
'Name' => 'BlastOutput_program', |
491
|
|
|
|
|
|
|
'Data' => $reporttype |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
); |
494
|
|
|
|
|
|
|
|
495
|
80
|
|
|
|
|
336
|
$self->element( |
496
|
|
|
|
|
|
|
{ |
497
|
|
|
|
|
|
|
'Name' => 'BlastOutput_version', |
498
|
|
|
|
|
|
|
'Data' => $reportversion |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
); |
501
|
80
|
|
|
|
|
311
|
$self->element( |
502
|
|
|
|
|
|
|
{ |
503
|
|
|
|
|
|
|
'Name' => 'BlastOutput_inclusion-threshold', |
504
|
|
|
|
|
|
|
'Data' => $incl_threshold |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
); |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
# parse the BLAST algorithm reference |
509
|
|
|
|
|
|
|
elsif(/^Reference:\s+(.*)$/) { |
510
|
|
|
|
|
|
|
# want to preserve newlines for the BLAST algorithm reference |
511
|
73
|
|
|
|
|
273
|
my $algorithm_reference = "$1\n"; |
512
|
73
|
|
|
|
|
206
|
$_ = $self->_readline; |
513
|
|
|
|
|
|
|
# while the current line, does not match an empty line, a RID:, a |
514
|
|
|
|
|
|
|
# Database:, or a query definition line (Query=) we are still |
515
|
|
|
|
|
|
|
# looking at the algorithm_reference, append it to what we parsed so |
516
|
|
|
|
|
|
|
# far |
517
|
73
|
|
66
|
|
|
609
|
while($_ !~ /^$/ && $_ !~ /^RID:/ && $_ !~ /^Database:/ && $_ !~ /^Query=/) { |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
518
|
146
|
|
|
|
|
331
|
$algorithm_reference .= "$_"; |
519
|
146
|
|
|
|
|
256
|
$_ = $self->_readline; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
# if we exited the while loop, we saw an empty line, a RID:, or a |
522
|
|
|
|
|
|
|
# Database:, so push it back |
523
|
73
|
|
|
|
|
266
|
$self->_pushback($_); |
524
|
73
|
|
|
|
|
275
|
$self->element( |
525
|
|
|
|
|
|
|
{ |
526
|
|
|
|
|
|
|
'Name' => 'BlastOutput_algorithm-reference', |
527
|
|
|
|
|
|
|
'Data' => $algorithm_reference |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
); |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
# parse BLAST RID (Request ID) |
532
|
|
|
|
|
|
|
elsif(/^RID:\s+(.*)$/) { |
533
|
9
|
|
|
|
|
25
|
my $rid = $1; |
534
|
9
|
|
|
|
|
39
|
$self->element( |
535
|
|
|
|
|
|
|
{ |
536
|
|
|
|
|
|
|
'Name' => 'BlastOutput_rid', |
537
|
|
|
|
|
|
|
'Data' => $rid |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
); |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
# added Windows workaround for bug 1985 |
542
|
|
|
|
|
|
|
elsif (/^(Searching|Results from round)/) { |
543
|
67
|
100
|
|
|
|
362
|
next unless $1 =~ /Results from round/; |
544
|
2
|
|
|
|
|
6
|
$self->debug("blast.pm: Possible psi blast iterations found...\n"); |
545
|
|
|
|
|
|
|
|
546
|
2
|
100
|
|
|
|
7
|
$self->in_element('hsp') |
547
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
548
|
2
|
100
|
|
|
|
6
|
$self->in_element('hit') |
549
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hit' } ); |
550
|
2
|
100
|
|
|
|
7
|
if ( defined $seeniteration ) { |
551
|
1
|
50
|
|
|
|
4
|
$self->within_element('iteration') |
552
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Iteration' } ); |
553
|
1
|
|
|
|
|
5
|
$self->start_element( { 'Name' => 'Iteration' } ); |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
else { |
556
|
1
|
|
|
|
|
5
|
$self->start_element( { 'Name' => 'Iteration' } ); |
557
|
|
|
|
|
|
|
} |
558
|
2
|
|
|
|
|
10
|
$seeniteration = 1; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
elsif (/^Query=\s*(.*)$/) { |
561
|
102
|
|
|
|
|
262
|
my $q = $1; |
562
|
102
|
|
|
|
|
495
|
$self->debug("blast.pm: Query= found...$_\n"); |
563
|
102
|
|
|
|
|
165
|
my $size = 0; |
564
|
102
|
100
|
|
|
|
253
|
if ( defined $seenquery ) { |
565
|
8
|
|
|
|
|
24
|
$self->_pushback($_); |
566
|
8
|
100
|
|
|
|
24
|
$self->_pushback($reportline) if $reportline; |
567
|
8
|
|
|
|
|
13
|
last PARSER; |
568
|
|
|
|
|
|
|
} |
569
|
94
|
100
|
|
|
|
215
|
if ( !defined $reporttype ) { |
570
|
14
|
|
|
|
|
50
|
$self->_start_blastoutput; |
571
|
14
|
50
|
|
|
|
36
|
if ( defined $seeniteration ) { |
572
|
0
|
0
|
|
|
|
0
|
$self->in_element('iteration') |
573
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Iteration' } ); |
574
|
0
|
|
|
|
|
0
|
$self->start_element( { 'Name' => 'Iteration' } ); |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
else { |
577
|
14
|
|
|
|
|
48
|
$self->start_element( { 'Name' => 'Iteration' } ); |
578
|
|
|
|
|
|
|
} |
579
|
14
|
|
|
|
|
28
|
$seeniteration = 1; |
580
|
|
|
|
|
|
|
} |
581
|
94
|
|
|
|
|
151
|
$seenquery = $q; |
582
|
94
|
|
|
|
|
223
|
$_ = $self->_readline; |
583
|
94
|
|
|
|
|
270
|
while ( defined($_) ) { |
584
|
142
|
50
|
|
|
|
298
|
if (/^Database:/) { |
585
|
0
|
|
|
|
|
0
|
$self->_pushback($_); |
586
|
0
|
|
|
|
|
0
|
last; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
# below line fixes length issue with BLAST v2.2.13; still works |
589
|
|
|
|
|
|
|
# with BLAST v2.2.12 |
590
|
142
|
100
|
100
|
|
|
866
|
if ( /\((\-?[\d,]+)\s+letters.*\)/ || /^Length=(\-?[\d,]+)/ ) { |
591
|
94
|
|
|
|
|
210
|
$size = $1; |
592
|
94
|
|
|
|
|
174
|
$size =~ s/,//g; |
593
|
94
|
|
|
|
|
150
|
last; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
else { |
596
|
|
|
|
|
|
|
# bug 2391 |
597
|
48
|
100
|
100
|
|
|
371
|
$q .= ($q =~ /\w$/ && $_ =~ /^\w/) ? " $_" : $_; |
598
|
48
|
|
|
|
|
366
|
$q =~ s/\s+/ /g; # this catches the newline as well |
599
|
48
|
|
|
|
|
496
|
$q =~ s/^ | $//g; |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
48
|
|
|
|
|
133
|
$_ = $self->_readline; |
603
|
|
|
|
|
|
|
} |
604
|
94
|
|
|
|
|
206
|
chomp($q); |
605
|
94
|
|
|
|
|
414
|
my ( $nm, $desc ) = split( /\s+/, $q, 2 ); |
606
|
94
|
100
|
|
|
|
557
|
$self->element( |
607
|
|
|
|
|
|
|
{ |
608
|
|
|
|
|
|
|
'Name' => 'BlastOutput_query-def', |
609
|
|
|
|
|
|
|
'Data' => $nm |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
) if $nm; |
612
|
94
|
|
|
|
|
458
|
$self->element( |
613
|
|
|
|
|
|
|
{ |
614
|
|
|
|
|
|
|
'Name' => 'BlastOutput_query-len', |
615
|
|
|
|
|
|
|
'Data' => $size |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
); |
618
|
94
|
100
|
|
|
|
402
|
defined $desc && $desc =~ s/\s+$//; |
619
|
94
|
|
|
|
|
332
|
$self->element( |
620
|
|
|
|
|
|
|
{ |
621
|
|
|
|
|
|
|
'Name' => 'BlastOutput_querydesc', |
622
|
|
|
|
|
|
|
'Data' => $desc |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
); |
625
|
94
|
|
|
|
|
408
|
my ( $gi, $acc, $version ) = $self->_get_seq_identifiers($nm); |
626
|
94
|
100
|
66
|
|
|
461
|
$version = defined($version) && length($version) ? ".$version" : ""; |
627
|
94
|
100
|
|
|
|
523
|
$self->element( |
628
|
|
|
|
|
|
|
{ |
629
|
|
|
|
|
|
|
'Name' => 'BlastOutput_query-acc', |
630
|
|
|
|
|
|
|
'Data' => "$acc$version" |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
) if $acc; |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# these elements are dropped with some multiquery reports; add |
635
|
|
|
|
|
|
|
# back here |
636
|
|
|
|
|
|
|
$self->element( |
637
|
|
|
|
|
|
|
{ |
638
|
|
|
|
|
|
|
'Name' => 'BlastOutput_db-len', |
639
|
|
|
|
|
|
|
'Data' => $self->{'_blsdb_length'} |
640
|
|
|
|
|
|
|
} |
641
|
94
|
100
|
|
|
|
337
|
) if $self->{'_blsdb_length'}; |
642
|
|
|
|
|
|
|
$self->element( |
643
|
|
|
|
|
|
|
{ |
644
|
|
|
|
|
|
|
'Name' => 'BlastOutput_db-let', |
645
|
|
|
|
|
|
|
'Data' => $self->{'_blsdb_letters'} |
646
|
|
|
|
|
|
|
} |
647
|
94
|
100
|
|
|
|
345
|
) if $self->{'_blsdb_letters'}; |
648
|
|
|
|
|
|
|
$self->element( |
649
|
|
|
|
|
|
|
{ |
650
|
|
|
|
|
|
|
'Name' => 'BlastOutput_db', |
651
|
|
|
|
|
|
|
'Data' => $self->{'_blsdb'} |
652
|
|
|
|
|
|
|
} |
653
|
94
|
100
|
|
|
|
419
|
) if $self->{'_blsdb_letters'}; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
# added check for WU-BLAST -echofilter option (bug 2388) |
656
|
|
|
|
|
|
|
elsif (/^>Unfiltered[+-]1$/) { |
657
|
|
|
|
|
|
|
# skip all of the lines of unfiltered sequence |
658
|
1
|
|
|
|
|
5
|
while($_ !~ /^Database:/) { |
659
|
69
|
|
|
|
|
113
|
$self->debug("Bypassing features line: $_"); |
660
|
69
|
|
|
|
|
82
|
$_ = $self->_readline; |
661
|
|
|
|
|
|
|
} |
662
|
1
|
|
|
|
|
4
|
$self->_pushback($_); |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
elsif (/Sequences producing significant alignments:/) { |
665
|
56
|
|
|
|
|
210
|
$self->debug("blast.pm: Processing NCBI-BLAST descriptions\n"); |
666
|
56
|
|
|
|
|
91
|
$flavor = 'ncbi'; |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# PSI-BLAST parsing needs to be fixed to specifically look |
669
|
|
|
|
|
|
|
# for old vs new per iteration, as sorting based on duplication |
670
|
|
|
|
|
|
|
# leads to bugs, see bug 1986 |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# The next line is not necessarily whitespace in psiblast reports. |
673
|
|
|
|
|
|
|
# Also note that we must look for the end of this section by testing |
674
|
|
|
|
|
|
|
# for a line with a leading >. Blank lines occur with this section |
675
|
|
|
|
|
|
|
# for psiblast. |
676
|
56
|
100
|
|
|
|
148
|
if ( !$self->in_element('iteration') ) { |
677
|
50
|
|
|
|
|
190
|
$self->start_element( { 'Name' => 'Iteration' } ); |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
# changed 8/28/2008 to exit hit table if blank line is found after an |
681
|
|
|
|
|
|
|
# appropriate line |
682
|
56
|
|
|
|
|
152
|
my $h_regex; |
683
|
|
|
|
|
|
|
my $seen_block; |
684
|
|
|
|
|
|
|
DESCLINE: |
685
|
56
|
|
|
|
|
195
|
while ( defined( my $descline = $self->_readline() ) ) { |
686
|
1354
|
100
|
|
|
|
2524
|
if ($descline =~ m{^\s*$}) { |
687
|
105
|
100
|
|
|
|
283
|
last DESCLINE if $seen_block; |
688
|
55
|
|
|
|
|
184
|
next DESCLINE; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
# any text match is part of block... |
691
|
1249
|
|
|
|
|
771
|
$seen_block++; |
692
|
|
|
|
|
|
|
# GCG multiline oddness... |
693
|
1249
|
100
|
|
|
|
1633
|
if ($descline =~ /^(\S+)\s+Begin:\s\d+\s+End:\s+\d+/xms) { |
694
|
3
|
|
|
|
|
7
|
my ($id, $nextline) = ($1, $self->_readline); |
695
|
3
|
|
|
|
|
9
|
$nextline =~ s{^!}{}; |
696
|
3
|
|
|
|
|
6
|
$descline = "$id $nextline"; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
# NCBI style hit table (no N) |
699
|
1249
|
100
|
|
|
|
204025
|
if ($descline =~ /(?
|
|
|
50
|
|
|
|
|
|
700
|
|
|
|
|
|
|
(\d*\.?(?:[\+\-eE]+)?\d+) # number (float or scientific notation) |
701
|
|
|
|
|
|
|
\s+ # space |
702
|
|
|
|
|
|
|
(\d*\.?(?:[\+\-eE]+)?\d+) # number (float or scientific notation) |
703
|
|
|
|
|
|
|
\s*$/xms) { |
704
|
|
|
|
|
|
|
|
705
|
1243
|
|
|
|
|
1713
|
my ( $score, $evalue ) = ($1, $2); |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# Some data clean-up so e-value will appear numeric to perl |
708
|
1243
|
|
|
|
|
1225
|
$evalue =~ s/^e/1e/i; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# This to handle no-HSP case |
711
|
1243
|
|
|
|
|
2833
|
my @line = split ' ',$descline; |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# we want to throw away the score, evalue |
714
|
1243
|
|
|
|
|
922
|
pop @line, pop @line; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# and N if it is present (of course they are not |
717
|
|
|
|
|
|
|
# really in that order, but it doesn't matter |
718
|
1243
|
50
|
|
|
|
2009
|
if ($3) { pop @line } |
|
0
|
|
|
|
|
0
|
|
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
# add the last 2 entries s.t. we can reconstruct |
721
|
|
|
|
|
|
|
# a minimal Hit object at the end of the day |
722
|
1243
|
|
|
|
|
4994
|
push @hit_signifs, [ $evalue, $score, shift @line, join( ' ', @line ) ]; |
723
|
|
|
|
|
|
|
} elsif ($descline =~ /^CONVERGED/i) { |
724
|
0
|
|
|
|
|
0
|
$self->element( |
725
|
|
|
|
|
|
|
{ |
726
|
|
|
|
|
|
|
'Name' => 'Iteration_converged', |
727
|
|
|
|
|
|
|
'Data' => 1 |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
); |
730
|
|
|
|
|
|
|
} else { |
731
|
6
|
|
|
|
|
27
|
$self->_pushback($descline); # Catch leading > (end of section) |
732
|
6
|
|
|
|
|
18
|
last DESCLINE; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
elsif (/Sequences producing High-scoring Segment Pairs:/) { |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# This block is for WU-BLAST, so we don't have to check for psi-blast stuff |
739
|
|
|
|
|
|
|
# skip the next line |
740
|
26
|
|
|
|
|
84
|
$self->debug("blast.pm: Processing WU-BLAST descriptions\n"); |
741
|
26
|
|
|
|
|
61
|
$_ = $self->_readline(); |
742
|
26
|
|
|
|
|
40
|
$flavor = 'wu'; |
743
|
|
|
|
|
|
|
|
744
|
26
|
100
|
|
|
|
58
|
if ( !$self->in_element('iteration') ) { |
745
|
23
|
|
|
|
|
86
|
$self->start_element( { 'Name' => 'Iteration' } ); |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
|
748
|
26
|
|
66
|
|
|
99
|
while ( defined( $_ = $self->_readline() ) |
749
|
|
|
|
|
|
|
&& !/^\s+$/ ) |
750
|
|
|
|
|
|
|
{ |
751
|
923
|
|
|
|
|
2818
|
my @line = split; |
752
|
923
|
|
|
|
|
782
|
pop @line; # throw away first number which is for 'N'col |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# add the last 2 entries to array s.t. we can reconstruct |
755
|
|
|
|
|
|
|
# a minimal Hit object at the end of the day |
756
|
923
|
|
|
|
|
4689
|
push @hit_signifs, |
757
|
|
|
|
|
|
|
[ pop @line, pop @line, shift @line, join( ' ', @line ) ]; |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
elsif (/^Database:\s*(.+?)\s*$/) { |
762
|
|
|
|
|
|
|
|
763
|
77
|
|
|
|
|
428
|
$self->debug("blast.pm: Database: $1\n"); |
764
|
77
|
|
|
|
|
162
|
my $db = $1; |
765
|
77
|
|
|
|
|
208
|
while ( defined( $_ = $self->_readline ) ) { |
766
|
155
|
100
|
|
|
|
582
|
if ( |
767
|
|
|
|
|
|
|
/^\s+(\-?[\d\,]+|\S+)\s+sequences\; |
768
|
|
|
|
|
|
|
\s+(\-?[\d,]+|\S+)\s+ # Deal with NCBI 2.2.8 OSX problems |
769
|
|
|
|
|
|
|
total\s+letters/ox |
770
|
|
|
|
|
|
|
) |
771
|
|
|
|
|
|
|
{ |
772
|
75
|
|
|
|
|
216
|
my ( $s, $l ) = ( $1, $2 ); |
773
|
75
|
|
|
|
|
180
|
$s =~ s/,//g; |
774
|
75
|
|
|
|
|
203
|
$l =~ s/,//g; |
775
|
75
|
|
|
|
|
318
|
$self->element( |
776
|
|
|
|
|
|
|
{ |
777
|
|
|
|
|
|
|
'Name' => 'BlastOutput_db-len', |
778
|
|
|
|
|
|
|
'Data' => $s |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
); |
781
|
75
|
|
|
|
|
297
|
$self->element( |
782
|
|
|
|
|
|
|
{ |
783
|
|
|
|
|
|
|
'Name' => 'BlastOutput_db-let', |
784
|
|
|
|
|
|
|
'Data' => $l |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
); |
787
|
|
|
|
|
|
|
# cache for next round in cases with multiple queries |
788
|
75
|
|
|
|
|
201
|
$self->{'_blsdb'} = $db; |
789
|
75
|
|
|
|
|
150
|
$self->{'_blsdb_length'} = $s; |
790
|
75
|
|
|
|
|
145
|
$self->{'_blsdb_letters'} = $l; |
791
|
75
|
|
|
|
|
133
|
last; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
else { |
794
|
80
|
|
|
|
|
69
|
chomp; |
795
|
80
|
|
|
|
|
158
|
$db .= $_; |
796
|
|
|
|
|
|
|
} |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
$self->element( |
799
|
|
|
|
|
|
|
{ |
800
|
77
|
|
|
|
|
272
|
'Name' => 'BlastOutput_db', |
801
|
|
|
|
|
|
|
'Data' => $db |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
); |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# move inside of a hit |
807
|
|
|
|
|
|
|
elsif (/^(?:Subject=|>)\s*(\S+)\s*(.*)?/) { |
808
|
922
|
|
|
|
|
1335
|
chomp; |
809
|
|
|
|
|
|
|
|
810
|
922
|
|
|
|
|
3661
|
$self->debug("blast.pm: Hit: $1\n"); |
811
|
922
|
100
|
|
|
|
1586
|
$self->in_element('hsp') |
812
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
813
|
922
|
100
|
|
|
|
2178
|
$self->in_element('hit') |
814
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hit' } ); |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
# special case when bl2seq reports don't have a leading |
817
|
|
|
|
|
|
|
# Query= |
818
|
922
|
100
|
|
|
|
2192
|
if ( !$self->within_element('result') ) { |
|
|
100
|
|
|
|
|
|
819
|
2
|
|
|
|
|
13
|
$self->_start_blastoutput; |
820
|
2
|
|
|
|
|
7
|
$self->start_element( { 'Name' => 'Iteration' } ); |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
elsif ( !$self->within_element('iteration') ) { |
823
|
2
|
|
|
|
|
8
|
$self->start_element( { 'Name' => 'Iteration' } ); |
824
|
|
|
|
|
|
|
} |
825
|
922
|
|
|
|
|
2338
|
$self->start_element( { 'Name' => 'Hit' } ); |
826
|
922
|
|
|
|
|
3299
|
my $id = $1; |
827
|
922
|
|
|
|
|
1224
|
my $restofline = $2; |
828
|
|
|
|
|
|
|
|
829
|
922
|
|
|
|
|
3746
|
$self->debug("Starting a hit: $1 $2\n"); |
830
|
922
|
|
|
|
|
2845
|
$self->element( |
831
|
|
|
|
|
|
|
{ |
832
|
|
|
|
|
|
|
'Name' => 'Hit_id', |
833
|
|
|
|
|
|
|
'Data' => $id |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
); |
836
|
922
|
|
|
|
|
2856
|
my ($gi, $acc, $version ) = $self->_get_seq_identifiers($id); |
837
|
922
|
|
|
|
|
2250
|
$self->element( |
838
|
|
|
|
|
|
|
{ |
839
|
|
|
|
|
|
|
'Name' => 'Hit_accession', |
840
|
|
|
|
|
|
|
'Data' => $acc |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
); |
843
|
|
|
|
|
|
|
# add hit significance (from the hit table) |
844
|
|
|
|
|
|
|
# this is where Bug 1986 went awry |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
# Changed for Bug2409; hit->significance and hit->score/bits derived |
847
|
|
|
|
|
|
|
# from HSPs, not hit table unless necessary |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
HITTABLE: |
850
|
922
|
|
|
|
|
2470
|
while (my $v = shift @hit_signifs) { |
851
|
642
|
|
|
|
|
778
|
my $tableid = $v->[2]; |
852
|
642
|
100
|
|
|
|
6860
|
if ($tableid !~ m{\Q$id\E}) { |
853
|
14
|
|
|
|
|
48
|
$self->debug("Hit table ID $tableid doesn't match current hit id $id, checking next hit table entry...\n"); |
854
|
14
|
|
|
|
|
46
|
next HITTABLE; |
855
|
|
|
|
|
|
|
} else { |
856
|
628
|
|
|
|
|
1104
|
last HITTABLE; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
} |
859
|
922
|
|
|
|
|
3079
|
while ( defined( $_ = $self->_readline() ) ) { |
860
|
1461
|
100
|
|
|
|
3790
|
next if (/^\s+$/); |
861
|
1458
|
|
|
|
|
1375
|
chomp; |
862
|
1458
|
100
|
|
|
|
3956
|
if (/Length\s*=\s*([\d,]+)/) { |
863
|
922
|
|
|
|
|
1560
|
my $l = $1; |
864
|
922
|
|
|
|
|
1006
|
$l =~ s/\,//g; |
865
|
922
|
|
|
|
|
2610
|
$self->element( |
866
|
|
|
|
|
|
|
{ |
867
|
|
|
|
|
|
|
'Name' => 'Hit_len', |
868
|
|
|
|
|
|
|
'Data' => $l |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
); |
871
|
922
|
|
|
|
|
1464
|
last; |
872
|
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
else { |
874
|
536
|
100
|
|
|
|
1179
|
if ($restofline !~ /\s$/) { # bug #3235 |
875
|
519
|
|
|
|
|
806
|
s/^\s(?!\s)/\x01/; #new line to concatenate desc lines with |
876
|
|
|
|
|
|
|
} |
877
|
536
|
100
|
100
|
|
|
2487
|
$restofline .= ($restofline =~ /\w$/ && $_ =~ /^\w/) ? " $_" : $_; |
878
|
536
|
|
|
|
|
4188
|
$restofline =~ s/\s+/ /g; # this catches the newline as well |
879
|
536
|
|
|
|
|
6433
|
$restofline =~ s/^ | $//g; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
} |
882
|
922
|
|
|
|
|
5135
|
$restofline =~ s/\s+/ /g; |
883
|
922
|
|
|
|
|
2205
|
$self->element( |
884
|
|
|
|
|
|
|
{ |
885
|
|
|
|
|
|
|
'Name' => 'Hit_def', |
886
|
|
|
|
|
|
|
'Data' => $restofline |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
); |
889
|
|
|
|
|
|
|
} |
890
|
|
|
|
|
|
|
elsif (/\s+(Plus|Minus) Strand HSPs:/i) { |
891
|
19
|
|
|
|
|
41
|
next; |
892
|
|
|
|
|
|
|
} |
893
|
|
|
|
|
|
|
elsif ( |
894
|
|
|
|
|
|
|
( $self->in_element('hit') || $self->in_element('hsp') ) |
895
|
|
|
|
|
|
|
&& # paracel genewise BTK |
896
|
|
|
|
|
|
|
m/Score\s*=\s*(\S+)\s*bits\s* # Bit score |
897
|
|
|
|
|
|
|
(?:\((\d+)\))?, # Raw score |
898
|
|
|
|
|
|
|
\s+Log\-Length\sScore\s*=\s*(\d+) # Log-Length score |
899
|
|
|
|
|
|
|
/ox |
900
|
|
|
|
|
|
|
) |
901
|
|
|
|
|
|
|
{ |
902
|
0
|
0
|
|
|
|
0
|
$self->in_element('hsp') |
903
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
904
|
0
|
|
|
|
|
0
|
$self->start_element( { 'Name' => 'Hsp' } ); |
905
|
|
|
|
|
|
|
|
906
|
0
|
|
|
|
|
0
|
$self->debug( "Got paracel genewise HSP score=$1\n"); |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
# Some data clean-up so e-value will appear numeric to perl |
909
|
0
|
|
|
|
|
0
|
my ( $bits, $score, $evalue ) = ( $1, $2, $3 ); |
910
|
0
|
|
|
|
|
0
|
$evalue =~ s/^e/1e/i; |
911
|
0
|
|
|
|
|
0
|
$self->element( |
912
|
|
|
|
|
|
|
{ |
913
|
|
|
|
|
|
|
'Name' => 'Hsp_score', |
914
|
|
|
|
|
|
|
'Data' => $score |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
); |
917
|
0
|
|
|
|
|
0
|
$self->element( |
918
|
|
|
|
|
|
|
{ |
919
|
|
|
|
|
|
|
'Name' => 'Hsp_bit-score', |
920
|
|
|
|
|
|
|
'Data' => $bits |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
); |
923
|
0
|
|
|
|
|
0
|
$self->element( |
924
|
|
|
|
|
|
|
{ |
925
|
|
|
|
|
|
|
'Name' => 'Hsp_evalue', |
926
|
|
|
|
|
|
|
'Data' => $evalue |
927
|
|
|
|
|
|
|
} |
928
|
|
|
|
|
|
|
); |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
elsif ( |
931
|
|
|
|
|
|
|
( $self->in_element('hit') || $self->in_element('hsp') ) |
932
|
|
|
|
|
|
|
&& # paracel hframe BTK |
933
|
|
|
|
|
|
|
m/Score\s*=\s*([^,\s]+), # Raw score |
934
|
|
|
|
|
|
|
\s*Expect\s*=\s*([^,\s]+), # E-value |
935
|
|
|
|
|
|
|
\s*P(?:\(\S+\))?\s*=\s*([^,\s]+) # P-value |
936
|
|
|
|
|
|
|
/ox |
937
|
|
|
|
|
|
|
) |
938
|
|
|
|
|
|
|
{ |
939
|
0
|
0
|
|
|
|
0
|
$self->in_element('hsp') |
940
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
941
|
0
|
|
|
|
|
0
|
$self->start_element( { 'Name' => 'Hsp' } ); |
942
|
|
|
|
|
|
|
|
943
|
0
|
|
|
|
|
0
|
$self->debug( "Got paracel hframe HSP score=$1\n"); |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
# Some data clean-up so e-value will appear numeric to perl |
946
|
0
|
|
|
|
|
0
|
my ( $score, $evalue, $pvalue ) = ( $1, $2, $3 ); |
947
|
0
|
0
|
|
|
|
0
|
$evalue = "1$evalue" if $evalue =~ /^e/; |
948
|
0
|
0
|
|
|
|
0
|
$pvalue = "1$pvalue" if $pvalue =~ /^e/; |
949
|
|
|
|
|
|
|
|
950
|
0
|
|
|
|
|
0
|
$self->element( |
951
|
|
|
|
|
|
|
{ |
952
|
|
|
|
|
|
|
'Name' => 'Hsp_score', |
953
|
|
|
|
|
|
|
'Data' => $score |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
); |
956
|
0
|
|
|
|
|
0
|
$self->element( |
957
|
|
|
|
|
|
|
{ |
958
|
|
|
|
|
|
|
'Name' => 'Hsp_evalue', |
959
|
|
|
|
|
|
|
'Data' => $evalue |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
); |
962
|
0
|
|
|
|
|
0
|
$self->element( |
963
|
|
|
|
|
|
|
{ |
964
|
|
|
|
|
|
|
'Name' => 'Hsp_pvalue', |
965
|
|
|
|
|
|
|
'Data' => $pvalue |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
); |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
elsif ( |
970
|
|
|
|
|
|
|
( $self->in_element('hit') || $self->in_element('hsp') ) |
971
|
|
|
|
|
|
|
&& # wublast |
972
|
|
|
|
|
|
|
m/Score\s*=\s*(\S+)\s* # Bit score |
973
|
|
|
|
|
|
|
\(([\d\.]+)\s*bits\), # Raw score |
974
|
|
|
|
|
|
|
\s*Expect\s*=\s*([^,\s]+), # E-value |
975
|
|
|
|
|
|
|
\s*(?:Sum)?\s* # SUM |
976
|
|
|
|
|
|
|
P(?:\(\d+\))?\s*=\s*([^,\s]+) # P-value |
977
|
|
|
|
|
|
|
(?:\s*,\s+Group\s*\=\s*(\d+))? # HSP Group |
978
|
|
|
|
|
|
|
/ox |
979
|
|
|
|
|
|
|
) |
980
|
|
|
|
|
|
|
{ # wu-blast HSP parse |
981
|
423
|
100
|
|
|
|
559
|
$self->in_element('hsp') |
982
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
983
|
423
|
|
|
|
|
1111
|
$self->start_element( { 'Name' => 'Hsp' } ); |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
# Some data clean-up so e-value will appear numeric to perl |
986
|
423
|
|
|
|
|
1835
|
my ( $score, $bits, $evalue, $pvalue, $group ) = |
987
|
|
|
|
|
|
|
( $1, $2, $3, $4, $5 ); |
988
|
423
|
|
|
|
|
627
|
$evalue =~ s/^e/1e/i; |
989
|
423
|
|
|
|
|
372
|
$pvalue =~ s/^e/1e/i; |
990
|
|
|
|
|
|
|
|
991
|
423
|
|
|
|
|
1052
|
$self->element( |
992
|
|
|
|
|
|
|
{ |
993
|
|
|
|
|
|
|
'Name' => 'Hsp_score', |
994
|
|
|
|
|
|
|
'Data' => $score |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
); |
997
|
423
|
|
|
|
|
1008
|
$self->element( |
998
|
|
|
|
|
|
|
{ |
999
|
|
|
|
|
|
|
'Name' => 'Hsp_bit-score', |
1000
|
|
|
|
|
|
|
'Data' => $bits |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
); |
1003
|
423
|
|
|
|
|
957
|
$self->element( |
1004
|
|
|
|
|
|
|
{ |
1005
|
|
|
|
|
|
|
'Name' => 'Hsp_evalue', |
1006
|
|
|
|
|
|
|
'Data' => $evalue |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
); |
1009
|
423
|
|
|
|
|
865
|
$self->element( |
1010
|
|
|
|
|
|
|
{ |
1011
|
|
|
|
|
|
|
'Name' => 'Hsp_pvalue', |
1012
|
|
|
|
|
|
|
'Data' => $pvalue |
1013
|
|
|
|
|
|
|
} |
1014
|
|
|
|
|
|
|
); |
1015
|
|
|
|
|
|
|
|
1016
|
423
|
100
|
|
|
|
1560
|
if ( defined $group ) { |
1017
|
29
|
|
|
|
|
80
|
$self->element( |
1018
|
|
|
|
|
|
|
{ |
1019
|
|
|
|
|
|
|
'Name' => 'Hsp_group', |
1020
|
|
|
|
|
|
|
'Data' => $group |
1021
|
|
|
|
|
|
|
} |
1022
|
|
|
|
|
|
|
); |
1023
|
|
|
|
|
|
|
} |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
} |
1026
|
|
|
|
|
|
|
elsif ( |
1027
|
|
|
|
|
|
|
( $self->in_element('hit') || $self->in_element('hsp') ) |
1028
|
|
|
|
|
|
|
&& # ncbi blast, works with 2.2.17 |
1029
|
|
|
|
|
|
|
m/^\sFeatures\s\w+\sthis\spart/xmso |
1030
|
|
|
|
|
|
|
# If the line begins with "Features in/flanking this part of subject sequence:" |
1031
|
|
|
|
|
|
|
) |
1032
|
|
|
|
|
|
|
{ |
1033
|
70
|
100
|
|
|
|
180
|
$self->in_element('hsp') |
1034
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
1035
|
70
|
|
|
|
|
143
|
my $featline; |
1036
|
70
|
|
|
|
|
243
|
$_ = $self->_readline; |
1037
|
70
|
|
|
|
|
365
|
while($_ !~ /^\s*$/) { |
1038
|
119
|
|
|
|
|
200
|
chomp; |
1039
|
119
|
|
|
|
|
255
|
$featline .= $_; |
1040
|
119
|
|
|
|
|
265
|
$_ = $self->_readline; |
1041
|
|
|
|
|
|
|
} |
1042
|
70
|
|
|
|
|
288
|
$self->_pushback($_); |
1043
|
70
|
|
|
|
|
879
|
$featline =~ s{(?:^\s+|\s+^)}{}g; |
1044
|
70
|
|
|
|
|
122
|
$featline =~ s{\n}{;}g; |
1045
|
70
|
|
|
|
|
283
|
$self->start_element( { 'Name' => 'Hsp' } ); |
1046
|
70
|
|
|
|
|
424
|
$self->element( |
1047
|
|
|
|
|
|
|
{ |
1048
|
|
|
|
|
|
|
'Name' => 'Hsp_features', |
1049
|
|
|
|
|
|
|
'Data' => $featline |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
); |
1052
|
70
|
|
|
|
|
365
|
$self->{'_seen_hsp_features'} = 1; |
1053
|
|
|
|
|
|
|
} |
1054
|
|
|
|
|
|
|
elsif ( |
1055
|
|
|
|
|
|
|
( $self->in_element('hit') || $self->in_element('hsp') ) |
1056
|
|
|
|
|
|
|
&& # ncbi blast, works with 2.2.17 |
1057
|
|
|
|
|
|
|
m/Score\s*=\s*(\S+)\s*bits\s* # Bit score |
1058
|
|
|
|
|
|
|
(?:\((\d+)\))?, # Missing for BLAT pseudo-BLAST fmt |
1059
|
|
|
|
|
|
|
\s*Expect(?:\((\d+\+?)\))?\s*=\s*([^,\s]+) # E-value |
1060
|
|
|
|
|
|
|
/ox |
1061
|
|
|
|
|
|
|
) |
1062
|
|
|
|
|
|
|
{ # parse NCBI blast HSP |
1063
|
1200
|
100
|
|
|
|
2397
|
if( !$self->{'_seen_hsp_features'} ) { |
1064
|
1130
|
100
|
|
|
|
1459
|
$self->in_element('hsp') |
1065
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
1066
|
1130
|
|
|
|
|
2952
|
$self->start_element( { 'Name' => 'Hsp' } ); |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
# Some data clean-up so e-value will appear numeric to perl |
1070
|
1200
|
|
|
|
|
5015
|
my ( $bits, $score, $n, $evalue ) = ( $1, $2, $3, $4 ); |
1071
|
1200
|
|
|
|
|
1854
|
$evalue =~ s/^e/1e/i; |
1072
|
1200
|
|
|
|
|
3599
|
$self->element( |
1073
|
|
|
|
|
|
|
{ |
1074
|
|
|
|
|
|
|
'Name' => 'Hsp_score', |
1075
|
|
|
|
|
|
|
'Data' => $score |
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
); |
1078
|
1200
|
|
|
|
|
3376
|
$self->element( |
1079
|
|
|
|
|
|
|
{ |
1080
|
|
|
|
|
|
|
'Name' => 'Hsp_bit-score', |
1081
|
|
|
|
|
|
|
'Data' => $bits |
1082
|
|
|
|
|
|
|
} |
1083
|
|
|
|
|
|
|
); |
1084
|
1200
|
|
|
|
|
3046
|
$self->element( |
1085
|
|
|
|
|
|
|
{ |
1086
|
|
|
|
|
|
|
'Name' => 'Hsp_evalue', |
1087
|
|
|
|
|
|
|
'Data' => $evalue |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
); |
1090
|
1200
|
100
|
|
|
|
2246
|
$self->element( |
1091
|
|
|
|
|
|
|
{ |
1092
|
|
|
|
|
|
|
'Name' => 'Hsp_n', |
1093
|
|
|
|
|
|
|
'Data' => $n |
1094
|
|
|
|
|
|
|
} |
1095
|
|
|
|
|
|
|
) if defined $n; |
1096
|
1200
|
50
|
|
|
|
2160
|
$score = '' unless defined $score; # deal with BLAT which |
1097
|
|
|
|
|
|
|
# has no score only bits |
1098
|
1200
|
|
|
|
|
4422
|
$self->debug("Got NCBI HSP score=$score, evalue $evalue\n"); |
1099
|
|
|
|
|
|
|
} |
1100
|
|
|
|
|
|
|
elsif ( |
1101
|
|
|
|
|
|
|
$self->in_element('hsp') |
1102
|
|
|
|
|
|
|
&& m/Identities\s*=\s*(\d+)\s*\/\s*(\d+)\s*[\d\%\(\)]+\s* |
1103
|
|
|
|
|
|
|
(?:,\s*Positives\s*=\s*(\d+)\/(\d+)\s*[\d\%\(\)]+\s*)? # pos only valid for Protein alignments |
1104
|
|
|
|
|
|
|
(?:\,\s*Gaps\s*=\s*(\d+)\/(\d+))? # Gaps |
1105
|
|
|
|
|
|
|
/oxi |
1106
|
|
|
|
|
|
|
) |
1107
|
|
|
|
|
|
|
{ |
1108
|
1623
|
|
|
|
|
5816
|
$self->element( |
1109
|
|
|
|
|
|
|
{ |
1110
|
|
|
|
|
|
|
'Name' => 'Hsp_identity', |
1111
|
|
|
|
|
|
|
'Data' => $1 |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
); |
1114
|
1623
|
|
|
|
|
5123
|
$self->element( |
1115
|
|
|
|
|
|
|
{ |
1116
|
|
|
|
|
|
|
'Name' => 'Hsp_align-len', |
1117
|
|
|
|
|
|
|
'Data' => $2 |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
); |
1120
|
1623
|
100
|
|
|
|
2895
|
if ( defined $3 ) { |
1121
|
1251
|
|
|
|
|
2867
|
$self->element( |
1122
|
|
|
|
|
|
|
{ |
1123
|
|
|
|
|
|
|
'Name' => 'Hsp_positive', |
1124
|
|
|
|
|
|
|
'Data' => $3 |
1125
|
|
|
|
|
|
|
} |
1126
|
|
|
|
|
|
|
); |
1127
|
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
else { |
1129
|
372
|
|
|
|
|
1099
|
$self->element( |
1130
|
|
|
|
|
|
|
{ |
1131
|
|
|
|
|
|
|
'Name' => 'Hsp_positive', |
1132
|
|
|
|
|
|
|
'Data' => $1 |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
); |
1135
|
|
|
|
|
|
|
} |
1136
|
1623
|
100
|
|
|
|
3521
|
if ( defined $6 ) { |
1137
|
787
|
|
|
|
|
2053
|
$self->element( |
1138
|
|
|
|
|
|
|
{ |
1139
|
|
|
|
|
|
|
'Name' => 'Hsp_gaps', |
1140
|
|
|
|
|
|
|
'Data' => $5 |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
); |
1143
|
|
|
|
|
|
|
} |
1144
|
|
|
|
|
|
|
|
1145
|
1623
|
|
|
|
|
3536
|
$self->{'_Query'} = { 'begin' => 0, 'end' => 0 }; |
1146
|
1623
|
|
|
|
|
3018
|
$self->{'_Sbjct'} = { 'begin' => 0, 'end' => 0 }; |
1147
|
|
|
|
|
|
|
|
1148
|
1623
|
100
|
|
|
|
7393
|
if (/(Frame\s*=\s*.+)$/) { |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
# handle wu-blast Frame listing on same line |
1151
|
44
|
|
|
|
|
100
|
$self->_pushback($1); |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
} |
1154
|
|
|
|
|
|
|
elsif ( $self->in_element('hsp') |
1155
|
|
|
|
|
|
|
&& /Strand\s*=\s*(Plus|Minus)\s*\/\s*(Plus|Minus)/i ) |
1156
|
|
|
|
|
|
|
{ |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
# consume this event ( we infer strand from start/end) |
1159
|
372
|
100
|
|
|
|
771
|
if (!defined($reporttype)) { |
1160
|
2
|
|
|
|
|
6
|
$self->{'_reporttype'} = $reporttype = 'BLASTN'; |
1161
|
2
|
|
|
|
|
4
|
$bl2seq_fix = 1; # special case to resubmit the algorithm |
1162
|
|
|
|
|
|
|
# reporttype |
1163
|
|
|
|
|
|
|
} |
1164
|
372
|
|
|
|
|
1103
|
next; |
1165
|
|
|
|
|
|
|
} |
1166
|
|
|
|
|
|
|
elsif ( $self->in_element('hsp') |
1167
|
|
|
|
|
|
|
&& /Links\s*=\s*(\S+)/ox ) |
1168
|
|
|
|
|
|
|
{ |
1169
|
342
|
|
|
|
|
918
|
$self->element( |
1170
|
|
|
|
|
|
|
{ |
1171
|
|
|
|
|
|
|
'Name' => 'Hsp_links', |
1172
|
|
|
|
|
|
|
'Data' => $1 |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
); |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
elsif ( $self->in_element('hsp') |
1177
|
|
|
|
|
|
|
&& /Frame\s*=\s*([\+\-][1-3])\s*(\/\s*([\+\-][1-3]))?/ ) |
1178
|
|
|
|
|
|
|
{ |
1179
|
308
|
|
50
|
|
|
787
|
my $frame1 = $1 || 0; |
1180
|
308
|
|
100
|
|
|
699
|
my $frame2 = $2 || 0; |
1181
|
|
|
|
|
|
|
# this is for bl2seq only |
1182
|
308
|
100
|
|
|
|
466
|
if ( not defined $reporttype ) { |
1183
|
4
|
|
|
|
|
6
|
$bl2seq_fix = 1; |
1184
|
4
|
100
|
66
|
|
|
24
|
if ( $frame1 && $frame2 ) { |
1185
|
1
|
|
|
|
|
2
|
$reporttype = 'TBLASTX' |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
else { |
1188
|
|
|
|
|
|
|
# We can distinguish between BLASTX and TBLASTN from the report |
1189
|
|
|
|
|
|
|
# (and assign $frame1 properly) by using the start/end from query. |
1190
|
|
|
|
|
|
|
# If the report is BLASTX, the coordinates distance from query |
1191
|
|
|
|
|
|
|
# will be 3 times the length of the alignment shown (coordinates in nt, |
1192
|
|
|
|
|
|
|
# alignment in aa); if not then subject is the nucleotide sequence (TBLASTN). |
1193
|
|
|
|
|
|
|
# Will have to fast-forward to query alignment line and then go back. |
1194
|
3
|
|
|
|
|
100
|
my $fh = $self->_fh; |
1195
|
3
|
|
|
|
|
33
|
my $file_pos = tell $fh; |
1196
|
|
|
|
|
|
|
|
1197
|
3
|
|
|
|
|
11
|
my $a_position = ''; |
1198
|
3
|
|
|
|
|
4
|
my $ali_length = ''; |
1199
|
3
|
|
|
|
|
6
|
my $b_position = ''; |
1200
|
3
|
|
|
|
|
17
|
while (my $line = <$fh>) { |
1201
|
6
|
100
|
|
|
|
36
|
if ($line =~ m/^(?:Query|Sbjct):?\s+(\-?\d+)?\s*(\S+)\s+(\-?\d+)?/) { |
1202
|
3
|
|
|
|
|
9
|
$a_position = $1; |
1203
|
3
|
|
|
|
|
8
|
my $alignment = $2; |
1204
|
3
|
|
|
|
|
7
|
$b_position = $3; |
1205
|
|
|
|
|
|
|
|
1206
|
12
|
|
|
12
|
|
4447
|
use Bio::LocatableSeq; |
|
12
|
|
|
|
|
21
|
|
|
12
|
|
|
|
|
53110
|
|
1207
|
3
|
|
|
|
|
7
|
my $gap_symbols = $Bio::LocatableSeq::GAP_SYMBOLS; |
1208
|
3
|
|
|
|
|
44
|
$alignment =~ s/[$gap_symbols]//g; |
1209
|
3
|
|
|
|
|
6
|
$ali_length = length($alignment); |
1210
|
3
|
|
|
|
|
8
|
last; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
} |
1213
|
3
|
100
|
|
|
|
17
|
my $coord_length = ($a_position < $b_position) ? ($b_position - $a_position + 1) |
1214
|
|
|
|
|
|
|
: ($a_position - $b_position + 1); |
1215
|
3
|
100
|
|
|
|
14
|
($coord_length == ($ali_length * 3)) ? ($reporttype = 'BLASTX') : ($reporttype = 'TBLASTN'); |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
# Rewind filehandle to its original position to continue parsing |
1218
|
3
|
|
|
|
|
16
|
seek $fh, $file_pos, 0; |
1219
|
|
|
|
|
|
|
} |
1220
|
4
|
|
|
|
|
11
|
$self->{'_reporttype'} = $reporttype; |
1221
|
|
|
|
|
|
|
} |
1222
|
|
|
|
|
|
|
|
1223
|
308
|
|
|
|
|
268
|
my ( $queryframe, $hitframe ); |
1224
|
308
|
100
|
66
|
|
|
698
|
if ( $reporttype eq 'TBLASTX' ) { |
|
|
100
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1225
|
187
|
|
|
|
|
204
|
( $queryframe, $hitframe ) = ( $frame1, $frame2 ); |
1226
|
187
|
|
|
|
|
488
|
$hitframe =~ s/\/\s*//g; |
1227
|
|
|
|
|
|
|
} |
1228
|
|
|
|
|
|
|
elsif ( $reporttype eq 'TBLASTN' || $reporttype eq 'PSITBLASTN') { |
1229
|
95
|
|
|
|
|
125
|
( $hitframe, $queryframe ) = ( $frame1, 0 ); |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
elsif ( $reporttype eq 'BLASTX' || $reporttype eq 'RPS-BLAST(BLASTP)') { |
1232
|
26
|
|
|
|
|
34
|
( $queryframe, $hitframe ) = ( $frame1, 0 ); |
1233
|
|
|
|
|
|
|
# though NCBI doesn't report it, this is a special BLASTX-like |
1234
|
|
|
|
|
|
|
# RPS-BLAST; should be handled differently |
1235
|
26
|
50
|
|
|
|
49
|
if ($reporttype eq 'RPS-BLAST(BLASTP)') { |
1236
|
0
|
|
|
|
|
0
|
$self->element( |
1237
|
|
|
|
|
|
|
{ |
1238
|
|
|
|
|
|
|
'Name' => 'BlastOutput_program', |
1239
|
|
|
|
|
|
|
'Data' => 'RPS-BLAST(BLASTX)' |
1240
|
|
|
|
|
|
|
} |
1241
|
|
|
|
|
|
|
); |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
} |
1244
|
|
|
|
|
|
|
$self->element( |
1245
|
|
|
|
|
|
|
{ |
1246
|
308
|
|
|
|
|
828
|
'Name' => 'Hsp_query-frame', |
1247
|
|
|
|
|
|
|
'Data' => $queryframe |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
); |
1250
|
|
|
|
|
|
|
|
1251
|
308
|
|
|
|
|
776
|
$self->element( |
1252
|
|
|
|
|
|
|
{ |
1253
|
|
|
|
|
|
|
'Name' => 'Hsp_hit-frame', |
1254
|
|
|
|
|
|
|
'Data' => $hitframe |
1255
|
|
|
|
|
|
|
} |
1256
|
|
|
|
|
|
|
); |
1257
|
|
|
|
|
|
|
} |
1258
|
|
|
|
|
|
|
elsif (/^Parameters:/ |
1259
|
|
|
|
|
|
|
|| /^\s+Database:\s+?/ |
1260
|
|
|
|
|
|
|
|| /^\s+Subset/ |
1261
|
|
|
|
|
|
|
|| /^\s*Lambda/ |
1262
|
|
|
|
|
|
|
|| /^\s*Histogram/ |
1263
|
|
|
|
|
|
|
|| ( $self->in_element('hsp') && /WARNING|NOTE/ ) ) |
1264
|
|
|
|
|
|
|
{ |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
# Note: Lambda check was necessary to parse |
1267
|
|
|
|
|
|
|
# t/data/ecoli_domains.rpsblast AND to parse bl2seq |
1268
|
83
|
|
|
|
|
250
|
$self->debug("blast.pm: found parameters section \n"); |
1269
|
|
|
|
|
|
|
|
1270
|
83
|
100
|
|
|
|
205
|
$self->in_element('hsp') |
1271
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
1272
|
83
|
100
|
|
|
|
281
|
$self->in_element('hit') |
1273
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hit' } ); |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
# This is for the case when we specify -b 0 (or B=0 for WU-BLAST) |
1276
|
|
|
|
|
|
|
# and still want to construct minimal Hit objects |
1277
|
83
|
100
|
|
|
|
304
|
$self->_cleanup_hits(\@hit_signifs) if scalar(@hit_signifs); |
1278
|
83
|
100
|
|
|
|
240
|
$self->within_element('iteration') |
1279
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Iteration' } ); |
1280
|
|
|
|
|
|
|
|
1281
|
83
|
50
|
|
|
|
299
|
next if /^\s+Subset/; |
1282
|
83
|
100
|
|
|
|
420
|
my $blast = (/^(\s+Database\:)|(\s*Lambda)/) ? 'ncbi' : 'wublast'; |
1283
|
83
|
50
|
|
|
|
289
|
if (/^\s*Histogram/) { |
1284
|
0
|
|
|
|
|
0
|
$blast = 'btk'; |
1285
|
|
|
|
|
|
|
} |
1286
|
|
|
|
|
|
|
|
1287
|
83
|
|
|
|
|
138
|
my $last = ''; |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
# default is that gaps are allowed |
1290
|
83
|
|
|
|
|
792
|
$self->element( |
1291
|
|
|
|
|
|
|
{ |
1292
|
|
|
|
|
|
|
'Name' => 'Parameters_allowgaps', |
1293
|
|
|
|
|
|
|
'Data' => 'yes' |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
); |
1296
|
83
|
|
|
|
|
320
|
while ( defined( $_ = $self->_readline ) ) { |
1297
|
|
|
|
|
|
|
# If Lambda/Kappa/Entropy numbers appear first at this point, |
1298
|
|
|
|
|
|
|
# pushback and add the header line to process it correctly |
1299
|
2819
|
100
|
100
|
|
|
17811
|
if (/^\s+[\d+\.]+\s+[\d+\.]+\s+[\d+\.]/ and $last eq '') { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1300
|
15
|
|
|
|
|
60
|
$self->_pushback($_); |
1301
|
15
|
|
|
|
|
43
|
$self->_pushback("Lambda K H\n"); |
1302
|
15
|
|
|
|
|
36
|
next; |
1303
|
|
|
|
|
|
|
} |
1304
|
|
|
|
|
|
|
elsif ( |
1305
|
|
|
|
|
|
|
/^((?:\S+)?BLAST[NPX]?)\s+(.+)$/i # NCBI BLAST, PSIBLAST |
1306
|
|
|
|
|
|
|
# RPSBLAST, MEGABLAST |
1307
|
|
|
|
|
|
|
|| /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i #Paracel BTK |
1308
|
|
|
|
|
|
|
) |
1309
|
|
|
|
|
|
|
{ |
1310
|
5
|
|
|
|
|
20
|
$self->_pushback($_); |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
# let's handle this in the loop |
1313
|
5
|
|
|
|
|
24
|
last; |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
elsif (/^Query=/) { |
1316
|
4
|
|
|
|
|
13
|
$self->_pushback($_); |
1317
|
4
|
50
|
|
|
|
16
|
$self->_pushback($reportline) if $reportline; |
1318
|
4
|
|
|
|
|
9
|
last PARSER; |
1319
|
|
|
|
|
|
|
} |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
# here is where difference between wublast and ncbiblast |
1322
|
|
|
|
|
|
|
# is better handled by different logic |
1323
|
2795
|
100
|
100
|
|
|
12434
|
if ( /Number of Sequences:\s+([\d\,]+)/i |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
|| /of sequences in database:\s+(\-?[\d,]+)/i ) |
1325
|
|
|
|
|
|
|
{ |
1326
|
125
|
|
|
|
|
253
|
my $c = $1; |
1327
|
125
|
|
|
|
|
190
|
$c =~ s/\,//g; |
1328
|
125
|
|
|
|
|
407
|
$self->element( |
1329
|
|
|
|
|
|
|
{ |
1330
|
|
|
|
|
|
|
'Name' => 'Statistics_db-len', |
1331
|
|
|
|
|
|
|
'Data' => $c |
1332
|
|
|
|
|
|
|
} |
1333
|
|
|
|
|
|
|
); |
1334
|
|
|
|
|
|
|
} |
1335
|
|
|
|
|
|
|
elsif (/letters in database:\s+(\-?[\d,]+)/i) { |
1336
|
73
|
|
|
|
|
170
|
my $s = $1; |
1337
|
73
|
|
|
|
|
203
|
$s =~ s/,//g; |
1338
|
73
|
|
|
|
|
255
|
$self->element( |
1339
|
|
|
|
|
|
|
{ |
1340
|
|
|
|
|
|
|
'Name' => 'Statistics_db-let', |
1341
|
|
|
|
|
|
|
'Data' => $s |
1342
|
|
|
|
|
|
|
} |
1343
|
|
|
|
|
|
|
); |
1344
|
|
|
|
|
|
|
} |
1345
|
|
|
|
|
|
|
elsif ( $blast eq 'btk' ) { |
1346
|
0
|
|
|
|
|
0
|
next; |
1347
|
|
|
|
|
|
|
} |
1348
|
|
|
|
|
|
|
elsif ( $blast eq 'wublast' ) { |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
# warn($_); |
1351
|
985
|
100
|
|
|
|
8675
|
if (/E=(\S+)/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1352
|
29
|
|
|
|
|
121
|
$self->element( |
1353
|
|
|
|
|
|
|
{ |
1354
|
|
|
|
|
|
|
'Name' => 'Parameters_expect', |
1355
|
|
|
|
|
|
|
'Data' => $1 |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
); |
1358
|
|
|
|
|
|
|
} |
1359
|
|
|
|
|
|
|
elsif (/nogaps/) { |
1360
|
2
|
|
|
|
|
9
|
$self->element( |
1361
|
|
|
|
|
|
|
{ |
1362
|
|
|
|
|
|
|
'Name' => 'Parameters_allowgaps', |
1363
|
|
|
|
|
|
|
'Data' => 'no' |
1364
|
|
|
|
|
|
|
} |
1365
|
|
|
|
|
|
|
); |
1366
|
|
|
|
|
|
|
} |
1367
|
|
|
|
|
|
|
elsif (/ctxfactor=(\S+)/) { |
1368
|
26
|
|
|
|
|
110
|
$self->element( |
1369
|
|
|
|
|
|
|
{ |
1370
|
|
|
|
|
|
|
'Name' => 'Statistics_ctxfactor', |
1371
|
|
|
|
|
|
|
'Data' => $1 |
1372
|
|
|
|
|
|
|
} |
1373
|
|
|
|
|
|
|
); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
elsif ( |
1376
|
|
|
|
|
|
|
/(postsw|links|span[12]?|warnings|notes|gi|noseqs|qres|qype)/ |
1377
|
|
|
|
|
|
|
) |
1378
|
|
|
|
|
|
|
{ |
1379
|
34
|
|
|
|
|
156
|
$self->element( |
1380
|
|
|
|
|
|
|
{ |
1381
|
|
|
|
|
|
|
'Name' => "Parameters_$1", |
1382
|
|
|
|
|
|
|
'Data' => 'yes' |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
); |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
elsif (/(\S+)=(\S+)/) { |
1387
|
141
|
|
|
|
|
506
|
$self->element( |
1388
|
|
|
|
|
|
|
{ |
1389
|
|
|
|
|
|
|
'Name' => "Parameters_$1", |
1390
|
|
|
|
|
|
|
'Data' => $2 |
1391
|
|
|
|
|
|
|
} |
1392
|
|
|
|
|
|
|
); |
1393
|
|
|
|
|
|
|
} |
1394
|
|
|
|
|
|
|
elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Matrix name/i ) { |
1395
|
26
|
|
|
|
|
31
|
my $firstgapinfo = 1; |
1396
|
26
|
|
|
|
|
35
|
my $frame = undef; |
1397
|
26
|
|
66
|
|
|
163
|
while ( defined($_) && !/^\s+$/ ) { |
1398
|
108
|
|
|
|
|
224
|
s/^\s+//; |
1399
|
108
|
|
|
|
|
341
|
s/\s+$//; |
1400
|
108
|
100
|
100
|
|
|
385
|
if ( $firstgapinfo |
1401
|
|
|
|
|
|
|
&& s/Q=(\d+),R=(\d+)\s+//x ) |
1402
|
|
|
|
|
|
|
{ |
1403
|
24
|
|
|
|
|
28
|
$firstgapinfo = 0; |
1404
|
|
|
|
|
|
|
|
1405
|
24
|
|
|
|
|
95
|
$self->element( |
1406
|
|
|
|
|
|
|
{ |
1407
|
|
|
|
|
|
|
'Name' => 'Parameters_gap-open', |
1408
|
|
|
|
|
|
|
'Data' => $1 |
1409
|
|
|
|
|
|
|
} |
1410
|
|
|
|
|
|
|
); |
1411
|
24
|
|
|
|
|
102
|
$self->element( |
1412
|
|
|
|
|
|
|
{ |
1413
|
|
|
|
|
|
|
'Name' => 'Parameters_gap-extend', |
1414
|
|
|
|
|
|
|
'Data' => $2 |
1415
|
|
|
|
|
|
|
} |
1416
|
|
|
|
|
|
|
); |
1417
|
24
|
|
|
|
|
112
|
my @fields = split; |
1418
|
|
|
|
|
|
|
|
1419
|
24
|
|
|
|
|
51
|
for my $type ( |
1420
|
|
|
|
|
|
|
qw(lambda_gapped |
1421
|
|
|
|
|
|
|
kappa_gapped |
1422
|
|
|
|
|
|
|
entropy_gapped) |
1423
|
|
|
|
|
|
|
) |
1424
|
|
|
|
|
|
|
{ |
1425
|
72
|
50
|
|
|
|
106
|
next if $type eq 'n/a'; |
1426
|
72
|
50
|
|
|
|
116
|
if ( !@fields ) { |
1427
|
0
|
|
|
|
|
0
|
warn "fields is empty for $type\n"; |
1428
|
0
|
|
|
|
|
0
|
next; |
1429
|
|
|
|
|
|
|
} |
1430
|
|
|
|
|
|
|
$self->element( |
1431
|
|
|
|
|
|
|
{ |
1432
|
72
|
|
|
|
|
233
|
'Name' => |
1433
|
|
|
|
|
|
|
"Statistics_frame$frame\_$type", |
1434
|
|
|
|
|
|
|
'Data' => shift @fields |
1435
|
|
|
|
|
|
|
} |
1436
|
|
|
|
|
|
|
); |
1437
|
|
|
|
|
|
|
} |
1438
|
|
|
|
|
|
|
} |
1439
|
|
|
|
|
|
|
else { |
1440
|
84
|
|
|
|
|
321
|
my ( $frameo, $matid, $matrix, @fields ) = |
1441
|
|
|
|
|
|
|
split; |
1442
|
84
|
100
|
|
|
|
161
|
if ( !defined $frame ) { |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
# keep some sort of default feature I guess |
1445
|
|
|
|
|
|
|
# even though this is sort of wrong |
1446
|
26
|
|
|
|
|
102
|
$self->element( |
1447
|
|
|
|
|
|
|
{ |
1448
|
|
|
|
|
|
|
'Name' => 'Parameters_matrix', |
1449
|
|
|
|
|
|
|
'Data' => $matrix |
1450
|
|
|
|
|
|
|
} |
1451
|
|
|
|
|
|
|
); |
1452
|
26
|
|
|
|
|
107
|
$self->element( |
1453
|
|
|
|
|
|
|
{ |
1454
|
|
|
|
|
|
|
'Name' => 'Statistics_lambda', |
1455
|
|
|
|
|
|
|
'Data' => $fields[0] |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
); |
1458
|
26
|
|
|
|
|
111
|
$self->element( |
1459
|
|
|
|
|
|
|
{ |
1460
|
|
|
|
|
|
|
'Name' => 'Statistics_kappa', |
1461
|
|
|
|
|
|
|
'Data' => $fields[1] |
1462
|
|
|
|
|
|
|
} |
1463
|
|
|
|
|
|
|
); |
1464
|
26
|
|
|
|
|
84
|
$self->element( |
1465
|
|
|
|
|
|
|
{ |
1466
|
|
|
|
|
|
|
'Name' => 'Statistics_entropy', |
1467
|
|
|
|
|
|
|
'Data' => $fields[2] |
1468
|
|
|
|
|
|
|
} |
1469
|
|
|
|
|
|
|
); |
1470
|
|
|
|
|
|
|
} |
1471
|
84
|
|
|
|
|
83
|
$frame = $frameo; |
1472
|
84
|
|
|
|
|
74
|
my $ii = 0; |
1473
|
84
|
|
|
|
|
114
|
for my $type ( |
1474
|
|
|
|
|
|
|
qw(lambda_used |
1475
|
|
|
|
|
|
|
kappa_used |
1476
|
|
|
|
|
|
|
entropy_used |
1477
|
|
|
|
|
|
|
lambda_computed |
1478
|
|
|
|
|
|
|
kappa_computed |
1479
|
|
|
|
|
|
|
entropy_computed) |
1480
|
|
|
|
|
|
|
) |
1481
|
|
|
|
|
|
|
{ |
1482
|
504
|
|
|
|
|
407
|
my $f = $fields[$ii]; |
1483
|
504
|
100
|
|
|
|
604
|
next unless defined $f; # deal with n/a |
1484
|
456
|
100
|
|
|
|
520
|
if ( $f eq 'same' ) { |
1485
|
72
|
|
|
|
|
98
|
$f = $fields[ $ii - 3 ]; |
1486
|
|
|
|
|
|
|
} |
1487
|
456
|
|
|
|
|
282
|
$ii++; |
1488
|
456
|
|
|
|
|
1044
|
$self->element( |
1489
|
|
|
|
|
|
|
{ |
1490
|
|
|
|
|
|
|
'Name' => |
1491
|
|
|
|
|
|
|
"Statistics_frame$frame\_$type", |
1492
|
|
|
|
|
|
|
'Data' => $f |
1493
|
|
|
|
|
|
|
} |
1494
|
|
|
|
|
|
|
); |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
} |
1497
|
|
|
|
|
|
|
} |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
# get the next line |
1500
|
108
|
|
|
|
|
212
|
$_ = $self->_readline; |
1501
|
|
|
|
|
|
|
} |
1502
|
26
|
|
|
|
|
38
|
$last = $_; |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Length/i ) { |
1505
|
26
|
|
|
|
|
33
|
my $frame = undef; |
1506
|
26
|
|
33
|
|
|
180
|
while ( defined($_) && !/^\s+/ ) { |
1507
|
0
|
|
|
|
|
0
|
s/^\s+//; |
1508
|
0
|
|
|
|
|
0
|
s/\s+$//; |
1509
|
0
|
|
|
|
|
0
|
my @fields = split; |
1510
|
0
|
0
|
|
|
|
0
|
if ( @fields <= 3 ) { |
1511
|
0
|
|
|
|
|
0
|
for my $type (qw(X_gapped E2_gapped S2)) { |
1512
|
0
|
0
|
|
|
|
0
|
last unless @fields; |
1513
|
0
|
|
|
|
|
0
|
$self->element( |
1514
|
|
|
|
|
|
|
{ |
1515
|
|
|
|
|
|
|
'Name' => |
1516
|
|
|
|
|
|
|
"Statistics_frame$frame\_$type", |
1517
|
|
|
|
|
|
|
'Data' => shift @fields |
1518
|
|
|
|
|
|
|
} |
1519
|
|
|
|
|
|
|
); |
1520
|
|
|
|
|
|
|
} |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
else { |
1523
|
|
|
|
|
|
|
|
1524
|
0
|
|
|
|
|
0
|
for my $type ( |
1525
|
|
|
|
|
|
|
qw(length |
1526
|
|
|
|
|
|
|
efflength |
1527
|
|
|
|
|
|
|
E S W T X E2 S2) |
1528
|
|
|
|
|
|
|
) |
1529
|
|
|
|
|
|
|
{ |
1530
|
0
|
|
|
|
|
0
|
$self->element( |
1531
|
|
|
|
|
|
|
{ |
1532
|
|
|
|
|
|
|
'Name' => |
1533
|
|
|
|
|
|
|
"Statistics_frame$frame\_$type", |
1534
|
|
|
|
|
|
|
'Data' => shift @fields |
1535
|
|
|
|
|
|
|
} |
1536
|
|
|
|
|
|
|
); |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
} |
1539
|
0
|
|
|
|
|
0
|
$_ = $self->_readline; |
1540
|
|
|
|
|
|
|
} |
1541
|
26
|
|
|
|
|
41
|
$last = $_; |
1542
|
|
|
|
|
|
|
} |
1543
|
|
|
|
|
|
|
elsif (/(\S+\s+\S+)\s+DFA:\s+(\S+)\s+\((.+)\)/) { |
1544
|
23
|
50
|
|
|
|
89
|
if ( $1 eq 'states in' ) { |
|
|
0
|
|
|
|
|
|
1545
|
23
|
|
|
|
|
128
|
$self->element( |
1546
|
|
|
|
|
|
|
{ |
1547
|
|
|
|
|
|
|
'Name' => 'Statistics_DFA_states', |
1548
|
|
|
|
|
|
|
'Data' => "$2 $3" |
1549
|
|
|
|
|
|
|
} |
1550
|
|
|
|
|
|
|
); |
1551
|
|
|
|
|
|
|
} |
1552
|
|
|
|
|
|
|
elsif ( $1 eq 'size of' ) { |
1553
|
0
|
|
|
|
|
0
|
$self->element( |
1554
|
|
|
|
|
|
|
{ |
1555
|
|
|
|
|
|
|
'Name' => 'Statistics_DFA_size', |
1556
|
|
|
|
|
|
|
'Data' => "$2 $3" |
1557
|
|
|
|
|
|
|
} |
1558
|
|
|
|
|
|
|
); |
1559
|
|
|
|
|
|
|
} |
1560
|
|
|
|
|
|
|
} |
1561
|
|
|
|
|
|
|
elsif ( |
1562
|
|
|
|
|
|
|
m/^\s+Time to generate neighborhood:\s+ |
1563
|
|
|
|
|
|
|
(\S+\s+\S+\s+\S+)/x |
1564
|
|
|
|
|
|
|
) |
1565
|
|
|
|
|
|
|
{ |
1566
|
0
|
|
|
|
|
0
|
$self->element( |
1567
|
|
|
|
|
|
|
{ |
1568
|
|
|
|
|
|
|
'Name' => 'Statistics_neighbortime', |
1569
|
|
|
|
|
|
|
'Data' => $1 |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
); |
1572
|
|
|
|
|
|
|
} |
1573
|
|
|
|
|
|
|
elsif (/processors\s+used:\s+(\d+)/) { |
1574
|
23
|
|
|
|
|
103
|
$self->element( |
1575
|
|
|
|
|
|
|
{ |
1576
|
|
|
|
|
|
|
'Name' => 'Statistics_noprocessors', |
1577
|
|
|
|
|
|
|
'Data' => $1 |
1578
|
|
|
|
|
|
|
} |
1579
|
|
|
|
|
|
|
); |
1580
|
|
|
|
|
|
|
} |
1581
|
|
|
|
|
|
|
elsif ( |
1582
|
|
|
|
|
|
|
m/^\s+(\S+)\s+cpu\s+time:\s+ # cputype |
1583
|
|
|
|
|
|
|
(\S+\s+\S+\s+\S+) # cputime |
1584
|
|
|
|
|
|
|
\s+Elapsed:\s+(\S+)/x |
1585
|
|
|
|
|
|
|
) |
1586
|
|
|
|
|
|
|
{ |
1587
|
46
|
|
|
|
|
104
|
my $cputype = lc($1); |
1588
|
46
|
|
|
|
|
180
|
$self->element( |
1589
|
|
|
|
|
|
|
{ |
1590
|
|
|
|
|
|
|
'Name' => "Statistics_$cputype\_cputime", |
1591
|
|
|
|
|
|
|
'Data' => $2 |
1592
|
|
|
|
|
|
|
} |
1593
|
|
|
|
|
|
|
); |
1594
|
46
|
|
|
|
|
171
|
$self->element( |
1595
|
|
|
|
|
|
|
{ |
1596
|
|
|
|
|
|
|
'Name' => "Statistics_$cputype\_actualtime", |
1597
|
|
|
|
|
|
|
'Data' => $3 |
1598
|
|
|
|
|
|
|
} |
1599
|
|
|
|
|
|
|
); |
1600
|
|
|
|
|
|
|
} |
1601
|
|
|
|
|
|
|
elsif (/^\s+Start:/) { |
1602
|
23
|
|
|
|
|
204
|
my ( $junk, $start, $stime, $end, $etime ) = |
1603
|
|
|
|
|
|
|
split( /\s+(Start|End)\:\s+/, $_ ); |
1604
|
23
|
|
|
|
|
51
|
chomp($stime); |
1605
|
23
|
|
|
|
|
81
|
$self->element( |
1606
|
|
|
|
|
|
|
{ |
1607
|
|
|
|
|
|
|
'Name' => 'Statistics_starttime', |
1608
|
|
|
|
|
|
|
'Data' => $stime |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
); |
1611
|
23
|
|
|
|
|
34
|
chomp($etime); |
1612
|
23
|
|
|
|
|
68
|
$self->element( |
1613
|
|
|
|
|
|
|
{ |
1614
|
|
|
|
|
|
|
'Name' => 'Statistics_endtime', |
1615
|
|
|
|
|
|
|
'Data' => $etime |
1616
|
|
|
|
|
|
|
} |
1617
|
|
|
|
|
|
|
); |
1618
|
|
|
|
|
|
|
} |
1619
|
|
|
|
|
|
|
elsif (/^\s+Database:\s+(.+)$/) { |
1620
|
21
|
|
|
|
|
113
|
$self->element( |
1621
|
|
|
|
|
|
|
{ |
1622
|
|
|
|
|
|
|
'Name' => 'Parameters_full_dbpath', |
1623
|
|
|
|
|
|
|
'Data' => $1 |
1624
|
|
|
|
|
|
|
} |
1625
|
|
|
|
|
|
|
); |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
} |
1628
|
|
|
|
|
|
|
elsif (/^\s+Posted:\s+(.+)/) { |
1629
|
21
|
|
|
|
|
47
|
my $d = $1; |
1630
|
21
|
|
|
|
|
33
|
chomp($d); |
1631
|
21
|
|
|
|
|
81
|
$self->element( |
1632
|
|
|
|
|
|
|
{ |
1633
|
|
|
|
|
|
|
'Name' => 'Statistics_posted_date', |
1634
|
|
|
|
|
|
|
'Data' => $d |
1635
|
|
|
|
|
|
|
} |
1636
|
|
|
|
|
|
|
); |
1637
|
|
|
|
|
|
|
} |
1638
|
|
|
|
|
|
|
} |
1639
|
|
|
|
|
|
|
elsif ( $blast eq 'ncbi' ) { |
1640
|
|
|
|
|
|
|
|
1641
|
1612
|
100
|
|
|
|
14404
|
if (m/^Matrix:\s+(.+)\s*$/oxi) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1642
|
55
|
|
|
|
|
284
|
$self->element( |
1643
|
|
|
|
|
|
|
{ |
1644
|
|
|
|
|
|
|
'Name' => 'Parameters_matrix', |
1645
|
|
|
|
|
|
|
'Data' => $1 |
1646
|
|
|
|
|
|
|
} |
1647
|
|
|
|
|
|
|
); |
1648
|
|
|
|
|
|
|
} |
1649
|
|
|
|
|
|
|
elsif (/^Gapped/) { |
1650
|
49
|
|
|
|
|
85
|
$gapped_stats = 1; |
1651
|
|
|
|
|
|
|
} |
1652
|
|
|
|
|
|
|
elsif (/^Lambda/) { |
1653
|
107
|
|
|
|
|
241
|
$_ = $self->_readline; |
1654
|
107
|
|
|
|
|
303
|
s/^\s+//; |
1655
|
107
|
|
|
|
|
334
|
my ( $lambda, $kappa, $entropy ) = split; |
1656
|
107
|
100
|
|
|
|
209
|
if ($gapped_stats) { |
1657
|
49
|
|
|
|
|
177
|
$self->element( |
1658
|
|
|
|
|
|
|
{ |
1659
|
|
|
|
|
|
|
'Name' => "Statistics_gapped_lambda", |
1660
|
|
|
|
|
|
|
'Data' => $lambda |
1661
|
|
|
|
|
|
|
} |
1662
|
|
|
|
|
|
|
); |
1663
|
49
|
|
|
|
|
182
|
$self->element( |
1664
|
|
|
|
|
|
|
{ |
1665
|
|
|
|
|
|
|
'Name' => "Statistics_gapped_kappa", |
1666
|
|
|
|
|
|
|
'Data' => $kappa |
1667
|
|
|
|
|
|
|
} |
1668
|
|
|
|
|
|
|
); |
1669
|
49
|
|
|
|
|
134
|
$self->element( |
1670
|
|
|
|
|
|
|
{ |
1671
|
|
|
|
|
|
|
'Name' => "Statistics_gapped_entropy", |
1672
|
|
|
|
|
|
|
'Data' => $entropy |
1673
|
|
|
|
|
|
|
} |
1674
|
|
|
|
|
|
|
); |
1675
|
|
|
|
|
|
|
} |
1676
|
|
|
|
|
|
|
else { |
1677
|
58
|
|
|
|
|
212
|
$self->element( |
1678
|
|
|
|
|
|
|
{ |
1679
|
|
|
|
|
|
|
'Name' => "Statistics_lambda", |
1680
|
|
|
|
|
|
|
'Data' => $lambda |
1681
|
|
|
|
|
|
|
} |
1682
|
|
|
|
|
|
|
); |
1683
|
58
|
|
|
|
|
241
|
$self->element( |
1684
|
|
|
|
|
|
|
{ |
1685
|
|
|
|
|
|
|
'Name' => "Statistics_kappa", |
1686
|
|
|
|
|
|
|
'Data' => $kappa |
1687
|
|
|
|
|
|
|
} |
1688
|
|
|
|
|
|
|
); |
1689
|
58
|
|
|
|
|
198
|
$self->element( |
1690
|
|
|
|
|
|
|
{ |
1691
|
|
|
|
|
|
|
'Name' => "Statistics_entropy", |
1692
|
|
|
|
|
|
|
'Data' => $entropy |
1693
|
|
|
|
|
|
|
} |
1694
|
|
|
|
|
|
|
); |
1695
|
|
|
|
|
|
|
} |
1696
|
|
|
|
|
|
|
} |
1697
|
|
|
|
|
|
|
elsif (m/effective\s+search\s+space\s+used:\s+(\d+)/oxi) { |
1698
|
54
|
|
|
|
|
419
|
$self->element( |
1699
|
|
|
|
|
|
|
{ |
1700
|
|
|
|
|
|
|
'Name' => 'Statistics_eff-spaceused', |
1701
|
|
|
|
|
|
|
'Data' => $1 |
1702
|
|
|
|
|
|
|
} |
1703
|
|
|
|
|
|
|
); |
1704
|
|
|
|
|
|
|
} |
1705
|
|
|
|
|
|
|
elsif (m/effective\s+search\s+space:\s+(\d+)/oxi) { |
1706
|
48
|
|
|
|
|
249
|
$self->element( |
1707
|
|
|
|
|
|
|
{ |
1708
|
|
|
|
|
|
|
'Name' => 'Statistics_eff-space', |
1709
|
|
|
|
|
|
|
'Data' => $1 |
1710
|
|
|
|
|
|
|
} |
1711
|
|
|
|
|
|
|
); |
1712
|
|
|
|
|
|
|
} |
1713
|
|
|
|
|
|
|
elsif ( |
1714
|
|
|
|
|
|
|
m/Gap\s+Penalties:\s+Existence:\s+(\d+)\, |
1715
|
|
|
|
|
|
|
\s+Extension:\s+(\d+)/ox |
1716
|
|
|
|
|
|
|
) |
1717
|
|
|
|
|
|
|
{ |
1718
|
48
|
|
|
|
|
311
|
$self->element( |
1719
|
|
|
|
|
|
|
{ |
1720
|
|
|
|
|
|
|
'Name' => 'Parameters_gap-open', |
1721
|
|
|
|
|
|
|
'Data' => $1 |
1722
|
|
|
|
|
|
|
} |
1723
|
|
|
|
|
|
|
); |
1724
|
48
|
|
|
|
|
189
|
$self->element( |
1725
|
|
|
|
|
|
|
{ |
1726
|
|
|
|
|
|
|
'Name' => 'Parameters_gap-extend', |
1727
|
|
|
|
|
|
|
'Data' => $2 |
1728
|
|
|
|
|
|
|
} |
1729
|
|
|
|
|
|
|
); |
1730
|
|
|
|
|
|
|
} |
1731
|
|
|
|
|
|
|
elsif (/effective\s+HSP\s+length:\s+(\d+)/) { |
1732
|
45
|
|
|
|
|
203
|
$self->element( |
1733
|
|
|
|
|
|
|
{ |
1734
|
|
|
|
|
|
|
'Name' => 'Statistics_hsp-len', |
1735
|
|
|
|
|
|
|
'Data' => $1 |
1736
|
|
|
|
|
|
|
} |
1737
|
|
|
|
|
|
|
); |
1738
|
|
|
|
|
|
|
} |
1739
|
|
|
|
|
|
|
elsif (/Number\s+of\s+HSP's\s+better\s+than\s+(\S+)\s+without\s+gapping:\s+(\d+)/) { |
1740
|
34
|
|
|
|
|
168
|
$self->element( |
1741
|
|
|
|
|
|
|
{ |
1742
|
|
|
|
|
|
|
'Name' => 'Statistics_number_of_hsps_better_than_expect_value_cutoff_without_gapping', |
1743
|
|
|
|
|
|
|
'Data' => $2 |
1744
|
|
|
|
|
|
|
} |
1745
|
|
|
|
|
|
|
); |
1746
|
|
|
|
|
|
|
} |
1747
|
|
|
|
|
|
|
elsif (/Number\s+of\s+HSP's\s+gapped:\s+(\d+)/) { |
1748
|
7
|
|
|
|
|
32
|
$self->element( |
1749
|
|
|
|
|
|
|
{ |
1750
|
|
|
|
|
|
|
'Name' => 'Statistics_number_of_hsps_gapped', |
1751
|
|
|
|
|
|
|
'Data' => $1 |
1752
|
|
|
|
|
|
|
} |
1753
|
|
|
|
|
|
|
); |
1754
|
|
|
|
|
|
|
} |
1755
|
|
|
|
|
|
|
elsif (/Number\s+of\s+HSP's\s+successfully\s+gapped:\s+(\d+)/) { |
1756
|
7
|
|
|
|
|
36
|
$self->element( |
1757
|
|
|
|
|
|
|
{ |
1758
|
|
|
|
|
|
|
'Name' => 'Statistics_number_of_hsps_successfully_gapped', |
1759
|
|
|
|
|
|
|
'Data' => $1 |
1760
|
|
|
|
|
|
|
} |
1761
|
|
|
|
|
|
|
); |
1762
|
|
|
|
|
|
|
} |
1763
|
|
|
|
|
|
|
elsif (/Length\s+adjustment:\s+(\d+)/) { |
1764
|
6
|
|
|
|
|
29
|
$self->element( |
1765
|
|
|
|
|
|
|
{ |
1766
|
|
|
|
|
|
|
'Name' => 'Statistics_length_adjustment', |
1767
|
|
|
|
|
|
|
'Data' => $1 |
1768
|
|
|
|
|
|
|
} |
1769
|
|
|
|
|
|
|
); |
1770
|
|
|
|
|
|
|
} |
1771
|
|
|
|
|
|
|
elsif (/effective\s+length\s+of\s+query:\s+([\d\,]+)/i) { |
1772
|
48
|
|
|
|
|
119
|
my $c = $1; |
1773
|
48
|
|
|
|
|
84
|
$c =~ s/\,//g; |
1774
|
48
|
|
|
|
|
203
|
$self->element( |
1775
|
|
|
|
|
|
|
{ |
1776
|
|
|
|
|
|
|
'Name' => 'Statistics_query-len', |
1777
|
|
|
|
|
|
|
'Data' => $c |
1778
|
|
|
|
|
|
|
} |
1779
|
|
|
|
|
|
|
); |
1780
|
|
|
|
|
|
|
} |
1781
|
|
|
|
|
|
|
elsif (/effective\s+length\s+of\s+database:\s+([\d\,]+)/i) { |
1782
|
51
|
|
|
|
|
108
|
my $c = $1; |
1783
|
51
|
|
|
|
|
150
|
$c =~ s/\,//g; |
1784
|
51
|
|
|
|
|
222
|
$self->element( |
1785
|
|
|
|
|
|
|
{ |
1786
|
|
|
|
|
|
|
'Name' => 'Statistics_eff-dblen', |
1787
|
|
|
|
|
|
|
'Data' => $c |
1788
|
|
|
|
|
|
|
} |
1789
|
|
|
|
|
|
|
); |
1790
|
|
|
|
|
|
|
} |
1791
|
|
|
|
|
|
|
elsif ( |
1792
|
|
|
|
|
|
|
/^(T|A|X1|X2|X3|S1|S2):\s+(\d+(\.\d+)?)\s+(?:\(\s*(\d+\.\d+) bits\))?/ |
1793
|
|
|
|
|
|
|
) |
1794
|
|
|
|
|
|
|
{ |
1795
|
325
|
|
|
|
|
471
|
my $v = $2; |
1796
|
325
|
|
|
|
|
341
|
chomp($v); |
1797
|
325
|
|
|
|
|
1144
|
$self->element( |
1798
|
|
|
|
|
|
|
{ |
1799
|
|
|
|
|
|
|
'Name' => "Statistics_$1", |
1800
|
|
|
|
|
|
|
'Data' => $v |
1801
|
|
|
|
|
|
|
} |
1802
|
|
|
|
|
|
|
); |
1803
|
325
|
100
|
|
|
|
760
|
if ( defined $4 ) { |
1804
|
235
|
|
|
|
|
735
|
$self->element( |
1805
|
|
|
|
|
|
|
{ |
1806
|
|
|
|
|
|
|
'Name' => "Statistics_$1_bits", |
1807
|
|
|
|
|
|
|
'Data' => $4 |
1808
|
|
|
|
|
|
|
} |
1809
|
|
|
|
|
|
|
); |
1810
|
|
|
|
|
|
|
} |
1811
|
|
|
|
|
|
|
} |
1812
|
|
|
|
|
|
|
elsif ( |
1813
|
|
|
|
|
|
|
m/frameshift\s+window\, |
1814
|
|
|
|
|
|
|
\s+decay\s+const:\s+(\d+)\,\s+([\.\d]+)/x |
1815
|
|
|
|
|
|
|
) |
1816
|
|
|
|
|
|
|
{ |
1817
|
14
|
|
|
|
|
76
|
$self->element( |
1818
|
|
|
|
|
|
|
{ |
1819
|
|
|
|
|
|
|
'Name' => 'Statistics_framewindow', |
1820
|
|
|
|
|
|
|
'Data' => $1 |
1821
|
|
|
|
|
|
|
} |
1822
|
|
|
|
|
|
|
); |
1823
|
14
|
|
|
|
|
53
|
$self->element( |
1824
|
|
|
|
|
|
|
{ |
1825
|
|
|
|
|
|
|
'Name' => 'Statistics_decay', |
1826
|
|
|
|
|
|
|
'Data' => $2 |
1827
|
|
|
|
|
|
|
} |
1828
|
|
|
|
|
|
|
); |
1829
|
|
|
|
|
|
|
} |
1830
|
|
|
|
|
|
|
elsif (m/^Number\s+of\s+Hits\s+to\s+DB:\s+(\S+)/ox) { |
1831
|
52
|
|
|
|
|
306
|
$self->element( |
1832
|
|
|
|
|
|
|
{ |
1833
|
|
|
|
|
|
|
'Name' => 'Statistics_hit_to_db', |
1834
|
|
|
|
|
|
|
'Data' => $1 |
1835
|
|
|
|
|
|
|
} |
1836
|
|
|
|
|
|
|
); |
1837
|
|
|
|
|
|
|
} |
1838
|
|
|
|
|
|
|
elsif (m/^Number\s+of\s+extensions:\s+(\S+)/ox) { |
1839
|
51
|
|
|
|
|
265
|
$self->element( |
1840
|
|
|
|
|
|
|
{ |
1841
|
|
|
|
|
|
|
'Name' => 'Statistics_num_extensions', |
1842
|
|
|
|
|
|
|
'Data' => $1 |
1843
|
|
|
|
|
|
|
} |
1844
|
|
|
|
|
|
|
); |
1845
|
|
|
|
|
|
|
} |
1846
|
|
|
|
|
|
|
elsif ( |
1847
|
|
|
|
|
|
|
m/^Number\s+of\s+successful\s+extensions:\s+ |
1848
|
|
|
|
|
|
|
(\S+)/ox |
1849
|
|
|
|
|
|
|
) |
1850
|
|
|
|
|
|
|
{ |
1851
|
51
|
|
|
|
|
250
|
$self->element( |
1852
|
|
|
|
|
|
|
{ |
1853
|
|
|
|
|
|
|
'Name' => 'Statistics_num_suc_extensions', |
1854
|
|
|
|
|
|
|
'Data' => $1 |
1855
|
|
|
|
|
|
|
} |
1856
|
|
|
|
|
|
|
); |
1857
|
|
|
|
|
|
|
} |
1858
|
|
|
|
|
|
|
elsif ( |
1859
|
|
|
|
|
|
|
m/^Number\s+of\s+sequences\s+better\s+than\s+ |
1860
|
|
|
|
|
|
|
(\S+):\s+(\d+)/ox |
1861
|
|
|
|
|
|
|
) |
1862
|
|
|
|
|
|
|
{ |
1863
|
52
|
|
|
|
|
276
|
$self->element( |
1864
|
|
|
|
|
|
|
{ |
1865
|
|
|
|
|
|
|
'Name' => 'Parameters_expect', |
1866
|
|
|
|
|
|
|
'Data' => $1 |
1867
|
|
|
|
|
|
|
} |
1868
|
|
|
|
|
|
|
); |
1869
|
52
|
|
|
|
|
193
|
$self->element( |
1870
|
|
|
|
|
|
|
{ |
1871
|
|
|
|
|
|
|
'Name' => 'Statistics_seqs_better_than_cutoff', |
1872
|
|
|
|
|
|
|
'Data' => $2 |
1873
|
|
|
|
|
|
|
} |
1874
|
|
|
|
|
|
|
); |
1875
|
|
|
|
|
|
|
} |
1876
|
|
|
|
|
|
|
elsif (/^\s+Posted\s+date:\s+(.+)/) { |
1877
|
50
|
|
|
|
|
136
|
my $d = $1; |
1878
|
50
|
|
|
|
|
67
|
chomp($d); |
1879
|
50
|
|
|
|
|
208
|
$self->element( |
1880
|
|
|
|
|
|
|
{ |
1881
|
|
|
|
|
|
|
'Name' => 'Statistics_posted_date', |
1882
|
|
|
|
|
|
|
'Data' => $d |
1883
|
|
|
|
|
|
|
} |
1884
|
|
|
|
|
|
|
); |
1885
|
|
|
|
|
|
|
} |
1886
|
|
|
|
|
|
|
elsif ( !/^\s+$/ ) { |
1887
|
|
|
|
|
|
|
#$self->debug( "unmatched stat $_"); |
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
} |
1890
|
2795
|
|
|
|
|
5706
|
$last = $_; |
1891
|
|
|
|
|
|
|
} |
1892
|
|
|
|
|
|
|
} elsif ( $self->in_element('hsp') ) { |
1893
|
9058
|
|
|
|
|
18614
|
$self->debug("blast.pm: Processing HSP\n"); |
1894
|
|
|
|
|
|
|
# let's read 3 lines at a time; |
1895
|
|
|
|
|
|
|
# bl2seq hackiness... Not sure I like |
1896
|
9058
|
|
66
|
|
|
15815
|
$self->{'_reporttype'} ||= $DEFAULTREPORTTYPE; |
1897
|
9058
|
|
|
|
|
19013
|
my %data = ( |
1898
|
|
|
|
|
|
|
'Query' => '', |
1899
|
|
|
|
|
|
|
'Mid' => '', |
1900
|
|
|
|
|
|
|
'Hit' => '' |
1901
|
|
|
|
|
|
|
); |
1902
|
9058
|
|
|
|
|
6079
|
my $len; |
1903
|
9058
|
|
66
|
|
|
28779
|
for ( my $i = 0 ; defined($_) && $i < 3 ; $i++ ) { |
1904
|
|
|
|
|
|
|
# $self->debug("$i: $_") if $v; |
1905
|
27150
|
50
|
66
|
|
|
127007
|
if ( ( $i == 0 && /^\s+$/) || |
|
|
|
33
|
|
|
|
|
1906
|
|
|
|
|
|
|
/^\s*(?:Lambda|Minus|Plus|Score)/i |
1907
|
|
|
|
|
|
|
) { |
1908
|
0
|
0
|
|
|
|
0
|
$self->_pushback($_) if defined $_; |
1909
|
0
|
|
|
|
|
0
|
$self->end_element( { 'Name' => 'Hsp' } ); |
1910
|
0
|
|
|
|
|
0
|
last; |
1911
|
|
|
|
|
|
|
} |
1912
|
27150
|
|
|
|
|
22638
|
chomp; |
1913
|
27150
|
100
|
|
|
|
76834
|
if (/^((Query|Sbjct):?\s+(\-?\d+)?\s*)(\S+)\s+(\-?\d+)?/) { |
|
|
50
|
|
|
|
|
|
1914
|
18116
|
|
|
|
|
45675
|
my ( $full, $type, $start, $str, $end ) = |
1915
|
|
|
|
|
|
|
( $1, $2, $3, $4, $5 ); |
1916
|
|
|
|
|
|
|
|
1917
|
18116
|
100
|
|
|
|
20643
|
if ( $str eq '-' ) { |
1918
|
48
|
100
|
|
|
|
83
|
$i = 3 if $type eq 'Sbjct'; |
1919
|
|
|
|
|
|
|
} |
1920
|
|
|
|
|
|
|
else { |
1921
|
18068
|
|
|
|
|
23406
|
$data{$type} = $str; |
1922
|
|
|
|
|
|
|
} |
1923
|
18116
|
|
|
|
|
12787
|
$len = length($full); |
1924
|
|
|
|
|
|
|
$self->{"\_$type"}->{'begin'} = $start |
1925
|
18116
|
100
|
|
|
|
38222
|
unless $self->{"_$type"}->{'begin'}; |
1926
|
18116
|
|
|
|
|
24305
|
$self->{"\_$type"}->{'end'} = $end; |
1927
|
|
|
|
|
|
|
} elsif (/^((Query|Sbjct):?\s+(\-?0+)\s*)/) { |
1928
|
|
|
|
|
|
|
# Bug from NCBI's BLAST: unaligned output |
1929
|
0
|
|
|
|
|
0
|
$_ = $self->_readline() for 0..1; |
1930
|
0
|
|
|
|
|
0
|
last; |
1931
|
|
|
|
|
|
|
} else { |
1932
|
9034
|
50
|
33
|
|
|
26807
|
$self->throw("no data for midline $_") |
1933
|
|
|
|
|
|
|
unless ( defined $_ && defined $len ); |
1934
|
9034
|
|
|
|
|
14839
|
$data{'Mid'} = substr( $_, $len ); |
1935
|
|
|
|
|
|
|
} |
1936
|
27150
|
|
|
|
|
45573
|
$_ = $self->_readline(); |
1937
|
|
|
|
|
|
|
} |
1938
|
|
|
|
|
|
|
$self->characters( |
1939
|
|
|
|
|
|
|
{ |
1940
|
|
|
|
|
|
|
'Name' => 'Hsp_qseq', |
1941
|
9058
|
|
|
|
|
26814
|
'Data' => $data{'Query'} |
1942
|
|
|
|
|
|
|
} |
1943
|
|
|
|
|
|
|
); |
1944
|
|
|
|
|
|
|
$self->characters( |
1945
|
|
|
|
|
|
|
{ |
1946
|
|
|
|
|
|
|
'Name' => 'Hsp_hseq', |
1947
|
9058
|
|
|
|
|
23007
|
'Data' => $data{'Sbjct'} |
1948
|
|
|
|
|
|
|
} |
1949
|
|
|
|
|
|
|
); |
1950
|
|
|
|
|
|
|
$self->characters( |
1951
|
|
|
|
|
|
|
{ |
1952
|
|
|
|
|
|
|
'Name' => 'Hsp_midline', |
1953
|
9058
|
|
|
|
|
20298
|
'Data' => $data{'Mid'} |
1954
|
|
|
|
|
|
|
} |
1955
|
|
|
|
|
|
|
); |
1956
|
|
|
|
|
|
|
} |
1957
|
|
|
|
|
|
|
else { |
1958
|
|
|
|
|
|
|
#$self->debug("blast.pm: unrecognized line $_"); |
1959
|
|
|
|
|
|
|
} |
1960
|
|
|
|
|
|
|
} |
1961
|
|
|
|
|
|
|
|
1962
|
102
|
|
|
|
|
399
|
$self->debug("blast.pm: End of BlastOutput\n"); |
1963
|
102
|
100
|
|
|
|
354
|
if ( $self->{'_seentop'} ) { |
1964
|
96
|
100
|
|
|
|
231
|
$self->within_element('hsp') |
1965
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hsp' } ); |
1966
|
96
|
100
|
|
|
|
237
|
$self->within_element('hit') |
1967
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Hit' } ); |
1968
|
|
|
|
|
|
|
# cleanup extra hits |
1969
|
96
|
100
|
|
|
|
257
|
$self->_cleanup_hits(\@hit_signifs) if scalar(@hit_signifs); |
1970
|
96
|
100
|
|
|
|
222
|
$self->within_element('iteration') |
1971
|
|
|
|
|
|
|
&& $self->end_element( { 'Name' => 'Iteration' } ); |
1972
|
96
|
100
|
|
|
|
216
|
if ($bl2seq_fix) { |
1973
|
6
|
|
|
|
|
24
|
$self->element( |
1974
|
|
|
|
|
|
|
{ |
1975
|
|
|
|
|
|
|
'Name' => 'BlastOutput_program', |
1976
|
|
|
|
|
|
|
'Data' => $reporttype |
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
); |
1979
|
|
|
|
|
|
|
} |
1980
|
96
|
|
|
|
|
313
|
$self->end_element( { 'Name' => 'BlastOutput' } ); |
1981
|
|
|
|
|
|
|
} |
1982
|
102
|
|
|
|
|
335
|
return $self->end_document(); |
1983
|
|
|
|
|
|
|
} |
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
# Private method for internal use only. |
1986
|
|
|
|
|
|
|
sub _start_blastoutput { |
1987
|
96
|
|
|
96
|
|
149
|
my $self = shift; |
1988
|
96
|
|
|
|
|
514
|
$self->start_element( { 'Name' => 'BlastOutput' } ); |
1989
|
96
|
|
|
|
|
192
|
$self->{'_seentop'} = 1; |
1990
|
96
|
|
|
|
|
159
|
$self->{'_result_count'}++; |
1991
|
96
|
|
|
|
|
133
|
$self->{'_handler_rc'} = undef; |
1992
|
|
|
|
|
|
|
} |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=head2 _will_handle |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
Title : _will_handle |
1997
|
|
|
|
|
|
|
Usage : Private method. For internal use only. |
1998
|
|
|
|
|
|
|
if( $self->_will_handle($type) ) { ... } |
1999
|
|
|
|
|
|
|
Function: Provides an optimized way to check whether or not an element of a |
2000
|
|
|
|
|
|
|
given type is to be handled. |
2001
|
|
|
|
|
|
|
Returns : Reference to EventHandler object if the element type is to be handled. |
2002
|
|
|
|
|
|
|
undef if the element type is not to be handled. |
2003
|
|
|
|
|
|
|
Args : string containing type of element. |
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
Optimizations: |
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
=over 2 |
2008
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
=item 1 |
2010
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
Using the cached pointer to the EventHandler to minimize repeated |
2012
|
|
|
|
|
|
|
lookups. |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
=item 2 |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
Caching the will_handle status for each type that is encountered so |
2017
|
|
|
|
|
|
|
that it only need be checked by calling |
2018
|
|
|
|
|
|
|
handler-Ewill_handle($type) once. |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
=back |
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
This does not lead to a major savings by itself (only 5-10%). In |
2023
|
|
|
|
|
|
|
combination with other optimizations, or for large parse jobs, the |
2024
|
|
|
|
|
|
|
savings good be significant. |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
To test against the unoptimized version, remove the parentheses from |
2027
|
|
|
|
|
|
|
around the third term in the ternary " ? : " operator and add two |
2028
|
|
|
|
|
|
|
calls to $self-E_eventHandler(). |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
=cut |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
sub _will_handle { |
2033
|
8516
|
|
|
8516
|
|
7078
|
my ( $self, $type ) = @_; |
2034
|
8516
|
|
|
|
|
8600
|
my $handler = $self->{'_handler_cache'}; |
2035
|
|
|
|
|
|
|
my $will_handle = |
2036
|
|
|
|
|
|
|
defined( $self->{'_will_handle_cache'}->{$type} ) |
2037
|
|
|
|
|
|
|
? $self->{'_will_handle_cache'}->{$type} |
2038
|
8516
|
100
|
|
|
|
16875
|
: ( $self->{'_will_handle_cache'}->{$type} = |
2039
|
|
|
|
|
|
|
$handler->will_handle($type) ); |
2040
|
|
|
|
|
|
|
|
2041
|
8516
|
50
|
|
|
|
12917
|
return $will_handle ? $handler : undef; |
2042
|
|
|
|
|
|
|
} |
2043
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
=head2 start_element |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
Title : start_element |
2047
|
|
|
|
|
|
|
Usage : $eventgenerator->start_element |
2048
|
|
|
|
|
|
|
Function: Handles a start element event |
2049
|
|
|
|
|
|
|
Returns : none |
2050
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
2051
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
=cut |
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
sub start_element { |
2055
|
4258
|
|
|
4258
|
1
|
3983
|
my ( $self, $data ) = @_; |
2056
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
# we currently don't care about attributes |
2058
|
4258
|
|
|
|
|
3927
|
my $nm = $data->{'Name'}; |
2059
|
4258
|
|
|
|
|
4610
|
my $type = $MODEMAP{$nm}; |
2060
|
4258
|
50
|
|
|
|
6698
|
if ($type) { |
2061
|
4258
|
|
|
|
|
5730
|
my $handler = $self->_will_handle($type); |
2062
|
4258
|
50
|
|
|
|
7439
|
if ($handler) { |
2063
|
4258
|
|
|
|
|
9642
|
my $func = sprintf( "start_%s", lc $type ); |
2064
|
4258
|
|
|
|
|
15355
|
$self->{'_handler_rc'} = $handler->$func( $data->{'Attributes'} ); |
2065
|
|
|
|
|
|
|
} |
2066
|
|
|
|
|
|
|
#else { |
2067
|
|
|
|
|
|
|
#$self->debug( # changed 4/29/2006 to play nice with other event handlers |
2068
|
|
|
|
|
|
|
# "Bio::SearchIO::InternalParserError ". |
2069
|
|
|
|
|
|
|
# "\nCan't handle elements of type \'$type.\'" |
2070
|
|
|
|
|
|
|
#); |
2071
|
|
|
|
|
|
|
#} |
2072
|
4258
|
|
|
|
|
5240
|
unshift @{ $self->{'_elements'} }, $type; |
|
4258
|
|
|
|
|
6677
|
|
2073
|
4258
|
100
|
|
|
|
6074
|
if ( $type eq 'result' ) { |
2074
|
96
|
|
|
|
|
200
|
$self->{'_values'} = {}; |
2075
|
96
|
|
|
|
|
202
|
$self->{'_result'} = undef; |
2076
|
|
|
|
|
|
|
} else { |
2077
|
|
|
|
|
|
|
# cleanup some things |
2078
|
4162
|
50
|
|
|
|
7337
|
if ( defined $self->{'_values'} ) { |
2079
|
4162
|
|
|
|
|
2931
|
foreach my $k ( |
2080
|
117000
|
|
|
|
|
201776
|
grep { /^\U$type\-/ } |
2081
|
4162
|
|
|
|
|
16558
|
keys %{ $self->{'_values'} } |
2082
|
|
|
|
|
|
|
) |
2083
|
|
|
|
|
|
|
{ |
2084
|
38633
|
|
|
|
|
36655
|
delete $self->{'_values'}->{$k}; |
2085
|
|
|
|
|
|
|
} |
2086
|
|
|
|
|
|
|
} |
2087
|
|
|
|
|
|
|
} |
2088
|
|
|
|
|
|
|
} |
2089
|
|
|
|
|
|
|
} |
2090
|
|
|
|
|
|
|
|
2091
|
|
|
|
|
|
|
=head2 end_element |
2092
|
|
|
|
|
|
|
|
2093
|
|
|
|
|
|
|
Title : end_element |
2094
|
|
|
|
|
|
|
Usage : $eventgenerator->end_element |
2095
|
|
|
|
|
|
|
Function: Handles an end element event |
2096
|
|
|
|
|
|
|
Returns : hashref with an element's worth of data |
2097
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
2098
|
|
|
|
|
|
|
|
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
=cut |
2101
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
sub end_element { |
2103
|
43007
|
|
|
43007
|
1
|
30352
|
my ( $self, $data ) = @_; |
2104
|
|
|
|
|
|
|
|
2105
|
43007
|
|
|
|
|
34088
|
my $nm = $data->{'Name'}; |
2106
|
43007
|
|
|
|
|
28339
|
my $type; |
2107
|
|
|
|
|
|
|
my $rc; |
2108
|
|
|
|
|
|
|
# cache these (TODO: we should probably cache all cross-report data) |
2109
|
43007
|
100
|
|
|
|
53919
|
if ( $nm eq 'BlastOutput_program' ) { |
2110
|
86
|
100
|
|
|
|
433
|
if ( $self->{'_last_data'} =~ /(t?blast[npx])/i ) { |
2111
|
83
|
|
|
|
|
244
|
$self->{'_reporttype'} = uc $1; |
2112
|
|
|
|
|
|
|
} |
2113
|
86
|
|
66
|
|
|
219
|
$self->{'_reporttype'} ||= $DEFAULTREPORTTYPE; |
2114
|
|
|
|
|
|
|
} |
2115
|
|
|
|
|
|
|
|
2116
|
43007
|
100
|
|
|
|
47090
|
if ( $nm eq 'BlastOutput_version' ) { |
2117
|
80
|
|
|
|
|
148
|
$self->{'_reportversion'} = $self->{'_last_data'}; |
2118
|
|
|
|
|
|
|
} |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
# Hsps are sort of weird, in that they end when another |
2121
|
|
|
|
|
|
|
# object begins so have to detect this in end_element for now |
2122
|
43007
|
100
|
|
|
|
47606
|
if ( $nm eq 'Hsp' ) { |
2123
|
1623
|
|
|
|
|
2745
|
foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq Hsp_features)) { |
2124
|
|
|
|
|
|
|
$self->element( |
2125
|
|
|
|
|
|
|
{ |
2126
|
|
|
|
|
|
|
'Name' => $_, |
2127
|
|
|
|
|
|
|
'Data' => $self->{'_last_hspdata'}->{$_} |
2128
|
|
|
|
|
|
|
} |
2129
|
6492
|
100
|
|
|
|
18448
|
) if defined $self->{'_last_hspdata'}->{$_}; |
2130
|
|
|
|
|
|
|
} |
2131
|
1623
|
|
|
|
|
1892
|
$self->{'_last_hspdata'} = {}; |
2132
|
|
|
|
|
|
|
$self->element( |
2133
|
|
|
|
|
|
|
{ |
2134
|
|
|
|
|
|
|
'Name' => 'Hsp_query-from', |
2135
|
1623
|
|
|
|
|
5719
|
'Data' => $self->{'_Query'}->{'begin'} |
2136
|
|
|
|
|
|
|
} |
2137
|
|
|
|
|
|
|
); |
2138
|
|
|
|
|
|
|
$self->element( |
2139
|
|
|
|
|
|
|
{ |
2140
|
|
|
|
|
|
|
'Name' => 'Hsp_query-to', |
2141
|
1623
|
|
|
|
|
4261
|
'Data' => $self->{'_Query'}->{'end'} |
2142
|
|
|
|
|
|
|
} |
2143
|
|
|
|
|
|
|
); |
2144
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
$self->element( |
2146
|
|
|
|
|
|
|
{ |
2147
|
|
|
|
|
|
|
'Name' => 'Hsp_hit-from', |
2148
|
1623
|
|
|
|
|
4622
|
'Data' => $self->{'_Sbjct'}->{'begin'} |
2149
|
|
|
|
|
|
|
} |
2150
|
|
|
|
|
|
|
); |
2151
|
|
|
|
|
|
|
$self->element( |
2152
|
|
|
|
|
|
|
{ |
2153
|
|
|
|
|
|
|
'Name' => 'Hsp_hit-to', |
2154
|
1623
|
|
|
|
|
4259
|
'Data' => $self->{'_Sbjct'}->{'end'} |
2155
|
|
|
|
|
|
|
} |
2156
|
|
|
|
|
|
|
); |
2157
|
|
|
|
|
|
|
|
2158
|
|
|
|
|
|
|
# } elsif( $nm eq 'Iteration' ) { |
2159
|
|
|
|
|
|
|
# Nothing special needs to be done here. |
2160
|
|
|
|
|
|
|
} |
2161
|
43007
|
100
|
|
|
|
78985
|
if ( $type = $MODEMAP{$nm} ) { |
|
|
100
|
|
|
|
|
|
2162
|
4258
|
|
|
|
|
5990
|
my $handler = $self->_will_handle($type); |
2163
|
4258
|
50
|
|
|
|
6575
|
if ($handler) { |
2164
|
4258
|
|
|
|
|
10604
|
my $func = sprintf( "end_%s", lc $type ); |
2165
|
4258
|
|
|
|
|
13568
|
$rc = $handler->$func( $self->{'_reporttype'}, $self->{'_values'} ); |
2166
|
|
|
|
|
|
|
} |
2167
|
4258
|
|
|
|
|
3144
|
shift @{ $self->{'_elements'} }; |
|
4258
|
|
|
|
|
6180
|
|
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
} |
2170
|
|
|
|
|
|
|
elsif ( $MAPPING{$nm} ) { |
2171
|
38562
|
100
|
|
|
|
45950
|
if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
# this is where we shove in the data from the |
2174
|
|
|
|
|
|
|
# hashref info about params or statistics |
2175
|
2900
|
|
|
|
|
2058
|
my $key = ( keys %{ $MAPPING{$nm} } )[0]; |
|
2900
|
|
|
|
|
5766
|
|
2176
|
|
|
|
|
|
|
$self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = |
2177
|
2900
|
|
|
|
|
7579
|
$self->{'_last_data'}; |
2178
|
|
|
|
|
|
|
} |
2179
|
|
|
|
|
|
|
else { |
2180
|
35662
|
|
|
|
|
60113
|
$self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; |
2181
|
|
|
|
|
|
|
} |
2182
|
|
|
|
|
|
|
} |
2183
|
|
|
|
|
|
|
else { |
2184
|
|
|
|
|
|
|
#$self->debug("blast.pm: unknown nm $nm, ignoring\n"); |
2185
|
|
|
|
|
|
|
} |
2186
|
43007
|
|
|
|
|
34121
|
$self->{'_last_data'} = ''; # remove read data if we are at |
2187
|
|
|
|
|
|
|
# end of an element |
2188
|
43007
|
100
|
100
|
|
|
70170
|
$self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); |
2189
|
43007
|
|
|
|
|
31676
|
$self->{'_seen_hsp_features'} = 0; |
2190
|
43007
|
|
|
|
|
49519
|
return $rc; |
2191
|
|
|
|
|
|
|
} |
2192
|
|
|
|
|
|
|
|
2193
|
|
|
|
|
|
|
=head2 element |
2194
|
|
|
|
|
|
|
|
2195
|
|
|
|
|
|
|
Title : element |
2196
|
|
|
|
|
|
|
Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); |
2197
|
|
|
|
|
|
|
Function: Convenience method that calls start_element, characters, end_element |
2198
|
|
|
|
|
|
|
Returns : none |
2199
|
|
|
|
|
|
|
Args : Hash ref with the keys 'Name' and 'Data' |
2200
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
=cut |
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
sub element { |
2205
|
38749
|
|
|
38749
|
1
|
30593
|
my ( $self, $data ) = @_; |
2206
|
|
|
|
|
|
|
# Note that start element isn't needed for character data |
2207
|
|
|
|
|
|
|
# Not too SAX-y, though |
2208
|
|
|
|
|
|
|
#$self->start_element($data); |
2209
|
38749
|
|
|
|
|
37922
|
$self->characters($data); |
2210
|
38749
|
|
|
|
|
43311
|
$self->end_element($data); |
2211
|
|
|
|
|
|
|
} |
2212
|
|
|
|
|
|
|
|
2213
|
|
|
|
|
|
|
=head2 characters |
2214
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
Title : characters |
2216
|
|
|
|
|
|
|
Usage : $eventgenerator->characters($str) |
2217
|
|
|
|
|
|
|
Function: Send a character events |
2218
|
|
|
|
|
|
|
Returns : none |
2219
|
|
|
|
|
|
|
Args : string |
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
=cut |
2223
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
sub characters { |
2225
|
65923
|
|
|
65923
|
1
|
50430
|
my ( $self, $data ) = @_; |
2226
|
65923
|
100
|
100
|
|
|
64941
|
if ( $self->in_element('hsp') |
2227
|
|
|
|
|
|
|
&& $data->{'Name'} =~ /^Hsp\_(qseq|hseq|midline)$/ ) |
2228
|
|
|
|
|
|
|
{ |
2229
|
|
|
|
|
|
|
$self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'} |
2230
|
32019
|
100
|
|
|
|
82882
|
if defined $data->{'Data'}; |
2231
|
|
|
|
|
|
|
} |
2232
|
65923
|
100
|
100
|
|
|
218951
|
return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ ); |
2233
|
65853
|
|
|
|
|
96099
|
$self->{'_last_data'} = $data->{'Data'}; |
2234
|
|
|
|
|
|
|
} |
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
=head2 within_element |
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
Title : within_element |
2239
|
|
|
|
|
|
|
Usage : if( $eventgenerator->within_element($element) ) {} |
2240
|
|
|
|
|
|
|
Function: Test if we are within a particular element |
2241
|
|
|
|
|
|
|
This is different than 'in' because within can be tested |
2242
|
|
|
|
|
|
|
for a whole block. |
2243
|
|
|
|
|
|
|
Returns : boolean |
2244
|
|
|
|
|
|
|
Args : string element name |
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
See Also: L |
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
=cut |
2249
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
sub within_element { |
2251
|
2214
|
|
|
2214
|
1
|
2008
|
my ( $self, $name ) = @_; |
2252
|
|
|
|
|
|
|
return 0 |
2253
|
|
|
|
|
|
|
if ( !defined $name && !defined $self->{'_elements'} |
2254
|
2214
|
100
|
33
|
|
|
4850
|
|| scalar @{ $self->{'_elements'} } == 0 ); |
|
2214
|
|
66
|
|
|
5512
|
|
2255
|
2212
|
|
|
|
|
1702
|
foreach ( @{ $self->{'_elements'} } ) { |
|
2212
|
|
|
|
|
3471
|
|
2256
|
3134
|
100
|
|
|
|
4751
|
if ( $_ eq $name ) { |
2257
|
1949
|
|
|
|
|
4562
|
return 1; |
2258
|
|
|
|
|
|
|
} |
2259
|
|
|
|
|
|
|
} |
2260
|
263
|
|
|
|
|
500
|
return 0; |
2261
|
|
|
|
|
|
|
} |
2262
|
|
|
|
|
|
|
|
2263
|
|
|
|
|
|
|
=head2 in_element |
2264
|
|
|
|
|
|
|
|
2265
|
|
|
|
|
|
|
Title : in_element |
2266
|
|
|
|
|
|
|
Usage : if( $eventgenerator->in_element($element) ) {} |
2267
|
|
|
|
|
|
|
Function: Test if we are in a particular element |
2268
|
|
|
|
|
|
|
This is different than 'within_element' because within |
2269
|
|
|
|
|
|
|
can be tested for a whole block. |
2270
|
|
|
|
|
|
|
Returns : boolean |
2271
|
|
|
|
|
|
|
Args : string element name |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
See Also: L |
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=cut |
2276
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
sub in_element { |
2278
|
264029
|
|
|
264029
|
1
|
194503
|
my ( $self, $name ) = @_; |
2279
|
264029
|
100
|
|
|
|
320818
|
return 0 if !defined $self->{'_elements'}->[0]; |
2280
|
263910
|
|
|
|
|
989808
|
return ( $self->{'_elements'}->[0] eq $name ); |
2281
|
|
|
|
|
|
|
} |
2282
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
=head2 start_document |
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
Title : start_document |
2286
|
|
|
|
|
|
|
Usage : $eventgenerator->start_document |
2287
|
|
|
|
|
|
|
Function: Handle a start document event |
2288
|
|
|
|
|
|
|
Returns : none |
2289
|
|
|
|
|
|
|
Args : none |
2290
|
|
|
|
|
|
|
|
2291
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
=cut |
2293
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
sub start_document { |
2295
|
102
|
|
|
102
|
1
|
138
|
my ($self) = @_; |
2296
|
102
|
|
|
|
|
205
|
$self->{'_lasttype'} = ''; |
2297
|
102
|
|
|
|
|
199
|
$self->{'_values'} = {}; |
2298
|
102
|
|
|
|
|
414
|
$self->{'_result'} = undef; |
2299
|
102
|
|
|
|
|
216
|
$self->{'_elements'} = []; |
2300
|
|
|
|
|
|
|
} |
2301
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
=head2 end_document |
2303
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
Title : end_document |
2305
|
|
|
|
|
|
|
Usage : $eventgenerator->end_document |
2306
|
|
|
|
|
|
|
Function: Handles an end document event |
2307
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
2308
|
|
|
|
|
|
|
Args : none |
2309
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
=cut |
2312
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
sub end_document { |
2314
|
102
|
|
|
102
|
1
|
154
|
my ( $self, @args ) = @_; |
2315
|
|
|
|
|
|
|
|
2316
|
|
|
|
|
|
|
#$self->debug("blast.pm: end_document\n"); |
2317
|
102
|
|
|
|
|
575
|
return $self->{'_result'}; |
2318
|
|
|
|
|
|
|
} |
2319
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
sub write_result { |
2321
|
5
|
|
|
5
|
1
|
24
|
my ( $self, $blast, @args ) = @_; |
2322
|
|
|
|
|
|
|
|
2323
|
5
|
50
|
|
|
|
14
|
if ( not defined( $self->writer ) ) { |
2324
|
0
|
|
|
|
|
0
|
$self->warn("Writer not defined. Using a $DEFAULT_BLAST_WRITER_CLASS"); |
2325
|
0
|
|
|
|
|
0
|
$self->writer( $DEFAULT_BLAST_WRITER_CLASS->new() ); |
2326
|
|
|
|
|
|
|
} |
2327
|
5
|
|
|
|
|
31
|
$self->SUPER::write_result( $blast, @args ); |
2328
|
|
|
|
|
|
|
} |
2329
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
sub result_count { |
2331
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
2332
|
0
|
|
|
|
|
0
|
return $self->{'_result_count'}; |
2333
|
|
|
|
|
|
|
} |
2334
|
|
|
|
|
|
|
|
2335
|
0
|
|
|
0
|
0
|
0
|
sub report_count { shift->result_count } |
2336
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
=head2 inclusion_threshold |
2338
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
Title : inclusion_threshold |
2340
|
|
|
|
|
|
|
Usage : my $incl_thresh = $isreb->inclusion_threshold; |
2341
|
|
|
|
|
|
|
: $isreb->inclusion_threshold(1e-5); |
2342
|
|
|
|
|
|
|
Function: Get/Set the e-value threshold for inclusion in the PSI-BLAST |
2343
|
|
|
|
|
|
|
score matrix model (blastpgp) that was used for generating the reports |
2344
|
|
|
|
|
|
|
being parsed. |
2345
|
|
|
|
|
|
|
Returns : number (real) |
2346
|
|
|
|
|
|
|
Default value: $Bio::SearchIO::IteratedSearchResultEventBuilder::DEFAULT_INCLUSION_THRESHOLD |
2347
|
|
|
|
|
|
|
Args : number (real) (e.g., 0.0001 or 1e-4 ) |
2348
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
=cut |
2350
|
|
|
|
|
|
|
|
2351
|
|
|
|
|
|
|
=head2 max_significance |
2352
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
Usage : $obj->max_significance(); |
2354
|
|
|
|
|
|
|
Purpose : Set/Get the P or Expect value used as significance screening cutoff. |
2355
|
|
|
|
|
|
|
This is the value of the -signif parameter supplied to new(). |
2356
|
|
|
|
|
|
|
Hits with P or E-value above this are skipped. |
2357
|
|
|
|
|
|
|
Returns : Scientific notation number with this format: 1.0e-05. |
2358
|
|
|
|
|
|
|
Argument : Scientific notation number or float (when setting) |
2359
|
|
|
|
|
|
|
Comments : Screening of significant hits uses the data provided on the |
2360
|
|
|
|
|
|
|
: description line. For NCBI BLAST1 and WU-BLAST, this data |
2361
|
|
|
|
|
|
|
: is P-value. for NCBI BLAST2 it is an Expect value. |
2362
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
=cut |
2364
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
=head2 signif |
2366
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
Synonym for L |
2368
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
=cut |
2370
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
=head2 min_score |
2372
|
|
|
|
|
|
|
|
2373
|
|
|
|
|
|
|
Usage : $obj->min_score(); |
2374
|
|
|
|
|
|
|
Purpose : Set/Get the Blast score used as screening cutoff. |
2375
|
|
|
|
|
|
|
This is the value of the -score parameter supplied to new(). |
2376
|
|
|
|
|
|
|
Hits with scores below this are skipped. |
2377
|
|
|
|
|
|
|
Returns : Integer or scientific notation number. |
2378
|
|
|
|
|
|
|
Argument : Integer or scientific notation number (when setting) |
2379
|
|
|
|
|
|
|
Comments : Screening of significant hits uses the data provided on the |
2380
|
|
|
|
|
|
|
: description line. |
2381
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
=cut |
2383
|
|
|
|
|
|
|
|
2384
|
|
|
|
|
|
|
=head2 min_query_length |
2385
|
|
|
|
|
|
|
|
2386
|
|
|
|
|
|
|
Usage : $obj->min_query_length(); |
2387
|
|
|
|
|
|
|
Purpose : Gets the query sequence length used as screening criteria. |
2388
|
|
|
|
|
|
|
This is the value of the -min_query_len parameter supplied to new(). |
2389
|
|
|
|
|
|
|
Hits with sequence length below this are skipped. |
2390
|
|
|
|
|
|
|
Returns : Integer |
2391
|
|
|
|
|
|
|
Argument : n/a |
2392
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
=cut |
2394
|
|
|
|
|
|
|
|
2395
|
|
|
|
|
|
|
=head2 best_hit_only |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
Title : best_hit_only |
2398
|
|
|
|
|
|
|
Usage : print "only getting best hit.\n" if $obj->best_hit_only; |
2399
|
|
|
|
|
|
|
Purpose : Set/Get the indicator for whether or not to process only |
2400
|
|
|
|
|
|
|
: the best BlastHit. |
2401
|
|
|
|
|
|
|
Returns : Boolean (1 | 0) |
2402
|
|
|
|
|
|
|
Argument : Boolean (1 | 0) (when setting) |
2403
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
=cut |
2405
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
=head2 check_all_hits |
2407
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
Title : check_all_hits |
2409
|
|
|
|
|
|
|
Usage : print "checking all hits.\n" if $obj->check_all_hits; |
2410
|
|
|
|
|
|
|
Purpose : Set/Get the indicator for whether or not to process all hits. |
2411
|
|
|
|
|
|
|
: If false, the parser will stop processing hits after the |
2412
|
|
|
|
|
|
|
: the first non-significance hit or the first hit that fails |
2413
|
|
|
|
|
|
|
: any hit filter. |
2414
|
|
|
|
|
|
|
Returns : Boolean (1 | 0) |
2415
|
|
|
|
|
|
|
Argument : Boolean (1 | 0) (when setting) |
2416
|
|
|
|
|
|
|
|
2417
|
|
|
|
|
|
|
=cut |
2418
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
# general private method used to make minimal hits from leftover |
2420
|
|
|
|
|
|
|
# data in the hit table |
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
sub _cleanup_hits { |
2423
|
8
|
|
|
8
|
|
43
|
my ($self, $hits) = @_; |
2424
|
8
|
|
|
|
|
21
|
while ( my $v = shift @{ $hits }) { |
|
1532
|
|
|
|
|
3043
|
|
2425
|
1524
|
50
|
|
|
|
2137
|
next unless defined $v; |
2426
|
1524
|
|
|
|
|
3601
|
$self->start_element( { 'Name' => 'Hit' } ); |
2427
|
1524
|
|
|
|
|
3586
|
my $id = $v->[2]; |
2428
|
1524
|
|
|
|
|
1437
|
my $desc = $v->[3]; |
2429
|
1524
|
|
|
|
|
4055
|
$self->element( |
2430
|
|
|
|
|
|
|
{ |
2431
|
|
|
|
|
|
|
'Name' => 'Hit_id', |
2432
|
|
|
|
|
|
|
'Data' => $id |
2433
|
|
|
|
|
|
|
} |
2434
|
|
|
|
|
|
|
); |
2435
|
1524
|
|
|
|
|
3719
|
my ($gi, $acc, $version ) = $self->_get_seq_identifiers($id); |
2436
|
1524
|
|
|
|
|
3135
|
$self->element( |
2437
|
|
|
|
|
|
|
{ |
2438
|
|
|
|
|
|
|
'Name' => 'Hit_accession', |
2439
|
|
|
|
|
|
|
'Data' => $acc |
2440
|
|
|
|
|
|
|
} |
2441
|
|
|
|
|
|
|
); |
2442
|
1524
|
50
|
|
|
|
2725
|
if ( defined $v ) { |
2443
|
1524
|
|
|
|
|
3343
|
$self->element( |
2444
|
|
|
|
|
|
|
{ |
2445
|
|
|
|
|
|
|
'Name' => 'Hit_signif', |
2446
|
|
|
|
|
|
|
'Data' => $v->[0] |
2447
|
|
|
|
|
|
|
} |
2448
|
|
|
|
|
|
|
); |
2449
|
1524
|
50
|
|
|
|
2360
|
if (exists $self->{'_wublast'}) { |
2450
|
0
|
|
|
|
|
0
|
$self->element( |
2451
|
|
|
|
|
|
|
{ |
2452
|
|
|
|
|
|
|
'Name' => 'Hit_score', |
2453
|
|
|
|
|
|
|
'Data' => $v->[1] |
2454
|
|
|
|
|
|
|
} |
2455
|
|
|
|
|
|
|
); |
2456
|
|
|
|
|
|
|
} else { |
2457
|
1524
|
|
|
|
|
3175
|
$self->element( |
2458
|
|
|
|
|
|
|
{ |
2459
|
|
|
|
|
|
|
'Name' => 'Hit_bits', |
2460
|
|
|
|
|
|
|
'Data' => $v->[1] |
2461
|
|
|
|
|
|
|
} |
2462
|
|
|
|
|
|
|
); |
2463
|
|
|
|
|
|
|
} |
2464
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
} |
2466
|
|
|
|
|
|
|
$self->element( |
2467
|
|
|
|
|
|
|
{ |
2468
|
1524
|
|
|
|
|
3300
|
'Name' => 'Hit_def', |
2469
|
|
|
|
|
|
|
'Data' => $desc |
2470
|
|
|
|
|
|
|
} |
2471
|
|
|
|
|
|
|
); |
2472
|
1524
|
|
|
|
|
3017
|
$self->end_element( { 'Name' => 'Hit' } ); |
2473
|
|
|
|
|
|
|
} |
2474
|
|
|
|
|
|
|
} |
2475
|
|
|
|
|
|
|
|
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
1; |
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
__END__ |