line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SearchIO::erpin |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Chris Fields |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Chris Fields |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SearchIO::erpin - SearchIO-based ERPIN parser |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# do not call this module directly. Use Bio::SearchIO. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This is an experimental SearchIO-based parser for output from |
25
|
|
|
|
|
|
|
the erpin program. It currently parses erpin output for ERPIN |
26
|
|
|
|
|
|
|
versions 4.2.5 and above; older versions may work but will not be supported. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 FEEDBACK |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head2 Mailing Lists |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
33
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
34
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
37
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 Support |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
I |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
46
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
47
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
48
|
|
|
|
|
|
|
with code and data examples if at all possible. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Reporting Bugs |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
53
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
54
|
|
|
|
|
|
|
web: |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 AUTHOR - Chris Fields |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Email cjfields-at-uiuc-dot-edu |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head1 APPENDIX |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
65
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=cut |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Let the code begin... |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
package Bio::SearchIO::erpin; |
72
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
73
|
|
|
|
|
|
|
|
74
|
1
|
|
|
1
|
|
5
|
use Data::Dumper; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
51
|
|
75
|
1
|
|
|
1
|
|
5
|
use base qw(Bio::SearchIO); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1724
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my %MODEMAP = ( |
78
|
|
|
|
|
|
|
'Result' => 'result', |
79
|
|
|
|
|
|
|
'Hit' => 'hit', |
80
|
|
|
|
|
|
|
'Hsp' => 'hsp' |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
my %MAPPING = ( |
84
|
|
|
|
|
|
|
'Hsp_bit-score' => 'HSP-bits', |
85
|
|
|
|
|
|
|
'Hsp_score' => 'HSP-score', |
86
|
|
|
|
|
|
|
'Hsp_evalue' => 'HSP-evalue', # no evalues yet |
87
|
|
|
|
|
|
|
'Hsp_query-from' => 'HSP-query_start', |
88
|
|
|
|
|
|
|
'Hsp_query-to' => 'HSP-query_end', |
89
|
|
|
|
|
|
|
'Hsp_hit-from' => 'HSP-hit_start', # |
90
|
|
|
|
|
|
|
'Hsp_hit-to' => 'HSP-hit_end', # |
91
|
|
|
|
|
|
|
'Hsp_gaps' => 'HSP-hsp_gaps', |
92
|
|
|
|
|
|
|
'Hsp_hitgaps' => 'HSP-hit_gaps', |
93
|
|
|
|
|
|
|
'Hsp_querygaps' => 'HSP-query_gaps', |
94
|
|
|
|
|
|
|
'Hsp_qseq' => 'HSP-query_seq', |
95
|
|
|
|
|
|
|
'Hsp_hseq' => 'HSP-hit_seq', |
96
|
|
|
|
|
|
|
'Hsp_midline' => 'HSP-homology_seq', |
97
|
|
|
|
|
|
|
'Hsp_structure' => 'HSP-meta', |
98
|
|
|
|
|
|
|
'Hsp_align-len' => 'HSP-hsp_length', |
99
|
|
|
|
|
|
|
'Hsp_stranded' => 'HSP-stranded', |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# not supported yet |
102
|
|
|
|
|
|
|
'Hsp_positive' => 'HSP-conserved', |
103
|
|
|
|
|
|
|
'Hsp_identity' => 'HSP-identical', |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
'Hit_id' => 'HIT-name', |
106
|
|
|
|
|
|
|
'Hit_len' => 'HIT-length', |
107
|
|
|
|
|
|
|
'Hit_gi' => 'HIT-ncbi_gi', |
108
|
|
|
|
|
|
|
'Hit_accession' => 'HIT-accession', |
109
|
|
|
|
|
|
|
'Hit_def' => 'HIT-description', |
110
|
|
|
|
|
|
|
'Hit_signif' => 'HIT-significance', # none yet |
111
|
|
|
|
|
|
|
'Hit_score' => 'HIT-score', # best HSP bit score |
112
|
|
|
|
|
|
|
'Hit_bits' => 'HIT-bits', # best HSP bit score |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
'ERPIN_program' => 'RESULT-algorithm_name', # get/set |
115
|
|
|
|
|
|
|
'ERPIN_version' => 'RESULT-algorithm_version', # get/set |
116
|
|
|
|
|
|
|
'ERPIN_query-def'=> 'RESULT-query_name', # get/set |
117
|
|
|
|
|
|
|
'ERPIN_query-len'=> 'RESULT-query_length', |
118
|
|
|
|
|
|
|
'ERPIN_query-acc'=> 'RESULT-query_accession', # get/set |
119
|
|
|
|
|
|
|
'ERPIN_querydesc'=> 'RESULT-query_description', # get/set |
120
|
|
|
|
|
|
|
'ERPIN_db' => 'RESULT-database_name', # get/set |
121
|
|
|
|
|
|
|
'ERPIN_db-len' => 'RESULT-database_entries', # none yet |
122
|
|
|
|
|
|
|
'ERPIN_db-let' => 'RESULT-database_letters', # none yet |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
'Parameters_cutoff' => { 'RESULT-parameters' => 'cutoff' }, |
125
|
|
|
|
|
|
|
'Parameters_expect' => { 'RESULT-parameters' => 'expect' }, |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
'Parameters_include' => { 'RESULT-parameters' => 'include' }, |
128
|
|
|
|
|
|
|
'Parameters_sc-match' => { 'RESULT-parameters' => 'match' }, |
129
|
|
|
|
|
|
|
'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' }, |
130
|
|
|
|
|
|
|
'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen' }, |
131
|
|
|
|
|
|
|
'Parameters_gap-extend' => { 'RESULT-parameters' => 'gapext' }, |
132
|
|
|
|
|
|
|
'Parameters_filter' => { 'RESULT-parameters' => 'filter' }, |
133
|
|
|
|
|
|
|
'Parameters_allowgaps' => { 'RESULT-parameters' => 'allowgaps' }, |
134
|
|
|
|
|
|
|
'Parameters_full_dbpath' => { 'RESULT-parameters' => 'full_dbpath' }, |
135
|
|
|
|
|
|
|
'Statistics_db-let' => { 'RESULT-statistics' => 'dbletters' }, |
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
my $MINSCORE = 0; |
139
|
|
|
|
|
|
|
my $DEFAULT_VERSION = '4.2.5'; |
140
|
|
|
|
|
|
|
my $DEFAULT_ALGORITHM = 'erpin'; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 new |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Title : new |
145
|
|
|
|
|
|
|
Usage : my $obj = Bio::SearchIO::infernal->new(); |
146
|
|
|
|
|
|
|
Function: Builds a new Bio::SearchIO::infernal object |
147
|
|
|
|
|
|
|
Returns : Bio::SearchIO::infernal |
148
|
|
|
|
|
|
|
Args : -fh/-file => cmsearch (infernal) filename |
149
|
|
|
|
|
|
|
-format => 'erpin' |
150
|
|
|
|
|
|
|
-algorithm => algorithm (default 'Infernal') |
151
|
|
|
|
|
|
|
-query_acc => query accession, eg. Rfam accession (default undef) |
152
|
|
|
|
|
|
|
-hsp_minscore => minimum HSP score cutoff |
153
|
|
|
|
|
|
|
-version => ERPIN version (not reported in output) |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub _initialize { |
158
|
1
|
|
|
1
|
|
3
|
my ( $self, @args ) = @_; |
159
|
1
|
|
|
|
|
5
|
$self->SUPER::_initialize(@args); |
160
|
1
|
|
|
|
|
6
|
my ($cutoff, $accession, $version) = |
161
|
|
|
|
|
|
|
$self->_rearrange([qw(HSP_MINSCORE QUERY_ACC VERSION)],@args); |
162
|
1
|
|
|
|
|
7
|
my $handler = $self->_eventHandler; |
163
|
1
|
|
|
|
|
5
|
$handler->register_factory( |
164
|
|
|
|
|
|
|
'result', |
165
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
166
|
|
|
|
|
|
|
-type => 'Bio::Search::Result::GenericResult', |
167
|
|
|
|
|
|
|
-interface => 'Bio::Search::Result::ResultI', |
168
|
|
|
|
|
|
|
-verbose => $self->verbose() |
169
|
|
|
|
|
|
|
) |
170
|
|
|
|
|
|
|
); |
171
|
|
|
|
|
|
|
|
172
|
1
|
|
|
|
|
4
|
$handler->register_factory( |
173
|
|
|
|
|
|
|
'hit', |
174
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
175
|
|
|
|
|
|
|
-type => 'Bio::Search::Hit::ModelHit', |
176
|
|
|
|
|
|
|
-interface => 'Bio::Search::Hit::HitI', |
177
|
|
|
|
|
|
|
-verbose => $self->verbose() |
178
|
|
|
|
|
|
|
) |
179
|
|
|
|
|
|
|
); |
180
|
|
|
|
|
|
|
|
181
|
1
|
|
|
|
|
5
|
$handler->register_factory( |
182
|
|
|
|
|
|
|
'hsp', |
183
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
184
|
|
|
|
|
|
|
-type => 'Bio::Search::HSP::ModelHSP', |
185
|
|
|
|
|
|
|
-interface => 'Bio::Search::HSP::HSPI', |
186
|
|
|
|
|
|
|
-verbose => $self->verbose() |
187
|
|
|
|
|
|
|
) |
188
|
|
|
|
|
|
|
); |
189
|
1
|
50
|
|
|
|
8
|
$accession && $self->query_accession($accession); |
190
|
1
|
|
33
|
|
|
5
|
$cutoff ||= $MINSCORE; |
191
|
1
|
|
|
|
|
4
|
$self->hsp_minscore($cutoff); |
192
|
1
|
|
33
|
|
|
3
|
$version ||= $DEFAULT_VERSION; |
193
|
1
|
|
|
|
|
5
|
$self->algorithm_version($version); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head2 next_result |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Title : next_result |
199
|
|
|
|
|
|
|
Usage : my $hit = $searchio->next_result; |
200
|
|
|
|
|
|
|
Function: Returns the next Result from a search |
201
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
202
|
|
|
|
|
|
|
Args : none |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub next_result { |
207
|
1
|
|
|
1
|
1
|
9
|
my ($self) = @_; |
208
|
1
|
|
|
|
|
1
|
my $seentop = 0; |
209
|
1
|
|
|
|
|
6
|
local $/ = "\n"; |
210
|
1
|
|
|
|
|
1
|
local $_; |
211
|
1
|
|
|
|
|
3
|
my $accession = $self->query_accession; |
212
|
1
|
|
|
|
|
3
|
my $minscore = $self->hsp_minscore; |
213
|
1
|
|
|
|
|
2
|
my $version = $self->algorithm_version; |
214
|
1
|
|
|
|
|
3
|
my $verbose = $self->verbose; # cache for speed? |
215
|
1
|
|
|
|
|
5
|
$self->start_document(); |
216
|
1
|
|
|
|
|
2
|
my ($lasthit, $lastscore, $lastlen, $lasteval); |
217
|
|
|
|
|
|
|
#my $hitline; |
218
|
|
|
|
|
|
|
PARSER: |
219
|
1
|
|
|
|
|
14
|
while ( defined( my $line = $self->_readline ) ) { |
220
|
16
|
100
|
|
|
|
58
|
next if $line =~ m{^\s*$}; |
221
|
13
|
100
|
100
|
|
|
66
|
if ($line =~ m{^Training\sset:\s+"(.*)"}xmso) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
222
|
1
|
50
|
|
|
|
2
|
if ($seentop) { |
223
|
0
|
|
|
|
|
0
|
$self->_pushback($line); |
224
|
0
|
|
|
|
|
0
|
last PARSER; |
225
|
|
|
|
|
|
|
} |
226
|
1
|
|
|
|
|
7
|
$self->start_element({'Name' => 'Result'}); |
227
|
1
|
|
|
|
|
11
|
$self->element_hash( { |
228
|
|
|
|
|
|
|
'ERPIN_query-def' => $1, |
229
|
|
|
|
|
|
|
'ERPIN_program' =>'erpin', |
230
|
|
|
|
|
|
|
'ERPIN_version' => $version, |
231
|
|
|
|
|
|
|
'ERPIN_query-acc' => $accession, |
232
|
|
|
|
|
|
|
}); |
233
|
1
|
|
|
|
|
3
|
$seentop = 1; |
234
|
|
|
|
|
|
|
# parse rest of header here |
235
|
|
|
|
|
|
|
HEADER: |
236
|
1
|
|
|
|
|
5
|
while (defined ($line = $self->_readline) ) { |
237
|
9
|
100
|
|
|
|
25
|
next if $line =~ m{^\s*$}; |
238
|
7
|
100
|
66
|
|
|
28
|
if (index($line, '>') == 0 || |
239
|
|
|
|
|
|
|
index($line, '-------- at level 1 --------') == 0) { |
240
|
1
|
|
|
|
|
7
|
$self->_pushback($line); |
241
|
1
|
|
|
|
|
3
|
last HEADER; |
242
|
|
|
|
|
|
|
} |
243
|
6
|
100
|
|
|
|
46
|
if ($line =~ m{^\s+(\d+\ssequences\sof\slength\s\d+)}xmso) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
244
|
1
|
|
|
|
|
9
|
$self->element( |
245
|
|
|
|
|
|
|
{'Name' => 'ERPIN_querydesc', |
246
|
|
|
|
|
|
|
'Data' => $1} |
247
|
|
|
|
|
|
|
); |
248
|
|
|
|
|
|
|
} elsif ($line =~ m{^Cutoff:\s+(\S+)}xmso) { |
249
|
1
|
|
|
|
|
6
|
$self->element( |
250
|
|
|
|
|
|
|
{'Name' => 'Parameters_cutoff', |
251
|
|
|
|
|
|
|
'Data' => $1} |
252
|
|
|
|
|
|
|
); |
253
|
|
|
|
|
|
|
} elsif ($line =~ m{^Database:\s+"(.*)"}xmso) { |
254
|
1
|
|
|
|
|
5
|
$self->element( |
255
|
|
|
|
|
|
|
{'Name' => 'ERPIN_db', |
256
|
|
|
|
|
|
|
'Data' => $1} |
257
|
|
|
|
|
|
|
); |
258
|
|
|
|
|
|
|
} elsif ($line =~ m{^\s+(\d+)\snucleotides\sto\sbe\sprocessed\sin\s(\d+)\ssequences}xmso) { |
259
|
0
|
|
|
|
|
0
|
$self->element_hash( |
260
|
|
|
|
|
|
|
{'ERPIN_db-len' => $2, |
261
|
|
|
|
|
|
|
'ERPIN_db-let' => $1} |
262
|
|
|
|
|
|
|
); |
263
|
|
|
|
|
|
|
} elsif ($line =~ m{^E-value\sat\scutoff\s\S+\sfor\s\S+\sdouble\sstrand\sdata:\s+(\S+)}xmso) { |
264
|
1
|
|
|
|
|
11
|
$self->element( |
265
|
|
|
|
|
|
|
{'Name' => 'Parameters_expect', |
266
|
|
|
|
|
|
|
'Data' => $1} |
267
|
|
|
|
|
|
|
); |
268
|
|
|
|
|
|
|
} elsif ($line =~ m{^\s+(ATGC\sratios:\s+(?:\S+\s+\S+\s+\S+\s+\S+))}) { |
269
|
1
|
|
|
|
|
4
|
$self->element( |
270
|
|
|
|
|
|
|
{'Name' => 'Statistics_db-let', |
271
|
|
|
|
|
|
|
'Data' => $1} |
272
|
|
|
|
|
|
|
); |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} elsif ($line =~ m{^>(\S+)\s+(.*)}xmso ) { |
276
|
2
|
|
|
|
|
8
|
my ($id, $desc) = ($1, $2); |
277
|
2
|
|
|
|
|
5
|
chomp $desc; |
278
|
|
|
|
|
|
|
# desc line is repeated for each strand, so must check |
279
|
|
|
|
|
|
|
# prior to starting a new hit |
280
|
2
|
100
|
66
|
|
|
8
|
if (!$lasthit || $id ne $lasthit) { |
281
|
1
|
50
|
|
|
|
5
|
if ($self->within_element('hit') ) { |
282
|
0
|
|
|
|
|
0
|
$self->element_hash({ |
283
|
|
|
|
|
|
|
'Hit_signif' => $lasteval, |
284
|
|
|
|
|
|
|
'Hit_score' => $lastscore, |
285
|
|
|
|
|
|
|
'Hit_bits' => $lastscore |
286
|
|
|
|
|
|
|
}); |
287
|
0
|
|
|
|
|
0
|
$self->end_element({'Name' => 'Hit'}); |
288
|
|
|
|
|
|
|
} |
289
|
1
|
|
|
|
|
5
|
$self->start_element({'Name' => 'Hit'}); |
290
|
1
|
|
|
|
|
6
|
my ($gi, $acc, $ver) = $self->_get_seq_identifiers($id); |
291
|
|
|
|
|
|
|
|
292
|
1
|
0
|
|
|
|
10
|
$self->element_hash({ |
|
|
50
|
|
|
|
|
|
293
|
|
|
|
|
|
|
'Hit_id' => $id, |
294
|
|
|
|
|
|
|
'Hit_gi' => $gi, |
295
|
|
|
|
|
|
|
'Hit_accession' => $ver ? "$acc.$ver" : |
296
|
|
|
|
|
|
|
$acc ? $acc : $id, |
297
|
|
|
|
|
|
|
'Hit_def' => $desc |
298
|
|
|
|
|
|
|
}); |
299
|
|
|
|
|
|
|
} |
300
|
2
|
|
|
|
|
7
|
$lasthit = $id; |
301
|
|
|
|
|
|
|
} elsif ( (index($line, 'FW') == 0) || (index($line, 'RC') == 0)) { |
302
|
4
|
|
|
|
|
51
|
my ($str, $hn, $pos, $score, $eval) = split ' ', $line; |
303
|
4
|
50
|
|
|
|
16
|
if ($minscore < $score) { |
304
|
4
|
|
|
|
|
15
|
$self->start_element({'Name' => 'Hsp'}); |
305
|
|
|
|
|
|
|
|
306
|
4
|
|
|
|
|
14
|
my ($start, $end) = split m{\.\.}, $pos, 2; |
307
|
4
|
100
|
|
|
|
13
|
($start, $end) = ($end, $start) if ($str eq 'RC'); |
308
|
4
|
|
|
|
|
11
|
$line = $self->_readline; |
309
|
4
|
|
|
|
|
5
|
chomp $line; |
310
|
4
|
|
|
|
|
31
|
$self->element_hash({ |
311
|
|
|
|
|
|
|
'Hsp_stranded' => 'HIT', |
312
|
|
|
|
|
|
|
'Hsp_hit-from' => $start, |
313
|
|
|
|
|
|
|
'Hsp_hit-to' => $end, |
314
|
|
|
|
|
|
|
'Hsp_score' => $score, |
315
|
|
|
|
|
|
|
'Hsp_bit-score' => $score, |
316
|
|
|
|
|
|
|
'Hsp_evalue' => $eval, |
317
|
|
|
|
|
|
|
'Hsp_query-from' => 1, |
318
|
|
|
|
|
|
|
'Hsp_query-to' => length($line), |
319
|
|
|
|
|
|
|
'Hsp_align-len' => length($line), |
320
|
|
|
|
|
|
|
'Hsp_hseq' =>$line |
321
|
|
|
|
|
|
|
}); |
322
|
4
|
|
|
|
|
25
|
$self->end_element({'Name' => 'Hsp'}); |
323
|
4
|
100
|
100
|
|
|
28
|
$lastscore = $score if (!$lastscore || $lastscore < $score); |
324
|
4
|
100
|
100
|
|
|
31
|
$lasteval = $eval if (!$lasteval || $lasteval > $eval); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
} else { |
327
|
|
|
|
|
|
|
#$self->debug("Dropped data: $line"); |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
} |
330
|
1
|
50
|
|
|
|
4
|
if ($seentop) { |
331
|
1
|
50
|
|
|
|
4
|
if ($self->within_element('hit')) { |
332
|
1
|
|
|
|
|
12
|
$self->element_hash({ |
333
|
|
|
|
|
|
|
'Hit_signif' => $lasteval, |
334
|
|
|
|
|
|
|
'Hit_score' => $lastscore, |
335
|
|
|
|
|
|
|
'Hit_bits' => $lastscore |
336
|
|
|
|
|
|
|
}); |
337
|
1
|
|
|
|
|
6
|
$self->end_element({'Name' => 'Hit'}); |
338
|
|
|
|
|
|
|
} |
339
|
1
|
|
|
|
|
5
|
$self->end_element({'Name' => 'Result'}); |
340
|
|
|
|
|
|
|
} |
341
|
1
|
|
|
|
|
5
|
return $self->end_document(); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=head2 start_element |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Title : start_element |
347
|
|
|
|
|
|
|
Usage : $eventgenerator->start_element |
348
|
|
|
|
|
|
|
Function: Handles a start element event |
349
|
|
|
|
|
|
|
Returns : none |
350
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=cut |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub start_element { |
355
|
6
|
|
|
6
|
1
|
8
|
my ( $self, $data ) = @_; |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# we currently don't care about attributes |
358
|
6
|
|
|
|
|
8
|
my $nm = $data->{'Name'}; |
359
|
6
|
|
|
|
|
9
|
my $type = $MODEMAP{$nm}; |
360
|
6
|
50
|
|
|
|
10
|
if ($type) { |
361
|
6
|
50
|
|
|
|
13
|
if ( $self->_eventHandler->will_handle($type) ) { |
362
|
6
|
|
|
|
|
27
|
my $func = sprintf( "start_%s", lc $type ); |
363
|
6
|
|
|
|
|
14
|
$self->_eventHandler->$func( $data->{'Attributes'} ); |
364
|
|
|
|
|
|
|
} |
365
|
6
|
|
|
|
|
11
|
unshift @{ $self->{'_elements'} }, $type; |
|
6
|
|
|
|
|
16
|
|
366
|
|
|
|
|
|
|
} |
367
|
6
|
100
|
66
|
|
|
24
|
if ( defined $type |
368
|
|
|
|
|
|
|
&& $type eq 'result' ) |
369
|
|
|
|
|
|
|
{ |
370
|
1
|
|
|
|
|
3
|
$self->{'_values'} = {}; |
371
|
1
|
|
|
|
|
2
|
$self->{'_result'} = undef; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=head2 end_element |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Title : start_element |
378
|
|
|
|
|
|
|
Usage : $eventgenerator->end_element |
379
|
|
|
|
|
|
|
Function: Handles an end element event |
380
|
|
|
|
|
|
|
Returns : none |
381
|
|
|
|
|
|
|
Args : hashref with at least 2 keys, 'Data' and 'Name' |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub end_element { |
387
|
11
|
|
|
11
|
1
|
15
|
my ( $self, $data ) = @_; |
388
|
11
|
|
|
|
|
16
|
my $nm = $data->{'Name'}; |
389
|
11
|
|
|
|
|
12
|
my $type = $MODEMAP{$nm}; |
390
|
11
|
|
|
|
|
11
|
my $rc; |
391
|
|
|
|
|
|
|
|
392
|
11
|
100
|
|
|
|
19
|
if ($type) { |
|
|
50
|
|
|
|
|
|
393
|
6
|
50
|
|
|
|
13
|
if ( $self->_eventHandler->will_handle($type) ) { |
394
|
6
|
|
|
|
|
17
|
my $func = sprintf( "end_%s", lc $type ); |
395
|
|
|
|
|
|
|
$rc = $self->_eventHandler->$func( $self->{'_reporttype'}, |
396
|
6
|
|
|
|
|
12
|
$self->{'_values'} ); |
397
|
|
|
|
|
|
|
} |
398
|
6
|
|
|
|
|
11
|
my $lastelem = shift @{ $self->{'_elements'} }; |
|
6
|
|
|
|
|
12
|
|
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
elsif ( $MAPPING{$nm} ) { |
401
|
5
|
100
|
|
|
|
15
|
if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { |
402
|
3
|
|
|
|
|
4
|
my $key = ( keys %{ $MAPPING{$nm} } )[0]; |
|
3
|
|
|
|
|
8
|
|
403
|
|
|
|
|
|
|
$self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = |
404
|
3
|
|
|
|
|
21
|
$self->{'_last_data'}; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
else { |
407
|
2
|
|
|
|
|
8
|
$self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
else { |
411
|
0
|
|
|
|
|
0
|
$self->debug("unknown nm $nm, ignoring\n"); |
412
|
|
|
|
|
|
|
} |
413
|
11
|
|
|
|
|
16
|
$self->{'_last_data'} = ''; # remove read data if we are at |
414
|
|
|
|
|
|
|
# end of an element |
415
|
11
|
100
|
100
|
|
|
30
|
$self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); |
416
|
11
|
|
|
|
|
22
|
return $rc; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=head2 element |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Title : element |
422
|
|
|
|
|
|
|
Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); |
423
|
|
|
|
|
|
|
Function: Convenience method that calls start_element, characters, end_element |
424
|
|
|
|
|
|
|
Returns : none |
425
|
|
|
|
|
|
|
Args : Hash ref with the keys 'Name' and 'Data' |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=cut |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub element { |
431
|
5
|
|
|
5
|
1
|
8
|
my ( $self, $data ) = @_; |
432
|
|
|
|
|
|
|
# simple data calls (%MAPPING) do not need start_element |
433
|
5
|
|
|
|
|
12
|
$self->characters($data); |
434
|
5
|
|
|
|
|
11
|
$self->end_element($data); |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head2 element_hash |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Title : element |
440
|
|
|
|
|
|
|
Usage : $eventhandler->element_hash({'Hsp_hit-from' => $start, |
441
|
|
|
|
|
|
|
'Hsp_hit-to' => $end, |
442
|
|
|
|
|
|
|
'Hsp_score' => $lastscore}); |
443
|
|
|
|
|
|
|
Function: Convenience method that takes multiple simple data elements and |
444
|
|
|
|
|
|
|
maps to appropriate parameters |
445
|
|
|
|
|
|
|
Returns : none |
446
|
|
|
|
|
|
|
Args : Hash ref with the mapped key (in %MAPPING) and value |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=cut |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
sub element_hash { |
451
|
7
|
|
|
7
|
1
|
12
|
my ($self, $data) = @_; |
452
|
7
|
50
|
33
|
|
|
23
|
$self->throw("Must provide data hash ref") if !$data || !ref($data); |
453
|
7
|
|
|
|
|
8
|
for my $nm (sort keys %{$data}) { |
|
7
|
|
|
|
|
36
|
|
454
|
51
|
50
|
33
|
|
|
449
|
next if $data->{$nm} && $data->{$nm} =~ m{^\s*$}o; |
455
|
51
|
50
|
|
|
|
77
|
if ( $MAPPING{$nm} ) { |
456
|
51
|
50
|
|
|
|
65
|
if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { |
457
|
0
|
|
|
|
|
0
|
my $key = ( keys %{ $MAPPING{$nm} } )[0]; |
|
0
|
|
|
|
|
0
|
|
458
|
|
|
|
|
|
|
$self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = |
459
|
0
|
|
|
|
|
0
|
$data->{$nm}; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
else { |
462
|
51
|
|
|
|
|
100
|
$self->{'_values'}->{ $MAPPING{$nm} } = $data->{$nm}; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head2 characters |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Title : characters |
471
|
|
|
|
|
|
|
Usage : $eventgenerator->characters($str) |
472
|
|
|
|
|
|
|
Function: Send a character events |
473
|
|
|
|
|
|
|
Returns : none |
474
|
|
|
|
|
|
|
Args : string |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=cut |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
sub characters { |
480
|
5
|
|
|
5
|
1
|
7
|
my ( $self, $data ) = @_; |
481
|
5
|
50
|
33
|
|
|
25
|
return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); |
482
|
5
|
|
|
|
|
11
|
$self->{'_last_data'} = $data->{'Data'}; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head2 within_element |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Title : within_element |
488
|
|
|
|
|
|
|
Usage : if( $eventgenerator->within_element($element) ) {} |
489
|
|
|
|
|
|
|
Function: Test if we are within a particular element |
490
|
|
|
|
|
|
|
This is different than 'in' because within can be tested |
491
|
|
|
|
|
|
|
for a whole block. |
492
|
|
|
|
|
|
|
Returns : boolean |
493
|
|
|
|
|
|
|
Args : string element name |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=cut |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
sub within_element { |
498
|
2
|
|
|
2
|
1
|
5
|
my ( $self, $name ) = @_; |
499
|
|
|
|
|
|
|
return 0 |
500
|
|
|
|
|
|
|
if ( !defined $name |
501
|
|
|
|
|
|
|
|| !defined $self->{'_elements'} |
502
|
2
|
50
|
33
|
|
|
12
|
|| scalar @{ $self->{'_elements'} } == 0 ); |
|
2
|
|
33
|
|
|
6
|
|
503
|
2
|
|
|
|
|
4
|
foreach ( @{ $self->{'_elements'} } ) { |
|
2
|
|
|
|
|
4
|
|
504
|
2
|
100
|
|
|
|
8
|
return 1 if ( $_ eq $name ); |
505
|
|
|
|
|
|
|
} |
506
|
1
|
|
|
|
|
3
|
return 0; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=head2 in_element |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Title : in_element |
512
|
|
|
|
|
|
|
Usage : if( $eventgenerator->in_element($element) ) {} |
513
|
|
|
|
|
|
|
Function: Test if we are in a particular element |
514
|
|
|
|
|
|
|
This is different than 'within' because 'in' only |
515
|
|
|
|
|
|
|
tests its immediate parent. |
516
|
|
|
|
|
|
|
Returns : boolean |
517
|
|
|
|
|
|
|
Args : string element name |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=cut |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
sub in_element { |
522
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $name ) = @_; |
523
|
0
|
0
|
|
|
|
0
|
return 0 if !defined $self->{'_elements'}->[0]; |
524
|
0
|
|
|
|
|
0
|
return ( $self->{'_elements'}->[0] eq $name ); |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=head2 start_document |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
Title : start_document |
530
|
|
|
|
|
|
|
Usage : $eventgenerator->start_document |
531
|
|
|
|
|
|
|
Function: Handle a start document event |
532
|
|
|
|
|
|
|
Returns : none |
533
|
|
|
|
|
|
|
Args : none |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub start_document { |
538
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
539
|
1
|
|
|
|
|
2
|
$self->{'_lasttype'} = ''; |
540
|
1
|
|
|
|
|
3
|
$self->{'_values'} = {}; |
541
|
1
|
|
|
|
|
3
|
$self->{'_result'} = undef; |
542
|
1
|
|
|
|
|
2
|
$self->{'_elements'} = []; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head2 end_document |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Title : end_document |
548
|
|
|
|
|
|
|
Usage : $eventgenerator->end_document |
549
|
|
|
|
|
|
|
Function: Handles an end document event |
550
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
551
|
|
|
|
|
|
|
Args : none |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=cut |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
sub end_document { |
556
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
557
|
1
|
|
|
|
|
6
|
return $self->{'_result'}; |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=head2 result_count |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
Title : result_count |
563
|
|
|
|
|
|
|
Usage : my $count = $searchio->result_count |
564
|
|
|
|
|
|
|
Function: Returns the number of results we have processed |
565
|
|
|
|
|
|
|
Returns : integer |
566
|
|
|
|
|
|
|
Args : none |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=cut |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
sub result_count { |
571
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
572
|
0
|
|
|
|
|
0
|
return $self->{'_result_count'}; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=head2 query_accession |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Title : query_accession |
578
|
|
|
|
|
|
|
Usage : my $acc = $parser->query_accession(); |
579
|
|
|
|
|
|
|
Function: Get/Set query (model) accession; Infernal currently does not output |
580
|
|
|
|
|
|
|
the accession number (Rfam accession #) |
581
|
|
|
|
|
|
|
Returns : String (accession) |
582
|
|
|
|
|
|
|
Args : [optional] String (accession) |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=cut |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
sub query_accession { |
587
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
588
|
2
|
100
|
|
|
|
7
|
return $self->{'_query_accession'} = shift if @_; |
589
|
1
|
|
|
|
|
2
|
return $self->{'_query_accession'}; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head2 hsp_minscore |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Title : hsp_minscore |
595
|
|
|
|
|
|
|
Usage : my $cutoff = $parser->hsp_minscore(); |
596
|
|
|
|
|
|
|
Function: Get/Set min bit score cutoff (for generating Hits/HSPs) |
597
|
|
|
|
|
|
|
Returns : score (number) |
598
|
|
|
|
|
|
|
Args : [optional] score (number) |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=cut |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub hsp_minscore { |
603
|
2
|
|
|
2
|
1
|
2
|
my $self = shift; |
604
|
2
|
100
|
|
|
|
7
|
return $self->{'_hsp_minscore'} = shift if @_; |
605
|
1
|
|
|
|
|
2
|
return $self->{'_hsp_minscore'}; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=head2 algorithm_version |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
Title : algorithm_version |
611
|
|
|
|
|
|
|
Usage : my $ver = $parser->algorithm_version(); |
612
|
|
|
|
|
|
|
Function: Get/Set algorithm version (not defined in RNAMotif output) |
613
|
|
|
|
|
|
|
Returns : String (accession) |
614
|
|
|
|
|
|
|
Args : [optional] String (accession) |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=cut |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
sub algorithm_version { |
619
|
2
|
|
|
2
|
1
|
2
|
my $self = shift; |
620
|
2
|
100
|
|
|
|
10
|
return $self->{'_algorithm'} = shift if @_; |
621
|
1
|
|
|
|
|
1
|
return $self->{'_algorithm'}; |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
1; |