line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SearchIO::rnamotif |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Chris Fields |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Chris Fields |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SearchIO::rnamotif - SearchIO-based RNAMotif parser |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# do not call this module directly. Use Bio::SearchIO. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This is a highly experimental SearchIO-based parser for output from the rnamotif |
25
|
|
|
|
|
|
|
program (one of the programs in the RNAMotif suite). It currently parses only |
26
|
|
|
|
|
|
|
raw rnamotif output for RNAMotif versions 3.0 and above; older versions may work |
27
|
|
|
|
|
|
|
but will not be supported. rmfmt output will not be supported at this time. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 FEEDBACK |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 Mailing Lists |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
34
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
35
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
38
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Support |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
I |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
47
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
48
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
49
|
|
|
|
|
|
|
with code and data examples if at all possible. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Reporting Bugs |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
54
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
55
|
|
|
|
|
|
|
web: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 AUTHOR - Chris Fields |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Email cjfields-at-uiuc-dot-edu |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 APPENDIX |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
66
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Let the code begin... |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
package Bio::SearchIO::rnamotif; |
73
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
26
|
|
74
|
|
|
|
|
|
|
|
75
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::SearchIO); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1778
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my %MODEMAP = ( |
78
|
|
|
|
|
|
|
'Result' => 'result', |
79
|
|
|
|
|
|
|
'Hit' => 'hit', |
80
|
|
|
|
|
|
|
'Hsp' => 'hsp' |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
my %MAPPING = ( |
84
|
|
|
|
|
|
|
# commented out tags have not been assigned |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
'Hsp_score' => 'HSP-score', |
87
|
|
|
|
|
|
|
'Hsp_custom-data' => 'HSP-custom_score', |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# rnamotif has no evalue |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# descriptor has no start, end; same as hit start, end |
92
|
|
|
|
|
|
|
'Hsp_query-from' => 'HSP-query_start', |
93
|
|
|
|
|
|
|
'Hsp_query-to' => 'HSP-query_end', |
94
|
|
|
|
|
|
|
'Hsp_hit-from' => 'HSP-hit_start', |
95
|
|
|
|
|
|
|
'Hsp_hit-to' => 'HSP-hit_end', |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# descriptor has no start, end |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
'Hsp_hseq' => 'HSP-hit_seq', |
100
|
|
|
|
|
|
|
'Hsp_align-len' => 'HSP-hsp_length', |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# build this from scratch, simple WUSS-format |
103
|
|
|
|
|
|
|
'Hsp_structure' => 'HSP-meta', |
104
|
|
|
|
|
|
|
'Hsp_stranded' => 'HSP-stranded', |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# not supported for RNAMotif |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
'Hit_id' => 'HIT-name', |
109
|
|
|
|
|
|
|
'Hit_accession' => 'HIT-accession', |
110
|
|
|
|
|
|
|
'Hit_gi' => 'HIT-ncbi_gi', |
111
|
|
|
|
|
|
|
'Hit_def' => 'HIT-description', |
112
|
|
|
|
|
|
|
'Hit_score' => 'HIT-score', # best HSP score |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
'RNAMotif_program' => 'RESULT-algorithm_name', # get/set |
115
|
|
|
|
|
|
|
'RNAMotif_version' => 'RESULT-algorithm_version', # get/set |
116
|
|
|
|
|
|
|
'RNAMotif_query-def'=> 'RESULT-query_name', # get/set |
117
|
|
|
|
|
|
|
# No length (query is a descriptor) |
118
|
|
|
|
|
|
|
'RNAMotif_query-acc'=> 'RESULT-query_accession', # get/set |
119
|
|
|
|
|
|
|
'RNAMotif_querydesc'=> 'RESULT-query_description', # get/set |
120
|
|
|
|
|
|
|
'RNAMotif_db' => 'RESULT-database_name', # get/set |
121
|
|
|
|
|
|
|
); |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# use structure_delimiters to set custom delimiters |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
my @VALID_SYMBOLS = qw(5-prime 3-prime single-strand unknown); |
126
|
|
|
|
|
|
|
my %STRUCTURE_SYMBOLS = ( |
127
|
|
|
|
|
|
|
'5-prime' => '<', |
128
|
|
|
|
|
|
|
'3-prime' => '>', |
129
|
|
|
|
|
|
|
'single-strand' => '.', |
130
|
|
|
|
|
|
|
'unknown' => '?' |
131
|
|
|
|
|
|
|
# may add more for quartets, triplets |
132
|
|
|
|
|
|
|
); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
my $DEFAULT_VERSION = '3.0.3'; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 new |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Title : new |
139
|
|
|
|
|
|
|
Usage : my $obj = Bio::SearchIO->new(); |
140
|
|
|
|
|
|
|
Function: Builds a new Bio::SearchIO::rnamotif object |
141
|
|
|
|
|
|
|
Returns : Bio::SearchIO::rnamotif parser |
142
|
|
|
|
|
|
|
Args : -fh/-file => RNAMotif filename |
143
|
|
|
|
|
|
|
-format => 'rnamotif' |
144
|
|
|
|
|
|
|
-model => query model (or descriptor, in this case) |
145
|
|
|
|
|
|
|
-database => database name (default undef) |
146
|
|
|
|
|
|
|
-query_acc => query accession (default undef) |
147
|
|
|
|
|
|
|
-hsp_minscore => minimum HSP score cutoff |
148
|
|
|
|
|
|
|
-hsp_maxscore => maximum HSP score cutoff |
149
|
|
|
|
|
|
|
-symbols => hash ref of structure symbols to use |
150
|
|
|
|
|
|
|
(default symbols in %STRUCTURE_SYMBOLS hash) |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub _initialize { |
155
|
1
|
|
|
1
|
|
2
|
my ( $self, @args ) = @_; |
156
|
1
|
|
|
|
|
4
|
$self->SUPER::_initialize(@args); |
157
|
1
|
|
|
|
|
6
|
my ($version, $model, $database, $maxcutoff, $mincutoff, $seqdistance, |
158
|
|
|
|
|
|
|
$accession, $symbols) = |
159
|
|
|
|
|
|
|
$self->_rearrange([qw(VERSION MODEL DATABASE HSP_MAXSCORE |
160
|
|
|
|
|
|
|
HSP_MINSCORE SEQ_DISTANCE QUERY_ACC SYMBOLS)],@args); |
161
|
1
|
|
|
|
|
6
|
my $handler = $self->_eventHandler; |
162
|
1
|
|
|
|
|
6
|
$handler->register_factory( |
163
|
|
|
|
|
|
|
'result', |
164
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
165
|
|
|
|
|
|
|
-type => 'Bio::Search::Result::GenericResult', |
166
|
|
|
|
|
|
|
-interface => 'Bio::Search::Result::ResultI', |
167
|
|
|
|
|
|
|
-verbose => $self->verbose |
168
|
|
|
|
|
|
|
) |
169
|
|
|
|
|
|
|
); |
170
|
|
|
|
|
|
|
|
171
|
1
|
|
|
|
|
4
|
$handler->register_factory( |
172
|
|
|
|
|
|
|
'hit', |
173
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
174
|
|
|
|
|
|
|
-type => 'Bio::Search::Hit::ModelHit', |
175
|
|
|
|
|
|
|
-interface => 'Bio::Search::Hit::HitI', |
176
|
|
|
|
|
|
|
-verbose => $self->verbose |
177
|
|
|
|
|
|
|
) |
178
|
|
|
|
|
|
|
); |
179
|
|
|
|
|
|
|
|
180
|
1
|
|
|
|
|
2
|
$handler->register_factory( |
181
|
|
|
|
|
|
|
'hsp', |
182
|
|
|
|
|
|
|
Bio::Factory::ObjectFactory->new( |
183
|
|
|
|
|
|
|
-type => 'Bio::Search::HSP::ModelHSP', |
184
|
|
|
|
|
|
|
-interface => 'Bio::Search::HSP::HSPI', |
185
|
|
|
|
|
|
|
-verbose => $self->verbose |
186
|
|
|
|
|
|
|
) |
187
|
|
|
|
|
|
|
); |
188
|
1
|
50
|
|
|
|
1
|
$model && $self->model($model); |
189
|
1
|
50
|
|
|
|
4
|
$database && $self->database($database); |
190
|
1
|
50
|
|
|
|
4
|
$accession && $self->query_accession($accession); |
191
|
1
|
|
33
|
|
|
12
|
$version ||= $DEFAULT_VERSION; |
192
|
1
|
|
|
|
|
2
|
$self->algorithm_version($version); |
193
|
1
|
50
|
33
|
|
|
4
|
$self->throw("Cannot define both a minimal and maximal cutoff") |
194
|
|
|
|
|
|
|
if (defined($mincutoff) && defined($maxcutoff)); |
195
|
1
|
50
|
|
|
|
3
|
defined($mincutoff) && $self->hsp_minscore($mincutoff); |
196
|
1
|
50
|
|
|
|
2
|
defined($maxcutoff) && $self->hsp_maxscore($maxcutoff); |
197
|
1
|
|
50
|
|
|
4
|
$symbols ||= \%STRUCTURE_SYMBOLS; |
198
|
1
|
|
|
|
|
3
|
$self->structure_symbols($symbols); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head2 next_result |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Title : next_result |
204
|
|
|
|
|
|
|
Usage : my $hit = $searchio->next_result; |
205
|
|
|
|
|
|
|
Function: Returns the next Result from a search |
206
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
207
|
|
|
|
|
|
|
Args : none |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub next_result { |
212
|
1
|
|
|
1
|
1
|
7
|
my ($self) = @_; |
213
|
1
|
|
|
|
|
1
|
my $seentop = 0; |
214
|
1
|
|
|
|
|
4
|
local $/ = "\n"; |
215
|
1
|
|
|
|
|
1
|
local $_; |
216
|
1
|
|
|
|
|
1
|
my ($rm, $d, $descriptor, $file, $oktobuild); |
217
|
0
|
|
|
|
|
0
|
my ($hitid, $hitdesc, $hspid, $lastid, $lastscore); |
218
|
0
|
|
|
|
|
0
|
my $sprintf; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# user-determined Result data |
221
|
1
|
|
|
|
|
3
|
my ($accession, $db, $model) = |
222
|
|
|
|
|
|
|
($self->query_accession, $self->database, $self->model); |
223
|
|
|
|
|
|
|
# HSP building options |
224
|
1
|
|
|
|
|
4
|
my $hsp_min = $self->hsp_minscore; |
225
|
1
|
|
|
|
|
3
|
my $hsp_max = $self->hsp_maxscore; |
226
|
1
|
|
|
|
|
2
|
my $version = $self->algorithm_version; |
227
|
1
|
|
|
|
|
1
|
my $laststart; |
228
|
|
|
|
|
|
|
|
229
|
1
|
|
|
|
|
2
|
my $verbose = $self->verbose; # cache for speed? |
230
|
1
|
|
|
|
|
3
|
$self->start_document(); |
231
|
|
|
|
|
|
|
PARSER: |
232
|
1
|
|
|
|
|
9
|
while ( defined( my $line = $self->_readline ) ) { |
233
|
|
|
|
|
|
|
# start of report |
234
|
199
|
50
|
|
|
|
446
|
next if $line =~ m{^\s+$}; |
235
|
199
|
100
|
|
|
|
936
|
if (index($line,'#RM') == 0) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
236
|
3
|
100
|
|
|
|
10
|
if (index($line,'#RM scored') == 0 ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
237
|
1
|
50
|
|
|
|
2
|
if ($seentop) { |
238
|
0
|
|
|
|
|
0
|
$self->_pushback($line); |
239
|
0
|
|
|
|
|
0
|
last PARSER; |
240
|
|
|
|
|
|
|
} |
241
|
1
|
|
|
|
|
5
|
$self->start_element({'Name' => 'Result'}); |
242
|
1
|
|
|
|
|
7
|
$self->element_hash({ |
243
|
|
|
|
|
|
|
'RNAMotif_program' => 'rnamotif', |
244
|
|
|
|
|
|
|
'RNAMotif_version' => $version, |
245
|
|
|
|
|
|
|
'RNAMotif_query-acc' => $accession, |
246
|
|
|
|
|
|
|
'RNAMotif_db' => $db |
247
|
|
|
|
|
|
|
}); |
248
|
1
|
|
|
|
|
4
|
$seentop = 1; |
249
|
|
|
|
|
|
|
#$self->debug("Start result\n"); |
250
|
|
|
|
|
|
|
} elsif (index($line,'#RM descr') == 0) { |
251
|
1
|
|
|
|
|
3
|
($rm, $d, $descriptor) = split ' ', $line, 3; |
252
|
|
|
|
|
|
|
# toss $rm, $d; keep $descr |
253
|
1
|
|
|
|
|
3
|
chomp $descriptor; |
254
|
1
|
|
|
|
|
2
|
$self->{'_descriptor'} = $descriptor; |
255
|
1
|
|
|
|
|
4
|
$self->element( |
256
|
|
|
|
|
|
|
{'Name' => 'RNAMotif_querydesc', |
257
|
|
|
|
|
|
|
'Data' => $descriptor} |
258
|
|
|
|
|
|
|
); |
259
|
|
|
|
|
|
|
} elsif(index($line,'#RM dfile') == 0) { |
260
|
1
|
|
|
|
|
3
|
($rm, $d, $file) = split ' ', $line, 3; |
261
|
|
|
|
|
|
|
# toss $rm, $d; keep $file |
262
|
1
|
|
|
|
|
2
|
chomp $file; |
263
|
1
|
|
|
|
|
4
|
$self->element( |
264
|
|
|
|
|
|
|
{'Name' => 'RNAMotif_query-def', |
265
|
|
|
|
|
|
|
'Data' => $file} |
266
|
|
|
|
|
|
|
); |
267
|
|
|
|
|
|
|
} else { |
268
|
0
|
|
|
|
|
0
|
$self->debug("Unrecognized line: $line"); |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} elsif ($line =~ s{^>}{}) { |
271
|
98
|
|
|
|
|
88
|
chomp $line; |
272
|
98
|
|
|
|
|
158
|
($hitid, $hitdesc) = split ' ',$line,2; |
273
|
|
|
|
|
|
|
|
274
|
98
|
100
|
100
|
|
|
177
|
if ($self->within_element('hit') && ($hitid ne $lastid)) { |
|
|
100
|
|
|
|
|
|
275
|
27
|
50
|
|
|
|
39
|
$self->element( |
276
|
|
|
|
|
|
|
{'Name' => 'Hit_score', |
277
|
|
|
|
|
|
|
'Data' => $lastscore} |
278
|
|
|
|
|
|
|
) if $lastscore; |
279
|
27
|
|
|
|
|
51
|
$self->end_element({'Name' => 'Hit'}); |
280
|
27
|
|
|
|
|
61
|
$self->start_element({'Name' => 'Hit'}); |
281
|
|
|
|
|
|
|
} elsif (!$self->within_element('hit')) { |
282
|
1
|
|
|
|
|
3
|
$self->start_element({'Name' => 'Hit'}); |
283
|
|
|
|
|
|
|
} |
284
|
98
|
|
|
|
|
176
|
my ($gi, $acc, $ver) = $self->_get_seq_identifiers($hitid); |
285
|
|
|
|
|
|
|
|
286
|
98
|
50
|
|
|
|
348
|
$self->element_hash({ |
|
|
50
|
|
|
|
|
|
287
|
|
|
|
|
|
|
'Hit_id' => $hitid, |
288
|
|
|
|
|
|
|
'Hit_gi' => $gi, |
289
|
|
|
|
|
|
|
'Hit_accession' => $ver ? "$acc.$ver" : |
290
|
|
|
|
|
|
|
$acc ? $acc : $hitid, |
291
|
|
|
|
|
|
|
'Hit_def' => $hitdesc} |
292
|
|
|
|
|
|
|
); |
293
|
98
|
|
|
|
|
289
|
$lastid = $hitid; |
294
|
|
|
|
|
|
|
} elsif ($line =~ m{^(\S+)\s+(.+?)\s+(\d+)\s+(\d+)\s+(\d+)\s(.*)$}xm) { |
295
|
98
|
|
|
|
|
128
|
chomp $line; |
296
|
98
|
|
|
|
|
133
|
my $hspid = $1; |
297
|
98
|
|
|
|
|
187
|
my ($score, $strand, $start, $length , $seq) = ($2, $3, $4, $5, $6); |
298
|
98
|
|
|
|
|
157
|
$score *= 1; # implicitly cast any odd '0.000' to float |
299
|
|
|
|
|
|
|
# sanity check ids |
300
|
98
|
50
|
|
|
|
146
|
unless ($hitid eq $hspid) { |
301
|
0
|
|
|
|
|
0
|
$self->throw("IDs do not match!"); |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
# check score for possible sprintf data, mark as such, cache result |
304
|
98
|
100
|
|
|
|
144
|
if (!defined($sprintf)) { |
305
|
1
|
50
|
|
|
|
6
|
if ($score =~ m{[^0-9.-]+}gxms) { |
306
|
0
|
0
|
0
|
|
|
0
|
if (defined $hsp_min || defined $hsp_max ) { |
307
|
0
|
|
|
|
|
0
|
$self->warn("HSP data likely contains custom score; ". |
308
|
|
|
|
|
|
|
"ignoring min/maxscore"); |
309
|
|
|
|
|
|
|
} |
310
|
0
|
|
|
|
|
0
|
$sprintf = $oktobuild = 1; |
311
|
|
|
|
|
|
|
} else { |
312
|
1
|
|
|
|
|
1
|
$sprintf = 0; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
98
|
50
|
|
|
|
123
|
if (!$sprintf) { |
317
|
98
|
50
|
33
|
|
|
302
|
if (($hsp_min && $score <= $hsp_min) |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
318
|
|
|
|
|
|
|
|| ($hsp_max && ($score >= $hsp_max)) ) { |
319
|
|
|
|
|
|
|
# do not build HSP |
320
|
0
|
|
|
|
|
0
|
$oktobuild = 0; |
321
|
|
|
|
|
|
|
} else { |
322
|
98
|
|
|
|
|
76
|
$oktobuild = 1; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# store best hit score based on the hsp min/maxscore only |
325
|
98
|
50
|
33
|
|
|
290
|
if (defined $hsp_min && $score > $hsp_min) { |
|
|
50
|
33
|
|
|
|
|
326
|
0
|
0
|
0
|
|
|
0
|
$lastscore = $score if !$lastscore || $score > $lastscore; |
327
|
|
|
|
|
|
|
} elsif (defined $hsp_max && $score < $hsp_max) { |
328
|
0
|
0
|
0
|
|
|
0
|
$lastscore = $score if !$lastscore || $score < $lastscore; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# build HSP |
334
|
98
|
50
|
|
|
|
121
|
if ($oktobuild) { |
335
|
98
|
|
|
|
|
55
|
my $end; |
336
|
|
|
|
|
|
|
# calculate start/end |
337
|
98
|
50
|
|
|
|
112
|
if( $strand==0 ) { |
338
|
98
|
|
|
|
|
103
|
$end = $start + $length -1; |
339
|
|
|
|
|
|
|
} else { |
340
|
0
|
|
|
|
|
0
|
$end = $start - $length + 1; |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
98
|
|
|
|
|
160
|
my ($rna, $meta) = $self->_motif2meta($seq, $descriptor); |
344
|
|
|
|
|
|
|
|
345
|
98
|
|
|
|
|
189
|
$self->start_element({'Name' => 'Hsp'}); |
346
|
98
|
|
|
|
|
139
|
my $rnalen = $rna =~ tr{ATGCatgc}{ATGCatgc}; |
347
|
98
|
50
|
|
|
|
544
|
$self->element_hash({ |
|
|
50
|
|
|
|
|
|
348
|
|
|
|
|
|
|
'Hsp_stranded' => 'HIT', |
349
|
|
|
|
|
|
|
'Hsp_hseq' => $rna, |
350
|
|
|
|
|
|
|
'Hsp_query-from' => 1, |
351
|
|
|
|
|
|
|
'Hsp_query-to' =>length($rna), |
352
|
|
|
|
|
|
|
'Hsp_hit-from' => $start, |
353
|
|
|
|
|
|
|
'Hsp_hit-to' => $end, |
354
|
|
|
|
|
|
|
'Hsp_structure' => $meta, |
355
|
|
|
|
|
|
|
'Hsp_align-len' => length($rna), |
356
|
|
|
|
|
|
|
'Hsp_score' => $sprintf ? undef : $score, |
357
|
|
|
|
|
|
|
'Hsp_custom-data' => $sprintf ? $score : undef, |
358
|
|
|
|
|
|
|
}); |
359
|
98
|
|
|
|
|
346
|
$self->end_element({'Name' => 'Hsp'}); |
360
|
98
|
50
|
|
|
|
350
|
$oktobuild = 0 if (!$sprintf); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
} |
364
|
1
|
50
|
|
|
|
3
|
if ($self->within_element('hit')) { |
365
|
1
|
50
|
|
|
|
3
|
$self->element( |
366
|
|
|
|
|
|
|
{'Name' => 'Hit_score', |
367
|
|
|
|
|
|
|
'Data' => $lastscore} |
368
|
|
|
|
|
|
|
) if $lastscore; |
369
|
1
|
|
|
|
|
3
|
$self->end_element( { 'Name' => 'Hit' } ); |
370
|
|
|
|
|
|
|
} |
371
|
1
|
50
|
|
|
|
4
|
if ($seentop) { |
372
|
1
|
|
|
|
|
4
|
$self->end_element( { 'Name' => 'Result' } ); |
373
|
|
|
|
|
|
|
} |
374
|
1
|
|
|
|
|
4
|
return $self->end_document(); |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head2 start_element |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Title : start_element |
380
|
|
|
|
|
|
|
Usage : $eventgenerator->start_element |
381
|
|
|
|
|
|
|
Function: Handles a start element event |
382
|
|
|
|
|
|
|
Returns : none |
383
|
|
|
|
|
|
|
Args : hashref with at least 2 keys 'Data' and 'Name' |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=cut |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
sub start_element { |
389
|
127
|
|
|
127
|
1
|
106
|
my ( $self, $data ) = @_; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# we currently don't care about attributes |
392
|
127
|
|
|
|
|
111
|
my $nm = $data->{'Name'}; |
393
|
127
|
|
|
|
|
107
|
my $type = $MODEMAP{$nm}; |
394
|
127
|
50
|
|
|
|
171
|
if ($type) { |
395
|
127
|
50
|
|
|
|
208
|
if ( $self->_eventHandler->will_handle($type) ) { |
396
|
127
|
|
|
|
|
254
|
my $func = sprintf( "start_%s", lc $type ); |
397
|
127
|
|
|
|
|
146
|
$self->_eventHandler->$func( $data->{'Attributes'} ); |
398
|
|
|
|
|
|
|
} |
399
|
127
|
|
|
|
|
159
|
unshift @{ $self->{'_elements'} }, $type; |
|
127
|
|
|
|
|
191
|
|
400
|
|
|
|
|
|
|
} |
401
|
127
|
100
|
66
|
|
|
449
|
if ( defined $type |
402
|
|
|
|
|
|
|
&& $type eq 'result' ) |
403
|
|
|
|
|
|
|
{ |
404
|
1
|
|
|
|
|
2
|
$self->{'_values'} = {}; |
405
|
1
|
|
|
|
|
2
|
$self->{'_result'} = undef; |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 end_element |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Title : start_element |
412
|
|
|
|
|
|
|
Usage : $eventgenerator->end_element |
413
|
|
|
|
|
|
|
Function: Handles an end element event |
414
|
|
|
|
|
|
|
Returns : none |
415
|
|
|
|
|
|
|
Args : hashref with at least 2 keys, 'Data' and 'Name' |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=cut |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
sub end_element { |
421
|
129
|
|
|
129
|
1
|
108
|
my ( $self, $data ) = @_; |
422
|
129
|
|
|
|
|
113
|
my $nm = $data->{'Name'}; |
423
|
129
|
|
|
|
|
97
|
my $type = $MODEMAP{$nm}; |
424
|
129
|
|
|
|
|
76
|
my $rc; |
425
|
|
|
|
|
|
|
|
426
|
129
|
100
|
|
|
|
140
|
if ($type) { |
|
|
50
|
|
|
|
|
|
427
|
127
|
50
|
|
|
|
188
|
if ( $self->_eventHandler->will_handle($type) ) { |
428
|
127
|
|
|
|
|
225
|
my $func = sprintf( "end_%s", lc $type ); |
429
|
|
|
|
|
|
|
$rc = $self->_eventHandler->$func( $self->{'_reporttype'}, |
430
|
127
|
|
|
|
|
146
|
$self->{'_values'} ); |
431
|
|
|
|
|
|
|
} |
432
|
127
|
|
|
|
|
109
|
my $lastelem = shift @{ $self->{'_elements'} }; |
|
127
|
|
|
|
|
148
|
|
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
elsif ( $MAPPING{$nm} ) { |
435
|
2
|
50
|
|
|
|
3
|
if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { |
436
|
0
|
|
|
|
|
0
|
my $key = ( keys %{ $MAPPING{$nm} } )[0]; |
|
0
|
|
|
|
|
0
|
|
437
|
|
|
|
|
|
|
$self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = |
438
|
0
|
|
|
|
|
0
|
$self->{'_last_data'}; |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
else { |
441
|
2
|
|
|
|
|
5
|
$self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
else { |
445
|
0
|
|
|
|
|
0
|
$self->debug("unknown nm $nm, ignoring\n"); |
446
|
|
|
|
|
|
|
} |
447
|
129
|
|
|
|
|
122
|
$self->{'_last_data'} = ''; # remove read data if we are at |
448
|
|
|
|
|
|
|
# end of an element |
449
|
129
|
100
|
100
|
|
|
371
|
$self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); |
450
|
129
|
|
|
|
|
123
|
return $rc; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=head2 element |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
Title : element |
456
|
|
|
|
|
|
|
Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); |
457
|
|
|
|
|
|
|
Function: Convenience method that calls start_element, characters, end_element |
458
|
|
|
|
|
|
|
Returns : none |
459
|
|
|
|
|
|
|
Args : Hash ref with the keys 'Name' and 'Data' |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=cut |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
sub element { |
465
|
2
|
|
|
2
|
1
|
3
|
my ( $self, $data ) = @_; |
466
|
|
|
|
|
|
|
# simple data calls (%MAPPING) do not need start_element |
467
|
2
|
|
|
|
|
3
|
$self->characters($data); |
468
|
2
|
|
|
|
|
4
|
$self->end_element($data); |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 element_hash |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Title : element |
475
|
|
|
|
|
|
|
Usage : $eventhandler->element_hash({'Hsp_hit-from' => $start, |
476
|
|
|
|
|
|
|
'Hsp_hit-to' => $end, |
477
|
|
|
|
|
|
|
'Hsp_score' => $lastscore}); |
478
|
|
|
|
|
|
|
Function: Convenience method that takes multiple simple data elements and |
479
|
|
|
|
|
|
|
maps to appropriate parameters |
480
|
|
|
|
|
|
|
Returns : none |
481
|
|
|
|
|
|
|
Args : Hash ref with the mapped key (in %MAPPING) and value |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=cut |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
sub element_hash { |
486
|
197
|
|
|
197
|
1
|
160
|
my ($self, $data) = @_; |
487
|
197
|
50
|
33
|
|
|
452
|
$self->throw("Must provide data hash ref") if !$data || !ref($data); |
488
|
197
|
|
|
|
|
136
|
for my $nm (sort keys %{$data}) { |
|
197
|
|
|
|
|
744
|
|
489
|
1376
|
50
|
66
|
|
|
3564
|
next if $data->{$nm} && $data->{$nm} =~ m{^\s*$}o; |
490
|
1376
|
50
|
|
|
|
1569
|
if ( $MAPPING{$nm} ) { |
491
|
1376
|
50
|
|
|
|
1290
|
if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { |
492
|
0
|
|
|
|
|
0
|
my $key = ( keys %{ $MAPPING{$nm} } )[0]; |
|
0
|
|
|
|
|
0
|
|
493
|
|
|
|
|
|
|
$self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = |
494
|
0
|
|
|
|
|
0
|
$data->{$nm}; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
else { |
497
|
1376
|
|
|
|
|
1678
|
$self->{'_values'}->{ $MAPPING{$nm} } = $data->{$nm}; |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head2 characters |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Title : characters |
506
|
|
|
|
|
|
|
Usage : $eventgenerator->characters($str) |
507
|
|
|
|
|
|
|
Function: Send a character events |
508
|
|
|
|
|
|
|
Returns : none |
509
|
|
|
|
|
|
|
Args : string |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=cut |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
sub characters { |
515
|
2
|
|
|
2
|
1
|
2
|
my ( $self, $data ) = @_; |
516
|
2
|
50
|
33
|
|
|
12
|
return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); |
517
|
2
|
|
|
|
|
3
|
$self->{'_last_data'} = $data->{'Data'}; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=head2 within_element |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
Title : within_element |
523
|
|
|
|
|
|
|
Usage : if( $eventgenerator->within_element($element) ) {} |
524
|
|
|
|
|
|
|
Function: Test if we are within a particular element |
525
|
|
|
|
|
|
|
This is different than 'in' because within can be tested |
526
|
|
|
|
|
|
|
for a whole block. |
527
|
|
|
|
|
|
|
Returns : boolean |
528
|
|
|
|
|
|
|
Args : string element name |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=cut |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub within_element { |
533
|
170
|
|
|
170
|
1
|
143
|
my ( $self, $name ) = @_; |
534
|
|
|
|
|
|
|
return 0 |
535
|
|
|
|
|
|
|
if ( !defined $name |
536
|
|
|
|
|
|
|
|| !defined $self->{'_elements'} |
537
|
170
|
50
|
33
|
|
|
477
|
|| scalar @{ $self->{'_elements'} } == 0 ); |
|
170
|
|
33
|
|
|
378
|
|
538
|
170
|
|
|
|
|
110
|
foreach ( @{ $self->{'_elements'} } ) { |
|
170
|
|
|
|
|
208
|
|
539
|
170
|
100
|
|
|
|
588
|
return 1 if ( $_ eq $name ); |
540
|
|
|
|
|
|
|
} |
541
|
2
|
|
|
|
|
8
|
return 0; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head2 in_element |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
Title : in_element |
547
|
|
|
|
|
|
|
Usage : if( $eventgenerator->in_element($element) ) {} |
548
|
|
|
|
|
|
|
Function: Test if we are in a particular element |
549
|
|
|
|
|
|
|
This is different than 'within' because 'in' only |
550
|
|
|
|
|
|
|
tests its immediate parent. |
551
|
|
|
|
|
|
|
Returns : boolean |
552
|
|
|
|
|
|
|
Args : string element name |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
sub in_element { |
557
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $name ) = @_; |
558
|
0
|
0
|
|
|
|
0
|
return 0 if !defined $self->{'_elements'}->[0]; |
559
|
0
|
|
|
|
|
0
|
return ( $self->{'_elements'}->[0] eq $name ); |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head2 start_document |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Title : start_document |
565
|
|
|
|
|
|
|
Usage : $eventgenerator->start_document |
566
|
|
|
|
|
|
|
Function: Handle a start document event |
567
|
|
|
|
|
|
|
Returns : none |
568
|
|
|
|
|
|
|
Args : none |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=cut |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
sub start_document { |
573
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
574
|
1
|
|
|
|
|
2
|
$self->{'_lasttype'} = ''; |
575
|
1
|
|
|
|
|
2
|
$self->{'_values'} = {}; |
576
|
1
|
|
|
|
|
2
|
$self->{'_result'} = undef; |
577
|
1
|
|
|
|
|
2
|
$self->{'_elements'} = []; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=head2 end_document |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Title : end_document |
583
|
|
|
|
|
|
|
Usage : $eventgenerator->end_document |
584
|
|
|
|
|
|
|
Function: Handles an end document event |
585
|
|
|
|
|
|
|
Returns : Bio::Search::Result::ResultI object |
586
|
|
|
|
|
|
|
Args : none |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
sub end_document { |
591
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
592
|
1
|
|
|
|
|
6
|
return $self->{'_result'}; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=head2 result_count |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Title : result_count |
598
|
|
|
|
|
|
|
Usage : my $count = $searchio->result_count |
599
|
|
|
|
|
|
|
Function: Returns the number of results we have processed |
600
|
|
|
|
|
|
|
Returns : integer |
601
|
|
|
|
|
|
|
Args : none |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=cut |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
sub result_count { |
606
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
607
|
0
|
|
|
|
|
0
|
return $self->{'_result_count'}; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=head2 descriptor |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
Title : descriptor |
613
|
|
|
|
|
|
|
Usage : my $descr = $parser->descriptor(); |
614
|
|
|
|
|
|
|
Function: Get/Set descriptor name. Some versions of RNAMotif do not add the |
615
|
|
|
|
|
|
|
descriptor name to the output. This also overrides any name found |
616
|
|
|
|
|
|
|
while parsing. |
617
|
|
|
|
|
|
|
Returns : String (name of model) |
618
|
|
|
|
|
|
|
Args : [optional] String (name of model) |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=cut |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
sub descriptor { |
623
|
2
|
|
|
2
|
1
|
2
|
my $self = shift; |
624
|
2
|
100
|
|
|
|
6
|
return $self->{'_descriptor'} = shift if @_; |
625
|
1
|
|
|
|
|
1
|
return $self->{'_descriptor'}; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head2 model |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Title : model |
631
|
|
|
|
|
|
|
Usage : my $model = $parser->model(); |
632
|
|
|
|
|
|
|
Function: Get/Set model; Infernal currently does not output |
633
|
|
|
|
|
|
|
the model name (Rfam ID) |
634
|
|
|
|
|
|
|
Returns : String (name of model) |
635
|
|
|
|
|
|
|
Args : [optional] String (name of model) |
636
|
|
|
|
|
|
|
Note : this is a synonym for descriptor() |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
=cut |
639
|
|
|
|
|
|
|
|
640
|
2
|
|
|
2
|
1
|
6
|
sub model { shift->descriptor(@_) } |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=head2 database |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
Title : database |
645
|
|
|
|
|
|
|
Usage : my $database = $parser->database(); |
646
|
|
|
|
|
|
|
Function: Get/Set database; Infernal currently does not output |
647
|
|
|
|
|
|
|
the database name |
648
|
|
|
|
|
|
|
Returns : String (database name) |
649
|
|
|
|
|
|
|
Args : [optional] String (database name) |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=cut |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
sub database { |
654
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
655
|
2
|
100
|
|
|
|
4
|
return $self->{'_database'} = shift if @_; |
656
|
1
|
|
|
|
|
2
|
return $self->{'_database'}; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=head2 query_accession |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
Title : query_accession |
662
|
|
|
|
|
|
|
Usage : my $acc = $parser->query_accession(); |
663
|
|
|
|
|
|
|
Function: Get/Set query (model) accession; RNAMotif currently does not output |
664
|
|
|
|
|
|
|
the accession number |
665
|
|
|
|
|
|
|
Returns : String (accession) |
666
|
|
|
|
|
|
|
Args : [optional] String (accession) |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=cut |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
sub query_accession { |
671
|
2
|
|
|
2
|
1
|
1
|
my $self = shift; |
672
|
2
|
100
|
|
|
|
5
|
return $self->{'_query_accession'} = shift if @_; |
673
|
1
|
|
|
|
|
3
|
return $self->{'_query_accession'}; |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head2 algorithm_version |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Title : algorithm_version |
679
|
|
|
|
|
|
|
Usage : my $ver = $parser->algorithm_version(); |
680
|
|
|
|
|
|
|
Function: Get/Set algorithm version (not defined in RNAMotif output) |
681
|
|
|
|
|
|
|
Returns : String (accession) |
682
|
|
|
|
|
|
|
Args : [optional] String (accession) |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=cut |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
sub algorithm_version { |
687
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
688
|
2
|
100
|
|
|
|
5
|
return $self->{'_algorithm'} = shift if @_; |
689
|
1
|
|
|
|
|
1
|
return $self->{'_algorithm'}; |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=head2 hsp_minscore |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
Title : hsp_minscore |
695
|
|
|
|
|
|
|
Usage : my $cutoff = $parser->hsp_minscore(); |
696
|
|
|
|
|
|
|
Function: Get/Set min score cutoff (for generating Hits/HSPs). |
697
|
|
|
|
|
|
|
Returns : score (number) |
698
|
|
|
|
|
|
|
Args : [optional] score (number) |
699
|
|
|
|
|
|
|
Note : Cannot be set along with hsp_maxscore() |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=cut |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
sub hsp_minscore { |
704
|
1
|
|
|
1
|
1
|
2
|
my ($self, $score) = shift; |
705
|
1
|
50
|
33
|
|
|
3
|
$self->throw('Minscore not set to a number') if |
706
|
|
|
|
|
|
|
($score && $score !~ m{[0-9.]+}); |
707
|
1
|
50
|
|
|
|
2
|
return $self->{'_hsp_minscore'} = shift if @_; |
708
|
1
|
|
|
|
|
2
|
return $self->{'_hsp_minscore'}; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head2 hsp_maxscore |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Title : hsp_maxscore |
714
|
|
|
|
|
|
|
Usage : my $cutoff = $parser->hsp_maxscore(); |
715
|
|
|
|
|
|
|
Function: Get/Set max score cutoff (for generating Hits/HSPs). |
716
|
|
|
|
|
|
|
Returns : score (number) |
717
|
|
|
|
|
|
|
Args : [optional] score (number) |
718
|
|
|
|
|
|
|
Note : Cannot be set along with hsp_minscore() |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub hsp_maxscore { |
723
|
1
|
|
|
1
|
1
|
2
|
my ($self, $score) = shift; |
724
|
1
|
50
|
33
|
|
|
4
|
$self->throw('Maxscore not set to a number') if |
725
|
|
|
|
|
|
|
($score && $score !~ m{[0-9.]+}); |
726
|
1
|
50
|
|
|
|
2
|
return $self->{'_hsp_maxscore'} = shift if @_; |
727
|
1
|
|
|
|
|
1
|
return $self->{'_hsp_maxscore'}; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=head2 structure_symbols |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
Title : structure_symbols |
733
|
|
|
|
|
|
|
Usage : my $hashref = $parser->structure_symbols(); |
734
|
|
|
|
|
|
|
Function: Get/Set RNA structure symbols |
735
|
|
|
|
|
|
|
Returns : Hash ref of delimiters (5' stem, 3' stem, single-strand, etc) |
736
|
|
|
|
|
|
|
: default = < (5-prime) |
737
|
|
|
|
|
|
|
> (3-prime) |
738
|
|
|
|
|
|
|
. (single-strand) |
739
|
|
|
|
|
|
|
? (unknown) |
740
|
|
|
|
|
|
|
Args : Hash ref of substitute delimiters, using above keys. |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=cut |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
sub structure_symbols { |
745
|
99
|
|
|
99
|
1
|
80
|
my ($self, $delim) = @_; |
746
|
99
|
100
|
|
|
|
140
|
if ($delim) { |
747
|
1
|
50
|
|
|
|
5
|
if (ref($delim) =~ m{HASH}) { |
748
|
1
|
|
|
|
|
2
|
my %data = %{ $delim }; |
|
1
|
|
|
|
|
6
|
|
749
|
1
|
|
|
|
|
3
|
for my $d (@VALID_SYMBOLS) { |
750
|
4
|
50
|
|
|
|
7
|
if ( exists $data{$d} ) { |
751
|
4
|
|
|
|
|
8
|
$self->{'_delimiter'}->{$d} = $data{$d}; |
752
|
|
|
|
|
|
|
} |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} else { |
755
|
0
|
|
|
|
|
0
|
$self->throw("Args to helix_delimiters() should be in a hash reference"); |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
} |
758
|
99
|
|
|
|
|
151
|
return $self->{'_delimiter'}; |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
#Private methods |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=head2 _motif2meta |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
Title : _motif2meta |
766
|
|
|
|
|
|
|
Usage : my ($rna, $meta) = $parser->_motif2meta($str, $descr); |
767
|
|
|
|
|
|
|
Function: Creates meta string from sequence and descriptor |
768
|
|
|
|
|
|
|
Returns : array of sequence, meta strings |
769
|
|
|
|
|
|
|
Args : Array of string data and descriptor data |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
Note: This is currently a quick and simple way of making simple |
772
|
|
|
|
|
|
|
RNA structures (stem-loops, helices, etc) from RNAMotif descriptor |
773
|
|
|
|
|
|
|
data in the output file. It does not currently work with pseudoknots, |
774
|
|
|
|
|
|
|
triplets, G-quartets, or other more complex RNA structural motifs. |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub _motif2meta { |
779
|
98
|
|
|
98
|
|
94
|
my ($self, $str, $descriptor) = @_; |
780
|
98
|
|
|
|
|
73
|
my ($rna, $meta); |
781
|
98
|
|
|
|
|
269
|
my @desc_el = split ' ',$descriptor; |
782
|
98
|
|
|
|
|
279
|
my @seq_el = split ' ',$str; |
783
|
98
|
|
|
|
|
134
|
my $symbol = $self->structure_symbols(); |
784
|
98
|
50
|
|
|
|
157
|
if ($#desc_el != $#seq_el) { |
785
|
0
|
|
|
|
|
0
|
$self->throw("Descriptor elements and seq elements do not match"); |
786
|
|
|
|
|
|
|
} |
787
|
98
|
|
|
|
|
148
|
while (@desc_el) { |
788
|
1470
|
|
|
|
|
768
|
my $struct; |
789
|
1470
|
|
|
|
|
1173
|
my ($seq, $motif) = (shift @seq_el, shift @desc_el); |
790
|
|
|
|
|
|
|
$struct = (index($motif,'h5') == 0) ? $symbol->{'5-prime'} : |
791
|
|
|
|
|
|
|
(index($motif,'h3') == 0) ? $symbol->{'3-prime'} : |
792
|
|
|
|
|
|
|
(index($motif,'ss') == 0) ? $symbol->{'single-strand'} : |
793
|
|
|
|
|
|
|
(index($motif,'ctx')== 0) ? $symbol->{'single-strand'} : |
794
|
1470
|
0
|
|
|
|
2258
|
$symbol->{'unknown'}; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
795
|
1470
|
|
|
|
|
1222
|
$meta .= $struct x (length($seq)); |
796
|
1470
|
|
|
|
|
1793
|
$rna .= $seq; |
797
|
|
|
|
|
|
|
} |
798
|
98
|
|
|
|
|
147
|
return ($rna, $meta); |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
1; |