line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
=head1 NAME |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 SYNOPSIS |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
use Bio::Tools::TandemRepeatsFinder; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# create parser |
11
|
|
|
|
|
|
|
my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out'); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# loop through results |
14
|
|
|
|
|
|
|
while( my $feat = $parser->next_result ) { |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# print the source sequence id, start, end, percent matches, and the consensus sequence |
17
|
|
|
|
|
|
|
my ($percent_matches) = $feat->get_tag_values('percent_matches'); |
18
|
|
|
|
|
|
|
my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence'); |
19
|
|
|
|
|
|
|
print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n"; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
} |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
A parser for Tandem Repeats Finder output. |
26
|
|
|
|
|
|
|
Written and tested for version 4.00 |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature. |
29
|
|
|
|
|
|
|
All other data is stored in tags. The available tags are |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
period_size |
32
|
|
|
|
|
|
|
copy_number |
33
|
|
|
|
|
|
|
consensus_size |
34
|
|
|
|
|
|
|
percent_matches |
35
|
|
|
|
|
|
|
percent_indels |
36
|
|
|
|
|
|
|
percent_a |
37
|
|
|
|
|
|
|
percent_c |
38
|
|
|
|
|
|
|
percent_g |
39
|
|
|
|
|
|
|
percent_t |
40
|
|
|
|
|
|
|
entropy |
41
|
|
|
|
|
|
|
consensus_sequence |
42
|
|
|
|
|
|
|
repeat_sequence |
43
|
|
|
|
|
|
|
run_parameters |
44
|
|
|
|
|
|
|
sequence_description |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
The run_parameters are stored in a hashref with the following keys: |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
match_weight |
49
|
|
|
|
|
|
|
mismatch_weight |
50
|
|
|
|
|
|
|
indel_weight |
51
|
|
|
|
|
|
|
match_prob |
52
|
|
|
|
|
|
|
indel_prob |
53
|
|
|
|
|
|
|
min_score |
54
|
|
|
|
|
|
|
max_period_size |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 FEEDBACK |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 Mailing Lists |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
61
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
62
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
65
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 Support |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
74
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
75
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
76
|
|
|
|
|
|
|
with code and data examples if at all possible. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 Reporting Bugs |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
81
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
82
|
|
|
|
|
|
|
the web: |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 AUTHOR - Eric Just |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Email e-just@northwestern.edu |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 APPENDIX |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
93
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
package Bio::Tools::TandemRepeatsFinder; |
98
|
1
|
|
|
1
|
|
549
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
99
|
1
|
|
|
1
|
|
4
|
use constant DEBUG => 0; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
57
|
|
100
|
1
|
|
|
1
|
|
301
|
use Bio::SeqFeature::Generic; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
57
|
|
101
|
|
|
|
|
|
|
|
102
|
1
|
|
|
1
|
|
9
|
use base qw(Bio::Root::Root Bio::Root::IO); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
668
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Title : new |
107
|
|
|
|
|
|
|
Usage : my $obj = Bio::Tools::TandemRepeatsFinder->new(); |
108
|
|
|
|
|
|
|
Function: Builds a new Bio::Tools::TandemRepeatsFinder object |
109
|
|
|
|
|
|
|
Returns : Bio::Tools::TandemRepeatsFinder |
110
|
|
|
|
|
|
|
Args : -fh/-file => $val, for initing input, see Bio::Root::IO |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub new { |
115
|
3
|
|
|
3
|
1
|
35
|
my ( $class, @args ) = @_; |
116
|
|
|
|
|
|
|
|
117
|
3
|
|
|
|
|
22
|
my $self = $class->SUPER::new(@args); |
118
|
3
|
|
|
|
|
16
|
$self->_initialize_io(@args); |
119
|
|
|
|
|
|
|
|
120
|
3
|
|
|
|
|
21
|
return $self; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head2 version |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Title : version |
126
|
|
|
|
|
|
|
Usage : $self->version( $version ) |
127
|
|
|
|
|
|
|
Function: get/set the version of Tandem Repeats Finder that was used in analysis |
128
|
|
|
|
|
|
|
Returns : value of version |
129
|
|
|
|
|
|
|
Args : new value (optional) |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=cut |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub version { |
134
|
3
|
|
|
3
|
1
|
6
|
my ( $self, $value ) = @_; |
135
|
3
|
50
|
|
|
|
7
|
if ( defined $value ) { |
136
|
3
|
|
|
|
|
8
|
$self->{'version'} = $value; |
137
|
|
|
|
|
|
|
} |
138
|
3
|
|
|
|
|
6
|
return $self->{'version'}; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head2 _current_seq_id |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Title : _current_seq_id |
144
|
|
|
|
|
|
|
Usage : $self->_current_seq_id( $current_seq_id ) |
145
|
|
|
|
|
|
|
Function: get/set the _current_seq_id |
146
|
|
|
|
|
|
|
Returns : value of _current_seq_id |
147
|
|
|
|
|
|
|
Args : new value (optional) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=cut |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub _current_seq_id { |
152
|
8
|
|
|
8
|
|
16
|
my ( $self, $value ) = @_; |
153
|
8
|
100
|
|
|
|
18
|
if ( defined $value ) { |
154
|
4
|
|
|
|
|
8
|
$self->{'_current_seq_id'} = $value; |
155
|
|
|
|
|
|
|
} |
156
|
8
|
|
|
|
|
42
|
return $self->{'_current_seq_id'}; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 _current_seq_description |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Title : _current_seq_description |
162
|
|
|
|
|
|
|
Usage : $self->_current_seq_description( $current_seq_description ) |
163
|
|
|
|
|
|
|
Function: get/set the _current_seq_description |
164
|
|
|
|
|
|
|
Returns : value of _current_seq_description |
165
|
|
|
|
|
|
|
Args : new value (optional) |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub _current_seq_description { |
170
|
8
|
|
|
8
|
|
13
|
my ( $self, $value ) = @_; |
171
|
8
|
100
|
|
|
|
19
|
if ( defined $value ) { |
172
|
2
|
|
|
|
|
4
|
$self->{'_current_seq_description'} = $value; |
173
|
|
|
|
|
|
|
} |
174
|
8
|
|
|
|
|
49
|
return $self->{'_current_seq_description'}; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 _current_parameters |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Title : _current_parameters |
180
|
|
|
|
|
|
|
Usage : $self->_current_parameters( $parameters_hashref ) |
181
|
|
|
|
|
|
|
Function: get/set the _current_parameters |
182
|
|
|
|
|
|
|
Returns : hashref representing current parameters parsed from results file |
183
|
|
|
|
|
|
|
: keys are |
184
|
|
|
|
|
|
|
match_weight |
185
|
|
|
|
|
|
|
mismatch_weight |
186
|
|
|
|
|
|
|
indel_weight |
187
|
|
|
|
|
|
|
match_prob |
188
|
|
|
|
|
|
|
indel_prob |
189
|
|
|
|
|
|
|
min_score |
190
|
|
|
|
|
|
|
max_period_size |
191
|
|
|
|
|
|
|
Args : parameters hashref (optional) |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=cut |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub _current_parameters { |
196
|
8
|
|
|
8
|
|
14
|
my ( $self, $value ) = @_; |
197
|
8
|
100
|
|
|
|
17
|
if ( defined $value ) { |
198
|
4
|
|
|
|
|
9
|
$self->{'_current_parameters'} = $value; |
199
|
|
|
|
|
|
|
} |
200
|
8
|
|
|
|
|
23
|
return $self->{'_current_parameters'}; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head2 next_result |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Title : next_result |
206
|
|
|
|
|
|
|
Usage : my $r = $trf->next_result() |
207
|
|
|
|
|
|
|
Function: Get the next result set from parser data |
208
|
|
|
|
|
|
|
Returns : Bio::SeqFeature::Generic |
209
|
|
|
|
|
|
|
Args : none |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=cut |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub next_result { |
214
|
6
|
|
|
6
|
1
|
2753
|
my ($self) = @_; |
215
|
6
|
|
|
|
|
27
|
while ( defined( $_ = $self->_readline() ) ) { |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Parse Version line |
218
|
117
|
100
|
|
|
|
342
|
if (/^Version (.+)/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
219
|
3
|
|
|
|
|
9
|
my $version = $1; |
220
|
3
|
|
|
|
|
3
|
$self->warn("parsed version: $version\n") if DEBUG; |
221
|
3
|
50
|
|
|
|
13
|
$self->warn( qq{ Bio::Tools::TandemRepeatsFinder was written and tested for Tandem Repeats Masker Version 4.00 output |
222
|
|
|
|
|
|
|
You appear to be using Verion $version. Use at your own risk.}) if ($version != 4); |
223
|
3
|
|
|
|
|
8
|
$self->version($version); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# Parse Sequence identifier |
227
|
|
|
|
|
|
|
# i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f |
228
|
|
|
|
|
|
|
elsif ( /^Sequence: ([^\s]+)\s(.+)?/ ) { |
229
|
4
|
|
|
|
|
12
|
my $seq_id = $1; |
230
|
4
|
|
|
|
|
10
|
my $seq_description = $2; |
231
|
4
|
|
|
|
|
7
|
$self->warn("parsed sequence_id: $seq_id\n") if DEBUG; |
232
|
4
|
|
|
|
|
14
|
$self->_current_seq_id($seq_id); |
233
|
4
|
|
|
|
|
11
|
$self->_current_seq_description($seq_description); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# Parse Parameters |
237
|
|
|
|
|
|
|
# i.e. Parameters: 2 7 7 80 10 50 12 |
238
|
|
|
|
|
|
|
elsif (/^Parameters: (.+)/) { |
239
|
4
|
|
|
|
|
9
|
my $params = $1; |
240
|
4
|
|
|
|
|
4
|
$self->warn("parsed parameters: $params\n") if DEBUG; |
241
|
|
|
|
|
|
|
|
242
|
4
|
|
|
|
|
28
|
my @param_array = split /\s/, $params; |
243
|
|
|
|
|
|
|
|
244
|
4
|
|
|
|
|
27
|
my $param_hash = { |
245
|
|
|
|
|
|
|
match_weight => $param_array[0], |
246
|
|
|
|
|
|
|
mismatch_weight => $param_array[1], |
247
|
|
|
|
|
|
|
indel_weight => $param_array[2], |
248
|
|
|
|
|
|
|
match_prob => $param_array[3], |
249
|
|
|
|
|
|
|
indel_prob => $param_array[4], |
250
|
|
|
|
|
|
|
min_score => $param_array[5], |
251
|
|
|
|
|
|
|
max_period_size => $param_array[6] |
252
|
|
|
|
|
|
|
}; |
253
|
4
|
|
|
|
|
14
|
$self->_current_parameters($param_hash); |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Parse Data |
257
|
|
|
|
|
|
|
# i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT |
258
|
|
|
|
|
|
|
elsif (/^\d+\s\d+\s\d+/) { |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# call internal method to create Bio::SeqFeature::Generic |
261
|
|
|
|
|
|
|
# to represent tandem repeat |
262
|
4
|
|
|
|
|
13
|
return $self->_create_feature($_); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
0
|
elsif (DEBUG) { |
266
|
|
|
|
|
|
|
$self->warn( "UNPARSED LINE:\n" . $_ ); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
2
|
|
|
|
|
6
|
return; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head2 _create_feature |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Title : _create_feature |
275
|
|
|
|
|
|
|
Usage : internal method used by 'next_result' |
276
|
|
|
|
|
|
|
Function: Takes a line from the results file and creates a bioperl object |
277
|
|
|
|
|
|
|
Returns : Bio::SeqFeature::Generic |
278
|
|
|
|
|
|
|
Args : a data line from the results file |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub _create_feature { |
283
|
4
|
|
|
4
|
|
8
|
my ( $self, $line ) = @_; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# split the line and store into named variables |
286
|
4
|
|
|
|
|
33
|
my @element = split /\s/, $line; |
287
|
|
|
|
|
|
|
my ( |
288
|
4
|
|
|
|
|
20
|
$start, $end, $period_size, |
289
|
|
|
|
|
|
|
$copy_number, $consensus_size, $percent_matches, |
290
|
|
|
|
|
|
|
$percent_indels, $score, $percent_a, |
291
|
|
|
|
|
|
|
$percent_c, $percent_g, $percent_t, |
292
|
|
|
|
|
|
|
$entropy, $consensus_sequence, $repeat_sequence |
293
|
|
|
|
|
|
|
) = @element; |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# create tag hash from data in line |
296
|
4
|
|
|
|
|
16
|
my $tags = { |
297
|
|
|
|
|
|
|
period_size => $period_size, |
298
|
|
|
|
|
|
|
copy_number => $copy_number, |
299
|
|
|
|
|
|
|
consensus_size => $consensus_size, |
300
|
|
|
|
|
|
|
percent_matches => $percent_matches, |
301
|
|
|
|
|
|
|
percent_indels => $percent_indels, |
302
|
|
|
|
|
|
|
percent_a => $percent_a, |
303
|
|
|
|
|
|
|
percent_c => $percent_c, |
304
|
|
|
|
|
|
|
percent_g => $percent_g, |
305
|
|
|
|
|
|
|
percent_t => $percent_t, |
306
|
|
|
|
|
|
|
entropy => $entropy, |
307
|
|
|
|
|
|
|
consensus_sequence => $consensus_sequence, |
308
|
|
|
|
|
|
|
repeat_sequence => $repeat_sequence, |
309
|
|
|
|
|
|
|
run_parameters => $self->_current_parameters(), |
310
|
|
|
|
|
|
|
sequence_description => $self->_current_seq_description() |
311
|
|
|
|
|
|
|
}; |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# create feature from start/end etc |
314
|
4
|
|
|
|
|
13
|
my $feat = Bio::SeqFeature::Generic->new( |
315
|
|
|
|
|
|
|
-seq_id => $self->_current_seq_id(), |
316
|
|
|
|
|
|
|
-score => $score, |
317
|
|
|
|
|
|
|
-start => $start, |
318
|
|
|
|
|
|
|
-end => $end, |
319
|
|
|
|
|
|
|
-source_tag => 'Tandem Repeats Finder', |
320
|
|
|
|
|
|
|
-primary_tag => 'tandem repeat', |
321
|
|
|
|
|
|
|
-tag => $tags |
322
|
|
|
|
|
|
|
); |
323
|
|
|
|
|
|
|
|
324
|
4
|
|
|
|
|
30
|
return $feat; |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
329
|
|
|
|
|
|
|
|