File Coverage

Bio/Tools/TandemRepeatsFinder.pm
Criterion Covered Total %
statement 57 58 98.2
branch 16 18 88.8
condition n/a
subroutine 11 11 100.0
pod 3 3 100.0
total 87 90 96.6


line stmt bran cond sub pod time code
1              
2             =head1 NAME
3              
4             Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
5              
6             =head1 SYNOPSIS
7              
8             use Bio::Tools::TandemRepeatsFinder;
9              
10             # create parser
11             my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
12              
13             # loop through results
14             while( my $feat = $parser->next_result ) {
15              
16             # print the source sequence id, start, end, percent matches, and the consensus sequence
17             my ($percent_matches) = $feat->get_tag_values('percent_matches');
18             my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19             print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
20              
21             }
22              
23             =head1 DESCRIPTION
24              
25             A parser for Tandem Repeats Finder output.
26             Written and tested for version 4.00
27              
28             Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature.
29             All other data is stored in tags. The available tags are
30              
31             period_size
32             copy_number
33             consensus_size
34             percent_matches
35             percent_indels
36             percent_a
37             percent_c
38             percent_g
39             percent_t
40             entropy
41             consensus_sequence
42             repeat_sequence
43             run_parameters
44             sequence_description
45              
46             The run_parameters are stored in a hashref with the following keys:
47              
48             match_weight
49             mismatch_weight
50             indel_weight
51             match_prob
52             indel_prob
53             min_score
54             max_period_size
55              
56             =head1 FEEDBACK
57              
58             =head2 Mailing Lists
59              
60             User feedback is an integral part of the evolution of this and other
61             Bioperl modules. Send your comments and suggestions preferably to
62             the Bioperl mailing list. Your participation is much appreciated.
63              
64             bioperl-l@bioperl.org - General discussion
65             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66              
67             =head2 Support
68              
69             Please direct usage questions or support issues to the mailing list:
70              
71             I<bioperl-l@bioperl.org>
72              
73             rather than to the module maintainer directly. Many experienced and
74             responsive experts will be able to look at the problem and quickly
75             address it. Please include a thorough description of the problem
76             with code and data examples if at all possible.
77              
78             =head2 Reporting Bugs
79              
80             Report bugs to the Bioperl bug tracking system to help us keep track
81             of the bugs and their resolution. Bug reports can be submitted via
82             the web:
83              
84             https://github.com/bioperl/bioperl-live/issues
85              
86             =head1 AUTHOR - Eric Just
87              
88             Email e-just@northwestern.edu
89              
90             =head1 APPENDIX
91              
92             The rest of the documentation details each of the object methods.
93             Internal methods are usually preceded with a _
94              
95             =cut
96              
97             package Bio::Tools::TandemRepeatsFinder;
98 1     1   397 use strict;
  1         1  
  1         24  
99 1     1   3 use constant DEBUG => 0;
  1         1  
  1         41  
100 1     1   339 use Bio::SeqFeature::Generic;
  1         1  
  1         26  
101              
102 1     1   4 use base qw(Bio::Root::Root Bio::Root::IO);
  1         1  
  1         647  
103              
104             =head2 new
105              
106             Title : new
107             Usage : my $obj = Bio::Tools::TandemRepeatsFinder->new();
108             Function: Builds a new Bio::Tools::TandemRepeatsFinder object
109             Returns : Bio::Tools::TandemRepeatsFinder
110             Args : -fh/-file => $val, for initing input, see Bio::Root::IO
111              
112             =cut
113              
114             sub new {
115 3     3 1 7 my ( $class, @args ) = @_;
116              
117 3         14 my $self = $class->SUPER::new(@args);
118 3         13 $self->_initialize_io(@args);
119              
120 3         7 return $self;
121             }
122              
123             =head2 version
124              
125             Title : version
126             Usage : $self->version( $version )
127             Function: get/set the version of Tandem Repeats finder that was used in analysis
128             Returns : value of version
129             Args : new value (optional)
130              
131             =cut
132              
133             sub version {
134 3     3 1 4 my ( $self, $value ) = @_;
135 3 50       6 if ( defined $value ) {
136 3         5 $self->{'version'} = $value;
137             }
138 3         6 return $self->{'version'};
139             }
140              
141             =head2 _current_seq_id
142              
143             Title : _current_seq_id
144             Usage : $self->_current_seq_id( $current_seq_id )
145             Function: get/set the _current_seq_id
146             Returns : value of _current_seq_id
147             Args : new value (optional)
148              
149             =cut
150              
151             sub _current_seq_id {
152 8     8   9 my ( $self, $value ) = @_;
153 8 100       11 if ( defined $value ) {
154 4         5 $self->{'_current_seq_id'} = $value;
155             }
156 8         28 return $self->{'_current_seq_id'};
157             }
158              
159             =head2 _current_seq_description
160              
161             Title : _current_seq_description
162             Usage : $self->_current_seq_description( $current_seq_description )
163             Function: get/set the _current_seq_description
164             Returns : value of _current_seq_description
165             Args : new value (optional)
166              
167             =cut
168              
169             sub _current_seq_description {
170 8     8   8 my ( $self, $value ) = @_;
171 8 100       13 if ( defined $value ) {
172 2         4 $self->{'_current_seq_description'} = $value;
173             }
174 8         35 return $self->{'_current_seq_description'};
175             }
176              
177             =head2 _current_parameters
178              
179             Title : _current_parameters
180             Usage : $self->_current_parameters( $parameters_hashref )
181             Function: get/set the _current_parameters
182             Returns : hashref representing current parameters parsed from results file
183             : keys are
184             match_weight
185             mismatch_weight
186             indel_weight
187             match_prob
188             indel_prob
189             min_score
190             max_period_size
191             Args : parameters hashref (optional)
192              
193             =cut
194              
195             sub _current_parameters {
196 8     8   9 my ( $self, $value ) = @_;
197 8 100       11 if ( defined $value ) {
198 4         7 $self->{'_current_parameters'} = $value;
199             }
200 8         18 return $self->{'_current_parameters'};
201             }
202              
203             =head2 next_result
204              
205             Title : next_result
206             Usage : my $r = $trf->next_result()
207             Function: Get the next result set from parser data
208             Returns : Bio::SeqFeature::Generic
209             Args : none
210              
211             =cut
212              
213             sub next_result {
214 6     6 1 1822 my ($self) = @_;
215 6         19 while ( defined( $_ = $self->_readline() ) ) {
216              
217             # Parse Version line
218 117 100       325 if (/^Version (.+)/) {
    100          
    100          
    100          
219 3         6 my $version = $1;
220 3         4 $self->warn("parsed version: $version\n") if DEBUG;
221 3 50       10 $self->warn( qq{ Bio::Tools::TandemRepeatsFinder was written and tested for Tandem Repeats Masker Version 4.00 output
222             You appear to be using Verion $version. Use at your own risk.}) if ($version != 4);
223 3         7 $self->version($version);
224             }
225              
226             # Parse Sequence identifier
227             # i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f
228             elsif ( /^Sequence: ([^\s]+)\s(.+)?/ ) {
229 4         6 my $seq_id = $1;
230 4         6 my $seq_description = $2;
231 4         4 $self->warn("parsed sequence_id: $seq_id\n") if DEBUG;
232 4         7 $self->_current_seq_id($seq_id);
233 4         6 $self->_current_seq_description($seq_description);
234             }
235              
236             # Parse Parameters
237             # i.e. Parameters: 2 7 7 80 10 50 12
238             elsif (/^Parameters: (.+)/) {
239 4         7 my $params = $1;
240 4         4 $self->warn("parsed parameters: $params\n") if DEBUG;
241              
242 4         19 my @param_array = split /\s/, $params;
243              
244 4         19 my $param_hash = {
245             match_weight => $param_array[0],
246             mismatch_weight => $param_array[1],
247             indel_weight => $param_array[2],
248             match_prob => $param_array[3],
249             indel_prob => $param_array[4],
250             min_score => $param_array[5],
251             max_period_size => $param_array[6]
252             };
253 4         8 $self->_current_parameters($param_hash);
254             }
255              
256             # Parse Data
257             # i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
258             elsif (/^\d+\s\d+\s\d+/) {
259              
260             # call internal method to create Bio::SeqFeature::Generic
261             # to represent tandem repeat
262 4         9 return $self->_create_feature($_);
263             }
264              
265 0         0 elsif (DEBUG) {
266             $self->warn( "UNPARSED LINE:\n" . $_ );
267             }
268             }
269 2         3 return;
270             }
271              
272             =head2 _create_feature
273              
274             Title : _create_feature
275             Usage : internal method used by 'next_result'
276             Function: Takes a line from the results file and creates a bioperl object
277             Returns : Bio::SeqFeature::Generic
278             Args : a data line from the results file
279              
280             =cut
281              
282             sub _create_feature {
283 4     4   5 my ( $self, $line ) = @_;
284              
285             # split the line and store into named variables
286 4         25 my @element = split /\s/, $line;
287             my (
288 4         12 $start, $end, $period_size,
289             $copy_number, $consensus_size, $percent_matches,
290             $percent_indels, $score, $percent_a,
291             $percent_c, $percent_g, $percent_t,
292             $entropy, $consensus_sequence, $repeat_sequence
293             ) = @element;
294              
295             # create tag hash from data in line
296 4         9 my $tags = {
297             period_size => $period_size,
298             copy_number => $copy_number,
299             consensus_size => $consensus_size,
300             percent_matches => $percent_matches,
301             percent_indels => $percent_indels,
302             percent_a => $percent_a,
303             percent_c => $percent_c,
304             percent_g => $percent_g,
305             percent_t => $percent_t,
306             entropy => $entropy,
307             consensus_sequence => $consensus_sequence,
308             repeat_sequence => $repeat_sequence,
309             run_parameters => $self->_current_parameters(),
310             sequence_description => $self->_current_seq_description()
311             };
312              
313             # create feature from start/end etc
314 4         11 my $feat = Bio::SeqFeature::Generic->new(
315             -seq_id => $self->_current_seq_id(),
316             -score => $score,
317             -start => $start,
318             -end => $end,
319             -source_tag => 'Tandem Repeats Finder',
320             -primary_tag => 'tandem repeat',
321             -tag => $tags
322             );
323              
324 4         22 return $feat;
325              
326             }
327              
328             1;
329