File Coverage

Bio/Tools/Genewise.pm
Criterion Covered Total %
statement 122 128 95.3
branch 27 36 75.0
condition 2 3 66.6
subroutine 18 18 100.0
pod 2 3 66.6
total 171 188 90.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Genewise
3             #
4             # Copyright Fugu Team
5             #
6             # You may distribute this module under the same terms as perl itself
7              
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::Tools::Genewise - Results of one Genewise run
13              
14             =head1 SYNOPSIS
15              
16             use Bio::Tools::Genewise;
17             my $gw = Bio::Tools::Genewise(-file=>"genewise.out");
18              
19             while (my $gene = $gw->next_prediction){
20             my @transcripts = $gene->transcripts;
21             foreach my $t(@transcripts){
22             my @exons = $t->exons;
23             foreach my $e(@exons){
24             print $e->start." ".$e->end."\n";
25             }
26             }
27             }
28              
29             =head1 DESCRIPTION
30              
31             This is the parser for the output of Genewise. It takes either a file
32             handle or a file name and returns a
33             Bio::SeqFeature::Gene::GeneStructure object.
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to one
41             of the Bioperl mailing lists. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             the bugs and their resolution. Bug reports can be submitted via the
61             web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Fugu Team, Jason Stajich
66              
67             Email: fugui@worf.fugu-sg.org
68             Email: jason-at-bioperl.org
69              
70             =head1 APPENDIX
71              
72             The rest of the documentation details each of the object
73             methods. Internal methods are usually preceded with a _
74              
75             =cut
76              
77              
78             # Let the code begin...
79              
80              
81             package Bio::Tools::Genewise;
82 3     3   398 use vars qw($Srctag);
  3         6  
  3         103  
83 3     3   13 use strict;
  3         4  
  3         52  
84 3     3   10 use Symbol;
  3         5  
  3         134  
85              
86 3     3   461 use Bio::Tools::AnalysisResult;
  3         4  
  3         63  
87 3     3   620 use Bio::SeqFeature::Generic;
  3         7  
  3         74  
88 3     3   642 use Bio::SeqFeature::Gene::Exon;
  3         6  
  3         83  
89 3     3   594 use Bio::SeqFeature::FeaturePair;
  3         6  
  3         67  
90 3     3   567 use Bio::SeqFeature::Gene::Transcript;
  3         5  
  3         90  
91 3     3   550 use Bio::SeqFeature::Gene::GeneStructure;
  3         7  
  3         76  
92              
93 3     3   14 use base qw(Bio::Root::Root Bio::Root::IO);
  3         5  
  3         3033  
94             $Srctag = 'genewise';
95              
96             =head2 new
97              
98             Title : new
99             Usage : $obj->new(-file=>"genewise.out");
100             $obj->new(-fh=>\*GW);
101             Function: Constructor for genewise wrapper. Takes either a file or filehandle
102             Example :
103             Returns : Bio::Tools::Genewise object
104              
105             See L
106              
107             =cut
108              
109             sub new {
110 5     5 1 22 my($class,@args) = @_;
111 5         30 my $self = $class->SUPER::new(@args);
112 5         27 $self->_initialize_io(@args);
113 5         17 return $self;
114             }
115              
116             =head2 _get_strand
117              
118             Title : _get_strand
119             Usage : $obj->_get_strand
120             Function: takes start and end values, swap them if start>end and
121             returns end
122             Example :
123             Returns :$start,$end,$strand
124              
125             =cut
126              
127             sub _get_strand {
128 185     185   261 my ($self,$start,$end) = @_;
129 185 50       256 defined($start) || $self->throw("Need a start");
130 185 50       262 defined($end) || $self->throw("Need an end");
131 185         163 my $strand;
132 185 50       348 if ($start > $end) {
133 0         0 my $tmp = $start;
134 0         0 $start = $end;
135 0         0 $end = $tmp;
136 0         0 $strand = -1;
137             }
138             else {
139 185         183 $strand = 1;
140             }
141 185         393 return ($start,$end,$strand);
142             }
143              
144             =head2 _score
145              
146             Title : _score
147             Usage : $obj->_score
148             Function: get/set for score info
149             Returns : a score value
150              
151             =cut
152              
153             sub _score {
154 25     25   27 my $self = shift;
155 25 100       48 return $self->{'_score'} = shift if @_;
156 22         71 return $self->{'_score'};
157             }
158              
159             =head2 _prot_id
160              
161             Title : _prot_id
162             Usage : $obj->_prot_id
163             Function: get/set for protein id
164             Returns :a protein id
165              
166             =cut
167              
168             sub _prot_id {
169 283     283   262 my $self = shift;
170 283 100       350 return $self->{'_prot_id'} = shift if @_;
171 280         645 return $self->{'_prot_id'};
172             }
173              
174             =head2 _target_id
175              
176             Title : _target_id
177             Usage : $obj->_target_id
178             Function: get/set for genomic sequence id
179             Example :
180             Returns :a target id
181              
182             =cut
183              
184             sub _target_id {
185 150     150   148 my $self = shift;
186 150 100       218 return $self->{'_target_id'} = shift if @_;
187 147         519 return $self->{'_target_id'};
188             }
189              
190              
191             =head2 next_prediction
192              
193             Title : next_prediction
194             Usage : while($gene = $genewise->next_prediction()) {
195             # do something
196             }
197             Function: Returns the gene structure prediction of the Genewise result
198             file. Call this method repeatedly until FALSE is returned.
199              
200             Example :
201             Returns : a Bio::SeqFeature::Gene::GeneStructure object
202             Args :
203              
204             See L
205              
206             =cut
207              
208              
209             sub next_prediction {
210 5     5 1 22 my ($self) = @_;
211              
212 5 100       10 unless ( $self->parsed ){
213 3         7 $self->_parse_genes;
214 3         11 $self->parsed(1);
215             }
216 5         4 return shift @{$self->{'_genes'}};
  5         15  
217             }
218              
219             sub parsed {
220 8     8 0 11 my $self = shift;
221 8 50 66     34 return $self->{'_parsed'} = 1 if @_ && $_[0];
222 5         13 return $self->{'_parsed'};
223             }
224            
225             sub _parse_genes {
226 3     3   5 my ($self) = @_;
227 3         5 my (@alignments,@genes);
228 3         10 local ($/) = "//";
229              
230 3         13 while ( defined($_ = $self->_readline) ) {
231 9         29 $self->debug( $_ );
232 9         57 while( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
233 0         0 $alignments[$1] = $2;
234             }
235 9 100       31 if( /Score\s+(\-?\d+(\.\d+)?)/ ) {
236 3         10 $self->_score($1);# unless defined $self->_score;
237             }
238              
239 9 100       26 if( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
240 3         9 $self->_prot_id($1); #unless defined $self->_prot_id;
241             }
242              
243 9 100       23 if( /Target Sequence\s+(\S+)/ ) {
244 3         7 $self->_target_id($1);# unless defined $self->_target_id;
245             }
246            
247 9 100       37 next unless /Gene\s+\d+\n/;
248 3         17 my @genes_txt = split(/Gene\s+\d+\n/,$_);
249 3         6 shift @genes_txt; #remove first empty entry
250 3         6 my $gene_num = 1;
251 3         5 foreach my $gene_txt (@genes_txt) {
252 3         6 my $score = $alignments[$gene_num];
253             # If genewise has assigned a strand to the gene as a whole
254             # overall gene start and end
255 3         16 my ($g_start, $g_end, $type) =
256             $gene_txt =~ m/Gene\s+
257             (\d+)[\s-]+ # start (1-based)
258             (\d+)\s+ # end
259             (?:\[(\w+)\])? #
260             /x;
261 3         5 my $g_strand;
262 3 50       8 my $source_tag = $type ? "$Srctag". "_$type" : $Srctag;
263 3         7 my $genes = Bio::SeqFeature::Gene::GeneStructure->new
264             (-source => $source_tag,
265             -score => $self->_score,
266             );
267 3         9 $genes->add_tag_value('ID', $self->_prot_id.".gene");
268 3         21 my $transcript = Bio::SeqFeature::Gene::Transcript->new
269             (-source => $source_tag,
270             -score => $score);
271 3         9 ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
272 3         12 $genes->strand($g_strand);
273              
274             # grab exon + supporting feature info
275 3         3 my @exons;
276 3 50       56 unless ( @exons = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g ) {
277 0         0 @exons = $gene_txt =~ m/(Exon .+\s+)/g;
278             }
279 3         7 my $nbr = 1;
280              
281             # loop through each exon-supporting feature pair
282 3         5 foreach my $e (@exons){
283 54         327 my ($e_start,$e_end,$phase) =
284             $e =~ m/Exon\s+
285             (\d+)[\s-]+ # start (1 based)
286             (\d+)\s+ # end
287             phase\s+(\d+) # phase
288             /x;
289 54         74 my $e_strand;
290 54         95 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
291             $e_end);
292 54 100       99 $transcript->strand($e_strand) unless $transcript->strand != 0;
293 54         94 my $exon = Bio::SeqFeature::Gene::Exon->new
294             (-seq_id =>$self->_target_id,
295             -source => $source_tag,
296             -start =>$e_start,
297             -end =>$e_end,
298             -score => $score,
299             #-frame => $phase,
300             -strand =>$e_strand);
301              
302 54         135 $exon->add_tag_value('phase',$phase);
303 54         104 $exon->is_coding(1);
304 54 50       90 if( $self->_prot_id ) {
305 54         72 $exon->add_tag_value('Parent',$self->_prot_id);
306             }
307 54         112 $exon->add_tag_value("Exon",$nbr++);
308 54 50       254 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
309 54         173 my ($geno_start,$geno_end,
310             $prot_start, $prot_end) = ($1,$2,$3,$4);
311 54         51 my $prot_strand;
312 54         86 ($prot_start,$prot_end,
313             $prot_strand) = $self->_get_strand($prot_start,$prot_end);
314 54         87 my $pf = Bio::SeqFeature::Generic->new
315             ( -start => $prot_start,
316             -end => $prot_end,
317             -seq_id => $self->_prot_id,
318             -score => $score,
319             -strand => $prot_strand,
320             -source => $source_tag,
321             -primary_tag => 'supporting_protein_feature',);
322 54         75 my $geno_strand;
323 54         95 ($geno_start,$geno_end,
324             $geno_strand) = $self->_get_strand($geno_start,$geno_end);
325 54         100 my $gf = Bio::SeqFeature::Generic->new
326             ( -start => $geno_start,
327             -end => $geno_end,
328             -seq_id => $self->_target_id,
329             -score => $score,
330             -strand => $geno_strand,
331             -source => $source_tag,
332             -primary_tag => 'supporting_genomic_feature',);
333 54         212 my $fp = Bio::SeqFeature::FeaturePair->new
334             (-feature1 =>$gf,
335             -feature2 =>$pf);
336 54         135 $exon->add_tag_value( 'supporting_feature',$fp );
337 54 50       97 if( $self->_prot_id ) {
338 54         74 $exon->add_tag_value('Target','Protein:'.$self->_prot_id);
339 54         97 $exon->add_tag_value('Target',$prot_start);
340 54         75 $exon->add_tag_value('Target',$prot_end);
341             }
342             }
343 54         114 $transcript->add_exon($exon);
344             }
345 3         8 $transcript->seq_id($self->_target_id);
346 3         5 $transcript->add_tag_value('ID', $self->_prot_id);
347 3         7 $transcript->add_tag_value('Parent', $self->_prot_id.".gene");
348 3         13 $genes->add_transcript($transcript);
349 3         9 $genes->seq_id($self->_target_id);
350 3         9 push @genes, $genes;
351 3         20 $gene_num++;
352             }
353             }
354 3         16 $self->{'_genes'} = \@genes;
355             }
356              
357             1;