File Coverage

blib/lib/Bio/Tools/Run/Genewise.pm
Criterion Covered Total %
statement 58 157 36.9
branch 5 52 9.6
condition 0 12 0.0
subroutine 17 26 65.3
pod 6 7 85.7
total 86 254 33.8


line stmt bran cond sub pod time code
1             #
2             # Please direct questions and support issues to
3             #
4             # Cared for by
5             #
6             # Copyright to a FUGU Student Intern
7             #
8             # You may distribute this module under the same terms as perl itself
9             # POD documentation - main docs before the code
10              
11             =head1 NAME
12              
13             Bio::Tools::Run::Genewise - Object for predicting genes in a
14             given sequence given a protein
15              
16             =head1 SYNOPSIS
17              
18             # Build a Genewise alignment factory
19             my $factory = Bio::Tools::Run::Genewise->new();
20              
21             # Pass the factory 2 Bio:SeqI objects (in the order of query peptide
22             # and target_genomic).
23              
24             # @genes is an array of Bio::SeqFeature::Gene::GeneStructure objects
25             my @genes = $factory->run($protein_seq, $genomic_seq);
26              
27             # Alternatively pass the factory a profile HMM filename and a
28             # Bio:SeqI object (in the order of query HMM and target_genomic).
29              
30             # Set hmmer switch first to tell genewise to expect an HMM
31             $factory->hmmer(1);
32             my @genes = $factory->run($hmmfile, $genomic_seq);
33              
34              
35             =head1 DESCRIPTION
36              
37             Genewise is a gene prediction program developed by Ewan Birney
38             http://www.sanger.ac.uk/software/wise2.
39              
40             =head2 Available Params:
41              
42             NB: These should be passed without the '-' or they will be ignored,
43             except switches such as 'hmmer' (which have no corresponding value)
44             which should be set on the factory object using the AUTOLOADed methods
45             of the same name.
46              
47             Model [-codon,-gene,-cfreq,-splice,-subs,-indel,-intron,-null]
48             Alg [-kbyte,-alg]
49             HMM [-hmmer]
50             Output [-gff,-gener,-alb,-pal,-block,-divide]
51             Standard [-help,-version,-silent,-quiet,-errorlog]
52              
53              
54             =head1 FEEDBACK
55              
56             =head2 Mailing Lists
57              
58             User feedback is an integral part of the evolution of this and other
59             Bioperl modules. Send your comments and suggestions preferably to one
60             of the Bioperl mailing lists. Your participation is much appreciated.
61              
62             bioperl-l@bioperl.org - General discussion
63             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64              
65             =head2 Support
66              
67             Please direct usage questions or support issues to the mailing list:
68              
69             I
70              
71             rather than to the module maintainer directly. Many experienced and
72             reponsive experts will be able look at the problem and quickly
73             address it. Please include a thorough description of the problem
74             with code and data examples if at all possible.
75              
76             =head2 Reporting Bugs
77              
78             Report bugs to the Bioperl bug tracking system to help us keep track
79             the bugs and their resolution. Bug reports can be submitted via
80             the web:
81              
82             http://redmine.open-bio.org/projects/bioperl/
83              
84             =head1 AUTHOR - FUGU Student Intern
85              
86             Email: fugui@worf.fugu-sg.org
87              
88             =head1 CONTRIBUTORS
89              
90             Jason Stajich jason-AT-bioperl_DOT_org
91             Keith James kdj@sanger.ac.uk
92              
93             =head1 APPENDIX
94              
95             The rest of the documentation details each of the object
96             methods. Internal methods are usually preceded with a _
97              
98             =cut
99              
100             package Bio::Tools::Run::Genewise;
101 1         77 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
102             @GENEWISE_SWITCHES @GENEWISE_PARAMS
103 1     1   114516 @OTHER_SWITCHES %OK_FIELD);
  1         3  
104 1     1   304 use Bio::SeqIO;
  1         36353  
  1         29  
105 1     1   318 use Bio::SeqFeature::Generic;
  1         37481  
  1         28  
106 1     1   274 use Bio::SeqFeature::Gene::Exon;
  1         1019  
  1         31  
107 1     1   5 use Bio::Root::Root;
  1         2  
  1         15  
108 1     1   330 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         23  
109 1     1   260 use Bio::SeqFeature::FeaturePair;
  1         1460  
  1         30  
110 1     1   315 use Bio::SeqFeature::Gene::Transcript;
  1         2451  
  1         30  
111 1     1   294 use Bio::SeqFeature::Gene::GeneStructure;
  1         1494  
  1         28  
112 1     1   280 use Bio::Tools::Genewise;
  1         2402  
  1         23  
113 1     1   6 use Bio::Tools::AnalysisResult;
  1         2  
  1         18  
114 1     1   4 use strict;
  1         1  
  1         71  
115              
116             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase );
117              
118             # Two ways to run the program .....
119             # 1. define an environmental variable WISEDIR
120             # export WISEDIR =/usr/local/share/wise2.2.0
121             # where the wise2.2.20 package is installed
122             #
123             # 2. include a definition of an environmental variable WISEDIR in
124             # every script that will use DBA.pm
125             # $ENV{WISEDIR} = '/usr/local/share/wise2.2.20';
126              
127             BEGIN {
128 1     1   6 @GENEWISE_PARAMS = qw( DYMEM CODON GENE CFREQ SPLICE GENESTATS INIT
129             SUBS INDEL INTRON NULL INSERT SPLICE_MAX_COLLAR SPLICE_MIN_COLLAR
130             GW_EDGEQUERY GW_EDGETARGET GW_SPLICESPREAD
131             KBYTE HNAME ALG BLOCK DIVIDE GENER U V S T G E M);
132              
133 1         2 @GENEWISE_SWITCHES = qw(HELP SILENT QUIET ERROROFFSTD TREV PSEUDO NOSPLICE_GTAG
134             SPLICE_GTAG NOGWHSP GWHSP
135             TFOR TABS BOTH HMMER );
136              
137             # Authorize attribute fields
138 1         3 foreach my $attr ( @GENEWISE_PARAMS, @GENEWISE_SWITCHES,
139 44         1088 @OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
140             }
141              
142             =head2 program_name
143              
144             Title : program_name
145             Usage : $factory>program_name()
146             Function: holds the program name
147             Returns: string
148             Args : None
149              
150             =cut
151              
152             sub program_name {
153 6     6 1 26 return 'genewise';
154             }
155              
156             =head2 program_dir
157              
158             Title : program_dir
159             Usage : $factory->program_dir(@params)
160             Function: returns the program directory, obtained from ENV variable.
161             Returns: string
162             Args :
163              
164             =cut
165              
166             sub program_dir {
167 3 50   3 1 11 return Bio::Root::IO->catfile($ENV{WISEDIR},"/src/bin/") if $ENV{WISEDIR};
168             }
169              
170             sub new {
171 1     1 1 131 my ($class, @args) = @_;
172 1         12 my $self = $class->SUPER::new(@args);
173              
174 1         40 my ($attr, $value);
175 1         4 while (@args) {
176 3         6 $attr = shift @args;
177 3         12 $value = shift @args;
178 3 100       10 next if( $attr =~ /^-/ ); # don't want named parameters
179 2         17 $self->$attr($value);
180             }
181              
182 1         3 return $self;
183             }
184              
185             sub AUTOLOAD {
186 1     1   2 my $self = shift;
187 1         2 my $attr = $AUTOLOAD;
188 1         5 $attr =~ s/.*:://;
189 1         2 $attr = uc $attr;
190 1 50       3 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
191 1 50       4 $self->{$attr} = shift if @_;
192 1         2 return $self->{$attr};
193             }
194              
195             =head2 version
196              
197             Title : version
198             Usage : exit if $prog->version() < 1.8
199             Function: Determine the version number of the program
200             Example :
201             Returns : float or undef
202             Args : none
203              
204             =cut
205              
206             sub version {
207 0     0 1   my ($self) = @_;
208              
209 0 0         return undef unless $self->executable;
210 0           my $prog = $self->executable;
211 0           my $string = `$prog -version`;
212 0 0         if( $string =~ /Version:\s+\$\s*Name:\s+(\S+)\s+\$/ ) {
    0          
213 0           return $1;
214             } elsif( $string =~ /(Version *)/i ) {
215 0           return $1;
216             } else {
217 0           return undef;
218             }
219             }
220              
221             =head2 predict_genes
222              
223             Title : predict_genes
224             Usage : DEPRECATED. Use $factory->run($seq1,$seq2)
225             Function: Predict genes
226             Returns : A Bio::Seqfeature::Gene:GeneStructure object
227             Args : Name of a file containing a set of 2 fasta sequences in the order of
228             peptide and genomic sequences
229             or else 2 Bio::Seq objects.
230              
231             Throws an exception if argument is not either a string (eg a filename)
232             or 2 Bio::Seq objects. If arguments are strings, throws exception if
233             file corresponding to string name can not be found.
234              
235             =cut
236              
237             sub predict_genes {
238 0     0 1   return shift->run(@_);
239             }
240              
241             =head2 run
242              
243             Title : run
244             Usage : 2 sequence objects
245             $genes = $factory->run($seq1, $seq2);
246             Function: run
247             Returns : A Bio::Seqfeature::Gene:GeneStructure object
248             Args : Names of a files each containing a fasta sequence in the order
249             of either (peptide sequence, genomic sequence) or (profile HMM,
250             genomic sequence). Alternatively any of the fasta sequence
251             filenames may be substituted with a Bio::Seq object.
252              
253             Throws an exception if argument is not either a string (eg a filename)
254             or Bio::Seq objects. If arguments are strings, throws exception if
255             file corresponding to string name can not be found. Also throws an
256             exception if a profile HMM is expected (the -hmmer genewise switch has
257             been set).
258              
259             =cut
260              
261             sub run{
262 0     0 1   my ($self, $seq1, $seq2) = @_;
263 0           my ($attr, $value, $switch);
264 0           $self->io->_io_cleanup();
265             # Create input file pointer
266 0           my ($infile1,$infile2)= $self->_setinput($seq1, $seq2);
267 0 0 0       if (!($infile1 && $infile2)) {$self->throw("Bad input data (sequences need an id ) ");}
  0            
268              
269             # run genewise
270 0           my @genes = $self->_run($infile1,$infile2);
271 0           return @genes;
272             }
273              
274             =head2 _run
275              
276             Title : _run
277             Usage : Internal function, not to be called directly
278             Function: Makes actual system call to a genewise program
279             Example :
280             Returns : L
281             Args : Name of a files containing 2 sequences in the order of peptide and genomic
282              
283             =cut
284              
285             sub _run {
286 0     0     my ($self,$infile1,$infile2) = @_;
287 0           my $instring;
288 0           $self->debug("Program ".$self->executable."\n");
289 0 0         unless ( $self->executable ) {
290 0           $self->throw("Cannot run Genewise unless the executable is found. Check your environment variables or make sure genewise is in your path.");
291             }
292 0           my $paramstring = $self->_setparams;
293              
294 0           my $commandstring = $self->executable." $paramstring $infile1 $infile2";
295             # this is to capture STDERR messages which leak out when you run programs
296             # with open(FH, "... |");
297 0 0 0       if (($self->silent && $self->quiet) &&
      0        
298             ($^O !~ /os2|dos|amigaos/)) {
299 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
300 0           $commandstring .= " 2> $null";
301             }
302 0           my ($tfh1,$outfile1) = $self->io->tempfile(-dir=>$self->tempdir);
303 0           $self->debug("genewise command = $commandstring");
304 0           my $status = system("$commandstring > $outfile1");
305 0 0         $self->throw("Genewise call $commandstring crashed: $? \n") unless $status == 0;
306            
307 0           my $genewiseParser = Bio::Tools::Genewise->new(-file=> $outfile1);
308 0           my @genes;
309 0           while (my $gene = $genewiseParser->next_prediction()) {
310 0           push @genes, $gene;
311             }
312 0           close ($tfh1);
313 0           undef ($tfh1);
314 0           return @genes;
315             }
316              
317             sub get_strand {
318 0     0 0   my ($self,$start,$end) = @_;
319 0 0         $start || $self->throw("Need a start");
320 0 0         $end || $self->throw("Need an end");
321 0           my $strand;
322 0 0         if ($start > $end) {
323 0           my $tmp = $start;
324 0           $start = $end;
325 0           $end = $tmp;
326 0           $strand = -1;
327             }
328             else {
329 0           $strand = 1;
330             }
331 0           return ($start,$end,$strand);
332             }
333              
334             sub _setinput {
335 0     0     my ($self, $arg1, $seq2) = @_;
336 0           my ($tfh1,$tfh2,$outfile1,$outfile2);
337              
338 0 0 0       $self->throw("calling with not enough arguments") unless $arg1 && $seq2;
339              
340             # Not going to set _query_pep/_subject_dna_seq if you pass in a
341             # filename
342              
343 0 0         unless( ref($arg1) ) {
344 0 0         unless( -e $arg1 ) {
345 0 0         if ($self->hmmer) {
346 0           $self->throw("Argument1 was not a HMMER profile HMM file\n")
347             }
348             else {
349 0           $self->throw("Argument1 is not a Bio::PrimarySeqI object nor file\n");
350             }
351             }
352 0           $outfile1 = $arg1;
353             }
354             else {
355 0 0         if ($self->hmmer) {
356 0           $self->throw("Argument1 was not a HMMER profile HMM file\n")
357             }
358             else {
359 0           ($tfh1,$outfile1) = $self->io->tempfile(-dir=>$self->tempdir);
360 0           my $out1 = Bio::SeqIO->new('-fh' => $tfh1,
361             '-format' => 'fasta');
362 0           $out1->write_seq($arg1);
363 0           $self->_query_pep_seq($arg1);
364             # Make sure you close things - this is what creates
365             # Out of filehandle errors
366 0           close($tfh1);
367 0           undef $tfh1;
368             }
369             }
370              
371 0 0         unless( ref($seq2) ) {
372 0 0         unless( -e $seq2 ) {
373 0           $self->throw("Sequence2 is not a Bio::PrimarySeqI object nor file\n");
374             }
375 0           $outfile2 = $seq2;
376             }
377             else {
378 0           ($tfh2,$outfile2) = $self->io->tempfile(-dir=>$self->tempdir);
379 0           my $out2 = Bio::SeqIO->new('-fh' => $tfh2,
380             '-format' => 'fasta');
381 0           $out2->write_seq($seq2);
382              
383 0           $self->_subject_dna_seq($seq2);
384             # Make sure you close things - this is what creates
385             # Out of filehandle errors
386 0           close($tfh2);
387 0           undef $tfh2;
388             }
389 0           return ($outfile1,$outfile2);
390             }
391              
392             =head2 _setparams
393              
394             Title : _setparams
395             Usage : Internal function, not to be called directly
396             Function: creates a string of params to be used in the command string
397             Example :
398             Returns : string of params
399             Args :
400              
401             =cut
402              
403             sub _setparams {
404 0     0     my ($self) = @_;
405 0           my $param_string;
406 0           foreach my $attr(@GENEWISE_PARAMS){
407 0           my $value = $self->$attr();
408 0 0         next unless (defined $value);
409 0           my $attr_key = ' -'.(lc $attr);
410 0           $param_string .= $attr_key.' '.$value;
411             }
412 0           foreach my $attr(@GENEWISE_SWITCHES){
413 0           my $value = $self->$attr();
414 0 0         next unless (defined $value);
415 0           my $attr_key = ' -'.(lc $attr);
416 0           $param_string .=$attr_key;
417             }
418              
419 0           $param_string = $param_string." -genesf"; #specify the output option
420 0           return $param_string;
421             }
422              
423             =head2 _query_pep_seq
424              
425             Title : _query_pep_seq
426             Usage : Internal function, not to be called directly
427             Function: get/set for the query sequence
428             Example :
429             Returns :
430             Args :
431              
432             =cut
433              
434             sub _query_pep_seq {
435 0     0     my ($self,$seq) = @_;
436 0 0         if(defined $seq){
437 0           $self->{'_query_pep_seq'} = $seq;
438             }
439 0           return $self->{'_query_pep_seq'};
440             }
441              
442             =head2 _subject_dna_seq
443              
444             Title : _subject_dna_seq
445             Usage : Internal function, not to be called directly
446             Function: get/set for the subject sequence
447             Example :
448             Returns :
449              
450             Args :
451              
452             =cut
453              
454             sub _subject_dna_seq {
455 0     0     my ($self,$seq) = @_;
456 0 0         if(defined $seq){
457 0           $self->{'_subject_dna_seq'} = $seq;
458             }
459 0           return $self->{'_subject_dna_seq'};
460             }
461              
462             1;