File Coverage

blib/lib/Bio/Tools/Run/Phylo/LVB.pm
Criterion Covered Total %
statement 53 128 41.4
branch 2 38 5.2
condition 0 5 0.0
subroutine 15 20 75.0
pod 5 5 100.0
total 75 196 38.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::LVB
3             #
4             # Created by Daniel Barker, based on ProtPars.pm by Shawn Hoon
5             #
6             # You may distribute this module under the same terms as perl itself
7             #
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::Tools::Run::Phylo::LVB - Object for using the LVB program to create
13             an array of L objects from a nucleotide multiple alignment
14             file or a nucleotide SimpleAlign object. Works with LVB version 2.1.
15              
16             =head1 SYNOPSIS
17              
18             use Bio::Tools::Run::Phylo::LVB;
19              
20             # Create a SimpleAlign object.
21             # NOTE. Aligning nucleotide sequence directly, as below, makes
22             # sense for non-coding nucleotide sequence (e.g., structural RNA
23             # genes, introns, ITS). For protein-coding genes, to prevent
24             # Clustal intronducing frameshifts one should instead align the
25             # translations of the genes, then convert the multiple alignment
26             # to nucleotide by referring to the corresponding transcript
27             # sequences (e.g., using EMBOSS tranalign).
28             use Bio::Tools::Run::Alignment::Clustalw;
29             $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new(quiet => 1);
30             $inputfilename = "/Users/daniel/nuc.fa";
31             $aln = $aln_factory->align($inputfilename);
32              
33             # Create the tree or trees.
34             $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1);
35             @trees = $tree_factory->run($aln);
36              
37             # Or one can pass in a file name containing a nucleotide multiple
38             # alignment in Phylip 3.6 format:
39             $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1);
40             $tree = $tree_factory->run("/Users/daniel/nuc.phy");
41              
42             =head1 DESCRIPTION
43              
44             Wrapper for LVB, which uses a simulated annealing heuristic search
45             to seek parsimonious trees from a nucleotide multiple alignment.
46              
47             =head1 FEEDBACK
48              
49             =head2 Mailing Lists
50              
51             User feedback is an integral part of the evolution of this and other
52             Bioperl modules. Send your comments and suggestions preferably to one
53             of the Bioperl mailing lists. Your participation is much appreciated.
54              
55             bioperl-l@bioperl.org - General discussion
56             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
57              
58             =head2 Support
59              
60             Please direct usage questions or support issues to the mailing list:
61              
62             I
63              
64             rather than to the module maintainer directly. Many experienced and
65             reponsive experts will be able look at the problem and quickly
66             address it. Please include a thorough description of the problem
67             with code and data examples if at all possible.
68              
69             =head2 Reporting Bugs
70              
71             Report bugs to the Bioperl bug tracking system to help us keep track
72             the bugs and their resolution. Bug reports can be submitted via the
73             web:
74              
75             http://redmine.open-bio.org/projects/bioperl/
76              
77             =head1 PARAMETERS FOR LVB COMPUTATION
78              
79             =head2 FORMAT
80              
81             Title : FORMAT
82             Description : (optional)
83             When running LVB from a Phylip 3.6-format
84             multiple alignment file, this specifies
85             the layout of the file. It may be
86             "interleaved" or "sequential". FORMAT is
87             automatically set to "interleaved" if
88             running from a SimpleAlign object.
89             Defaults to "interleaved".
90              
91             =head2 GAPS
92              
93             Title : GAPS
94             Description : (optional)
95             LVB can treat gaps represented in the
96             multiple alignment by "-" as either
97             "fifthstate" or "unknown". "fifthstate"
98             regards "-" as equivalent to "O", which
99             is an unambiguous character state
100             distinct from all nucleotides. "unknown"
101             regards "-" as equivalent to "?", which
102             is as an ambiguous site that may contain
103             "A" or "C" or "G" or "T" or "O".
104             Defaults to "unknown".
105              
106             =head2 SEED
107              
108             Title : SEED
109             Description : (optional)
110             This specifies the random number seed
111             for LVB. SEED must be an integer in the
112             range 0 to 900000000 inclusive. If no
113             seed is specified, LVB takes a seed from
114             the system clock. By default, no seed is
115             specified.
116              
117             =head2 DURATION
118              
119             Title : DURATION
120             Description : (optional)
121             This specifies the duration of the
122             analysis, which may be "fast" or "slow".
123             "slow" causes LVB to perform a more
124             thorough and more time-consuming search
125             than "fast". Defaults to "slow".
126              
127             =head2 BOOTSTRAPS
128              
129             Title : BOOTSTRAPS
130             Description : (optional)
131             This specifies the number of bootstrap
132             replicates to use, which must be a
133             positive integer. Set bootstraps to 0 for
134             no bootstrapping. Defaults to 0.
135              
136             =head1 AUTHOR
137              
138             Daniel Barker
139              
140             =head1 CONTRIBUTORS
141              
142             Email jason-AT-bioperl_DOT_org
143              
144             =head1 APPENDIX
145              
146             The rest of the documentation details each of the object
147             methods. Internal methods are usually preceded with a _
148              
149             =cut
150              
151             #'
152              
153             package Bio::Tools::Run::Phylo::LVB;
154              
155 1         67 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
156             @LVB_PARAMS @OTHER_SWITCHES
157 1     1   95514 %OK_FIELD);
  1         1  
158 1     1   5 use strict;
  1         2  
  1         17  
159 1     1   468 use Bio::SimpleAlign;
  1         82533  
  1         39  
160 1     1   11 use Cwd;
  1         2  
  1         73  
161 1     1   386 use Bio::AlignIO;
  1         4051  
  1         30  
162 1     1   259 use Bio::TreeIO;
  1         13658  
  1         31  
163 1     1   7 use Bio::Root::Root;
  1         2  
  1         17  
164 1     1   300 use Bio::Tools::Run::WrapperBase;
  1         4  
  1         40  
165 1     1   10 use Bio::Root::IO;
  1         2  
  1         22  
166 1     1   4 use File::Copy;
  1         2  
  1         85  
167              
168             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
169              
170             # You will need to enable the LVB program.
171             # You can set the path to the program through doing:
172             # my @params('executable'=>'/usr/local/bin/lvb');
173             # my $lvb_factory = Bio::Tools::Run::Phylo::LVB->new(@params);
174             #
175              
176             BEGIN {
177             # NOTE. The order of the members of @LVB_PARAMS is vital!
178 1     1   4 @LVB_PARAMS = qw(FORMAT GAPS SEED DURATION BOOTSTRAPS);
179 1         2 @OTHER_SWITCHES = qw(QUIET);
180 1         2 foreach my $attr(@LVB_PARAMS, @OTHER_SWITCHES) {
181 6         820 $OK_FIELD{$attr}++;
182             }
183             }
184              
185             =head2 program_name
186              
187             Title : program_name
188             Usage : ->program_name()
189             Function: holds the program name
190             Returns: string
191             Args : None
192              
193             =cut
194              
195             sub program_name {
196 6     6 1 24 return 'lvb';
197             }
198              
199             =head2 program_dir
200              
201             Title : program_dir
202             Usage : ->program_dir()
203             Function: returns undef
204             Args :
205              
206             =cut
207              
208             sub program_dir {
209 3     3 1 7 return undef;
210             }
211              
212             sub new {
213 1     1 1 85 my ($class,@args) = @_;
214 1         10 my $self = $class->SUPER::new(@args);
215              
216             # set defaults
217 1         20 $self->FORMAT("interleaved");
218 1         7 $self->GAPS("unknown");
219 1         13 $self->SEED("");
220 1         8 $self->DURATION("slow");
221 1         11 $self->BOOTSTRAPS(0);
222              
223             # re-set with user's values where specified
224 1         3 my ($attr, $value);
225 1         5 while (@args) {
226 0         0 $attr = shift @args;
227 0         0 $value = shift @args;
228 0 0       0 next if( $attr =~ /^-/ ); # don't want named parameters
229 0         0 $self->$attr($value);
230             }
231 1         3 return $self;
232             }
233              
234             sub AUTOLOAD {
235 5     5   6 my $self = shift;
236 5         8 my $attr = $AUTOLOAD;
237 5         17 $attr =~ s/.*:://;
238 5         8 $attr = uc $attr;
239 5 50       11 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
240 5 50       26 $self->{$attr} = shift if @_;
241 5         10 return $self->{$attr};
242             }
243              
244             =head2 run
245              
246             Title : run
247             Usage :
248             $inputfilename = '/Users/daniel/nuc.phy';
249             @trees = $factory->run($inputfilename);
250             Function: Create one or more LVB trees from a SimpleAlign
251             object or a file containing a Phylip 3.6-format
252             nucleotide multiple alignment.
253             Example :
254             Returns : Array of L objects
255             Args : Name of a file containing a nucleotide multiple
256             alignment in Phylip 3.6 format, or a SimpleAlign
257             object
258              
259             =cut
260              
261             sub run{
262            
263 0     0 1   my ($self,$input) = @_;
264 0           my ($infilename);
265              
266             # Create input file pointer
267 0           $infilename = $self->_setinput($input);
268 0 0         if (!$infilename) {$self->throw("Problems setting up for lvb. Probably bad input data in $input !");}
  0            
269            
270             # Create parameter string to pass to lvb program
271 0           my $param_string = $self->_setparams();
272              
273             # run lvb
274 0           my @trees = $self->_run($infilename,$param_string);
275             }
276              
277             =head2 create_tree
278              
279             Title : create_tree
280             Usage :
281             $inputfilename = '/Users/daniel/nuc.phy';
282             @trees = $factory->create_tree($inputfilename);
283             Function: Create one or more LVB trees from a SimpleAlign
284             object or a file containing a Phylip 3.6-format
285             nucleotide multiple alignment.
286             Example :
287             Returns : Array of L objects
288             Args : Name of a file containing a nucleotide multiple
289             alignment in Phylip 3.6 format, or a SimpleAlign
290             object
291              
292             =cut
293              
294             sub create_tree{
295 0     0 1   return shift->run(@_);
296             }
297              
298             #################################################
299              
300             =head2 _run
301              
302             Title : _run
303             Usage : Internal function, not to be called directly
304             Function: makes actual system call to lvb program
305             Example :
306             Returns : Array of Bio::Tree objects
307             Args : Name of a file containing a multiple alignment
308             in Phylip 3.6 format and a parameter string to be
309             passed to LVB
310              
311             =cut
312              
313             sub _run {
314 0     0     my ($self,$infile,$param_string) = @_;
315 0 0         return unless( $self->executable );
316              
317 0           my $instring;
318 0           my $curpath = cwd;
319 0 0         unless( File::Spec->file_name_is_absolute($infile) ) {
320 0           $infile = $self->io->catfile($curpath,$infile);
321             }
322 0           $instring = $param_string;
323 0   0       $self->debug( "Program ".$self->executable || ''."\n");
324              
325             # create LVB's working copy of the input file, which must be named "infile"
326             # NOTE, we cut trailing spaces since they can cause trouble with LVB 2.1
327 0           my $lvb_infile = $self->tempdir . "/infile";
328 0           open(LVB_SUB_RUN_TMP_IN_FH, "$infile");
329 0           open(LVB_SUB_RUN_TMP_OUT_FH, ">$lvb_infile");
330 0           while () {
331 0           s/ +$//;
332 0 0         print LVB_SUB_RUN_TMP_OUT_FH
333             or $self->throw("output error on $lvb_infile");
334             }
335 0           chdir($self->tempdir);
336             #open a pipe to run lvb to bypass interactive menus
337 0 0 0       if ($self->quiet() || $self->verbose() < 0) {
338 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
339 0           open(LVB_PIPE,"|".$self->executable.">$null");
340             }
341             else {
342 0           open(LVB_PIPE,"|".$self->executable);
343             }
344 0           print LVB_PIPE $instring;
345 0           close(LVB_PIPE);
346 0           chdir($curpath);
347             #get the results
348 0           my $treefile = $self->tempdir . "/outtree";
349            
350 0 0         $self->throw("LVB did not create treefile correctly")
351             unless (-e $treefile);
352              
353             #create the trees
354 0           my $in = Bio::TreeIO->new(-file => $treefile, '-format' => 'newick');
355 0           my @trees = ();
356 0           while (my $tree = $in->next_tree()) {
357 0           push @trees, $tree;
358             }
359              
360 0 0         unless ( $self->save_tempfiles ) {
361             # Clean up the temporary files created along the way...
362 0           unlink $lvb_infile;
363 0           unlink $treefile;
364             }
365 0           return @trees;
366             }
367              
368             =head2 _setinput
369              
370             Title : _setinput
371             Usage : Internal function, not to be called directly
372             Function: Create input file for lvb program
373             Example :
374             Returns : name of file containing a multiple alignment in
375             Phylip 3.6 format
376             Args : SimpleAlign object reference or input file name
377              
378             =cut
379              
380             sub _setinput {
381 0     0     my ($self, $input, $suffix) = @_;
382 0           my ($alnfilename,$infilename, $temp, $tfh,$input_tmp,$input_fh);
383              
384             # If $input is not a reference it better be the name of a
385             # file with the sequence/
386              
387             # a phy formatted alignment file
388 0 0         unless (ref $input) {
389             # check that file exists or throw
390 0           $alnfilename= $input;
391 0 0         unless (-e $input) {return 0;}
  0            
392 0           return $alnfilename;
393             }
394              
395             # $input may be a SimpleAlign Object
396 0 0         if ($input->isa("Bio::Align::AlignI")) {
397             # Open temporary file for both reading & writing of BioSeq array
398 0           ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
399 0           my $alnIO = Bio::AlignIO->new(-fh => $tfh, -format=>'phylip',idlength=>$10);
400 0           $alnIO->write_aln($input);
401 0           $alnIO->close();
402 0           close($tfh);
403 0           $tfh = undef;
404 0 0         unless ($self->format() =~ /^interleaved$/i) {
405 0           $self->warn("resetting LVB format to interleaved");
406 0           $self->format("interleaved");
407             }
408 0           return $alnfilename;
409             }
410 0           return 0;
411             }
412              
413             =head2 _setparams
414              
415             Title : _setparams
416             Usage : Internal function, not to be called directly
417             Function: Create parameter inputs for lvb program
418             Example :
419             Returns : parameter string to be passed to LVB
420             Args : name of calling object
421              
422             =cut
423              
424             sub _setparams {
425 0     0     my ($attr, $value, $self);
426              
427 0           $self = shift;
428 0           my $param_string = "";
429              
430 0           for $attr (@LVB_PARAMS) {
431 0           $value = $self->$attr();
432 0 0         if ($attr =~/SEED/i) {
    0          
433 0 0         $value = "" unless defined $value;
434 0           $param_string .= "$value\n";
435             } elsif ($attr =~ /BOOTSTRAPS/i) {
436 0 0         $value = 0 unless defined $value;
437 0           $param_string .= "$value\n";
438             } else { # we want I for "interleaved" or S for "sequential",
439             # U for "unknown" or F for "fifthstate",
440             # F for "fast" or S for "slow"
441 0           $param_string .= uc(substr $value, 0, 1) . "\n";
442             }
443             }
444              
445 0           return $param_string;
446             }
447              
448             1; # Needed to keep compiler happy