File Coverage

blib/lib/Bio/Tools/Run/Phylo/Semphy.pm
Criterion Covered Total %
statement 23 64 35.9
branch 0 22 0.0
condition 0 2 0.0
subroutine 8 11 72.7
pod 4 4 100.0
total 35 103 33.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::Semphy
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Phylo::Semphy - Wrapper for Semphy
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::Phylo::Semphy;
21              
22             # Make a Semphy factory
23             $factory = Bio::Tools::Run::Phylo::Semphy->new();
24              
25             # Run Semphy with an alignment
26             my $tree = $factory->run($alignfilename);
27              
28             # or with alignment object
29             $tree = $factory->run($bio_simplalign);
30              
31             # you can supply an initial tree as well, which can be a newick tree file,
32             # Bio::Tree::Tree object...
33             $tree = $factory->run($bio_simplalign, $tree_obj);
34              
35             # ... or Bio::DB::Taxonomy object
36             $tree = $factory->run($bio_simplalign, $bio_db_taxonomy);
37              
38             # (mixtures of all the above are possible)
39              
40             # $tree isa Bio::Tree::Tree
41              
42             =head1 DESCRIPTION
43              
44             This is a wrapper for running the Semphy application by N. Friedman et a.. You
45             can get details here: http://compbio.cs.huji.ac.il/semphy/. Semphy is used for
46             phylogenetic reconstruction (making a tree with branch lengths from an aligned
47             set of input sequences).
48              
49             You can try supplying normal Semphy command-line arguments to new(), eg.
50             new(-hky => 1) or calling arg-named methods (excluding the initial hyphen(s),
51             eg. $factory->hky(1) to set the --hky switch to true).
52             Note that Semphy args are case-sensitive. To distinguish between Bioperl's
53             -verbose and the Semphy's --verbose, you must set Semphy's verbosity with
54             -semphy_verbose or the semphy_verbose() method.
55              
56              
57             You will need to enable this Semphy wrapper to find the Semphy program.
58             This can be done in (at least) three ways:
59              
60             1. Make sure the Semphy executable is in your path.
61             2. Define an environmental variable SEMPHYDIR which is a
62             directory which contains the Semphy application:
63             In bash:
64              
65             export SEMPHYDIR=/home/username/semphy/
66              
67             In csh/tcsh:
68              
69             setenv SEMPHYDIR /home/username/semphy
70              
71             3. Include a definition of an environmental variable SEMPHYDIR in
72             every script that will use this Semphy wrapper module, e.g.:
73              
74             BEGIN { $ENV{SEMPHYDIR} = '/home/username/semphy/' }
75             use Bio::Tools::Run::Phylo::Semphy;
76              
77             =head1 FEEDBACK
78              
79             =head2 Mailing Lists
80              
81             User feedback is an integral part of the evolution of this and other
82             Bioperl modules. Send your comments and suggestions preferably to
83             the Bioperl mailing list. Your participation is much appreciated.
84              
85             bioperl-l@bioperl.org - General discussion
86             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87              
88             =head2 Support
89              
90             Please direct usage questions or support issues to the mailing list:
91              
92             I
93              
94             rather than to the module maintainer directly. Many experienced and
95             reponsive experts will be able look at the problem and quickly
96             address it. Please include a thorough description of the problem
97             with code and data examples if at all possible.
98              
99             =head2 Reporting Bugs
100              
101             Report bugs to the Bioperl bug tracking system to help us keep track
102             of the bugs and their resolution. Bug reports can be submitted via
103             the web:
104              
105             http://redmine.open-bio.org/projects/bioperl/
106              
107             =head1 AUTHOR - Sendu Bala
108              
109             Email bix@sendu.me.uk
110              
111             =head1 APPENDIX
112              
113             The rest of the documentation details each of the object methods.
114             Internal methods are usually preceded with a _
115              
116             =cut
117              
118             package Bio::Tools::Run::Phylo::Semphy;
119 1     1   112264 use strict;
  1         7  
  1         28  
120              
121 1     1   4 use File::Spec;
  1         1  
  1         14  
122 1     1   514 use Bio::AlignIO;
  1         98752  
  1         41  
123 1     1   499 use Bio::TreeIO;
  1         15986  
  1         30  
124              
125 1     1   6 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
  1         1  
  1         497  
126              
127             our $PROGRAM_NAME = 'semphy';
128             our $PROGRAM_DIR = $ENV{'SEMPHYDIR'};
129              
130             # methods for the semphy args we support
131             our %PARAMS = (outputfile => 'o',
132             treeoutputfile => 'T',
133             constraint => 'c',
134             gaps => 'g',
135             seed => 'r',
136             Logfile => 'l',
137             alphabet => 'a',
138             ratio => 'z',
139             ACGprob => 'p',
140             BPrepeats => 'BPrepeats',
141             BPconsensus => 'BPconsensus',
142             SEMPHY => 'S',
143             modelfile => 'modelfile',
144             alpha => 'A',
145             categories => 'C',
146             semphy_verbose => 'semphy_verbose');
147             our %SWITCHES = (homogeneousRatesDTME => 'homogeneousRatesDTME',
148             NJ => 'J',
149             pairwiseGammaDTME => 'pairwiseGammaDTME',
150             commonAlphaDTME => 'commonAlphaDTME',
151             rate4siteDTME => 'rate4siteDTME',
152             posteriorDTME => 'posteriorDTME',
153             BPonUserTree => 'BPonUserTree',
154             nucjc => 'nucjc',
155             aaJC => 'aaJC',
156             k2p => 'k2p',
157             hky => 'hky',
158             day => 'day',
159             jtt => 'jtt',
160             rev => 'rev',
161             wag => 'wag',
162             cprev => 'cprev',
163             homogeneous => 'H',
164             optimizeAlpha => 'O',
165             bbl => 'n',
166             likelihood => 'L',
167             PerPosLike => 'P',
168             PerPosPosterior => 'B',
169             rate => 'R');
170              
171             # just to be explicit, args we don't support (yet) or we handle ourselves
172             our @UNSUPPORTED = qw(h help full-help s sequence t tree);
173              
174              
175             =head2 program_name
176              
177             Title : program_name
178             Usage : $factory>program_name()
179             Function: holds the program name
180             Returns : string
181             Args : None
182              
183             =cut
184              
185             sub program_name {
186 7     7 1 43 return $PROGRAM_NAME;
187             }
188              
189             =head2 program_dir
190              
191             Title : program_dir
192             Usage : $factory->program_dir(@params)
193             Function: returns the program directory, obtained from ENV variable.
194             Returns : string
195             Args : None
196              
197             =cut
198              
199             sub program_dir {
200 4     4 1 1790 return $PROGRAM_DIR;
201             }
202              
203             =head2 new
204              
205             Title : new
206             Usage : $factory = Bio::Tools::Run::Phylo::Semphy->new()
207             Function: creates a new Semphy factory
208             Returns : Bio::Tools::Run::Phylo::Semphy
209             Args : Most options understood by Semphy can be supplied as key =>
210             value pairs, with a true value for switches.
211              
212             These options can NOT be used with this wrapper (they are handled
213             internally or don't make sense in this context):
214             -h | --help | --fill-help
215             -s | --sequence
216             -t | --tree
217              
218             To distinguish between Bioperl's -verbose and the Semphy's --verbose,
219             you must set Semphy's verbosity with -semphy_verbose
220              
221             =cut
222              
223             sub new {
224 1     1 1 120 my ($class, @args) = @_;
225 1         15 my $self = $class->SUPER::new(@args);
226            
227 16         32 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
228 1         49 (map { $_ => $SWITCHES{$_} } keys %SWITCHES),
  23         43  
229             quiet => 'quiet'},
230             -create => 1,
231             -case_sensitive => 1);
232            
233 1         22 return $self;
234             }
235              
236             =head2 run
237              
238             Title : run
239             Usage : $result = $factory->run($fasta_align_file);
240             -or-
241             $result = $factory->run($align_object);
242             -or-
243             $result = $factory->run($fasta_align_file, $newick_tree_file);
244             -or-
245             $result = $factory->run($align_object, $tree_object);
246             -or-
247             $result = $factory->run($align_object, $db_taxonomy_object);
248             Function: Runs Semphy on an alignment.
249             Returns : Bio::Tree::Tree
250             Args : The first argument represents an alignment, the second (optional)
251             argument a species tree (to set an initial tree: normally the -t
252             option to Semphy).
253             The alignment can be provided as a multi-fasta format alignment
254             filename, or a Bio::Align::AlignI compliant object (eg. a
255             Bio::SimpleAlign).
256             The species tree can be provided as a newick format tree filename
257             or a Bio::Tree::TreeI compliant object. Alternatively a
258             Bio::DB::Taxonomy object can be supplied, in which case the species
259             tree will be generated by using the alignment sequence names as
260             species names and looking for those in the supplied database.
261            
262             In all cases where an initial tree was supplied, the alignment
263             sequence names must correspond to node ids in the species tree.
264              
265             =cut
266              
267             sub run {
268 0     0 1   my ($self, $aln, $tree) = @_;
269            
270 0 0         $aln || $self->throw("alignment must be supplied");
271 0           $self->_alignment($aln);
272            
273 0 0         if ($tree) {
274 0           $self->_tree($tree);
275            
276             # check node and seq names match
277 0           $self->_check_names;
278             }
279            
280 0           return $self->_run;
281             }
282              
283             sub _run {
284 0     0     my $self = shift;
285            
286 0   0       my $exe = $self->executable || return;
287            
288 0           my $aln_file = $self->_write_alignment;
289            
290             # generate a semphy-friendly tree file
291 0           my $tree = $self->_tree;
292 0           my $tree_file = '';
293 0 0         if ($tree) {
294 0           $tree = $self->_write_tree;
295             }
296            
297 0 0         unless ($self->T) {
298 0           my ($tfh, $tempfile) = $self->io->tempfile(-dir => $self->tempdir);
299 0           $self->T($tempfile);
300 0           close($tfh);
301             }
302            
303 0           my $command = $exe.$self->_setparams($aln_file, $tree_file);
304 0           $self->debug("semphy command = $command\n");
305            
306 0 0         open(my $pipe, "$command |") || $self->throw("semphy call ($command) failed to start: $? | $!");
307 0           my $error = '';
308 0           while (<$pipe>) {
309 0 0         print unless $self->quiet;
310 0           $error .= $_;
311             }
312 0 0         close($pipe) || ($error ? $self->warn("semphy call ($command) failed: $error") : $self->throw("semphy call ($command) crashed: $?"));
    0          
313            
314 0           my $result_file = $self->T();
315 0           my $tio = Bio::TreeIO->new(-format => 'newick', -file => $result_file);
316 0           my $result_tree = $tio->next_tree;
317            
318 0           return $result_tree;
319             }
320              
321             =head2 _setparams
322              
323             Title : _setparams
324             Usage : Internal function, not to be called directly
325             Function: Creates a string of params to be used in the command string
326             Returns : string of params
327             Args : alignment and tree file names
328              
329             =cut
330              
331             sub _setparams {
332 0     0     my ($self, $aln_file, $tree_file) = @_;
333            
334 0           my $param_string = ' -s '.$aln_file;
335 0 0         $param_string .= ' -t '.$tree_file if $tree_file;
336            
337 0           my %methods = map { $_ => $_ } keys %PARAMS;
  0            
338 0           $methods{'semphy_verbose'} = 'verbose';
339 0           $param_string .= $self->SUPER::_setparams(-params => \%methods,
340             -switches => [keys %SWITCHES],
341             -double_dash => 1);
342            
343 0           $param_string .= ' 2>&1';
344 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
345 0 0         $param_string .= " 1>$null" if $self->quiet;
346            
347 0           return $param_string;
348             }
349              
350             1;