File Coverage

blib/lib/Bio/Tools/Run/Phylo/Phylip/ProtPars.pm
Criterion Covered Total %
statement 60 122 49.1
branch 7 34 20.5
condition 0 3 0.0
subroutine 16 21 76.1
pod 6 6 100.0
total 89 186 47.8


line stmt bran cond sub pod time code
1             # $Id$
2             # BioPerl module for Bio::Tools::Run::Phylo::Phylip::ProtPars
3             #
4             # Created by Shawn Hoon
5             #
6             # You may distribute this module under the same terms as perl itself
7             #
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::Tools::Run::Phylo::Phylip::ProtPars - Object for creating a L object from a multiple alignment file or a SimpleAlign object
13              
14             14 Nov 2002 Shawn
15             Works with Phylip version 3.6
16              
17             =head1 SYNOPSIS
18              
19             #Create a SimpleAlign object
20             @params = ('ktuple' => 2, 'matrix' => 'BLOSUM');
21             $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
22             $inputfilename = 't/data/cysprot.fa';
23             $aln = $factory->run($inputfilename); # $aln is a SimpleAlign object.
24              
25             #Create the Tree
26             #using a threshold value of 30 and id name lengths limit of 30
27             #note to use id name length greater than the standard 10 in protpars,
28             # you will need to modify the protpars source code
29             $tree_factory = Bio::Tools::Run::Phylo::Phylip::ProtPars->
30             new(idlength=>30,threshold=>10,jumble=>"17,10",outgroup=>2);
31             $tree = $tree_factory->run($aln);
32              
33             #Or one can pass in a file name containing a multiple alignment
34             #in phylip format:
35              
36             $tree_factory =
37             Bio::Tools::Run::Phylo::Phylip::ProtPars->new(idlength=>30,threshold=>10);
38             $tree = $tree_factory->run("/usr/users/shawnh/COMPARA/prot.phy");
39              
40             # To prevent PHYLIP from truncating sequence names:
41             # Step 1. Shelf the original names:
42             my ($aln_safe, $ref_name)= # $aln_safe has serial names
43             $aln->set_displayname_safe(); # $ref_name holds original names
44             # Step 2. Run ProtPars:
45             $tree = $protpars_factory->run($aln_safe); # Use $aln_safe instead of $aln
46             # Step 3. Retrieve orgininal OTU names:
47             use Bio::Tree::Tree;
48             my @nodes=$tree->get_nodes();
49             foreach my $nd (@nodes){
50             $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf;
51             }
52              
53             =head1 PARAMTERS FOR PROTPARS COMPUTATION
54              
55             =head2 THRESHOLD
56              
57             Title : THRESHOLD
58             Description : (optional)
59             This sets a threshold such that if the number of
60             steps counted in a character is higher than the
61             threshold, it will be taken to be the threshold
62             value rather than the actual number of steps. You
63             should use a positive real number greater than 1.
64             Please see the documetation from the phylip package
65             for more information.
66              
67             =head2 OUTGROUP
68              
69             Title : OUTGROUP
70             Description : (optional)
71              
72             This specifies which species is to be used to root
73             the tree by having it become the outgroup. Input
74             values are integers specifying which species to use.
75             Defaults to 1
76              
77             =head2 JUMBLE
78              
79             Title : JUMBLE
80             Description : (optional)
81             This enables you to tell the program to use a random
82             number generator to choose the input order of
83             species. Input values is of the format:
84             seed,iterations eg 17,10 seed: an integer between 1
85             and 32767 and of the form 4n+1 which means that it
86             must give a remainder of 1 when divided by 4. Each
87             different seed leads to a different sequence of
88             addition of species. By simply changing the random
89             number seed and re-running programs one can look for
90             other, and better trees. iterations: For a value of
91             10, this will tell the program to try ten different
92             orders of species in constructing the trees, and the
93             results printed out will reflect this entire search
94             process (that is, the best trees found among all 10
95             runs will be printed out, not the best trees from
96             each individual run).
97              
98             =head1 FEEDBACK
99              
100             =head2 Mailing Lists
101              
102             User feedback is an integral part of the evolution of this and other
103             Bioperl modules. Send your comments and suggestions preferably to one
104             of the Bioperl mailing lists. Your participation is much appreciated.
105              
106             bioperl-l@bioperl.org - General discussion
107             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
108              
109             =head2 Support
110              
111             Please direct usage questions or support issues to the mailing list:
112              
113             I
114              
115             rather than to the module maintainer directly. Many experienced and
116             reponsive experts will be able look at the problem and quickly
117             address it. Please include a thorough description of the problem
118             with code and data examples if at all possible.
119              
120             =head2 Reporting Bugs
121              
122             Report bugs to the Bioperl bug tracking system to help us keep track
123             the bugs and their resolution. Bug reports can be submitted via the
124             web:
125              
126             http://redmine.open-bio.org/projects/bioperl/
127              
128             =head1 AUTHOR - Shawn Hoon
129              
130             Email shawnh@fugu-sg.org
131              
132             =head1 CONTRIBUTORS
133              
134             Email jason-AT-bioperl_DOT_org
135              
136              
137             =head1 APPENDIX
138              
139             The rest of the documentation details each of the object
140             methods. Internal methods are usually preceded with a _
141              
142             =cut
143              
144              
145            
146             package Bio::Tools::Run::Phylo::Phylip::ProtPars;
147              
148 1         69 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
149             @PROTPARS_PARAMS @OTHER_SWITCHES
150 1     1   105481 %OK_FIELD);
  1         2  
151 1     1   3 use strict;
  1         1  
  1         16  
152 1     1   647 use Bio::SimpleAlign;
  1         84759  
  1         34  
153 1     1   8 use Cwd;
  1         2  
  1         59  
154 1     1   446 use Bio::AlignIO;
  1         4105  
  1         22  
155 1     1   388 use Bio::TreeIO;
  1         13695  
  1         25  
156 1     1   5 use Bio::Root::Root;
  1         2  
  1         13  
157 1     1   3 use Bio::Root::IO;
  1         1  
  1         16  
158 1     1   400 use Bio::Tools::Run::Phylo::Phylip::Base;
  1         2  
  1         25  
159 1     1   5 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
  1         1  
  1         140  
160              
161             @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
162              
163             # You will need to enable the protpars program. This
164             # can be done in (at least) two ways:
165             #
166             # 1. define an environmental variable PHYLIPDIR:
167             # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
168             #
169             # 2. include a definition of an environmental variable CLUSTALDIR in
170             # every script that will use Clustal.pm.
171             # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
172             #
173             # 3. You can set the path to the program through doing:
174             # my @params('program'=>'/usr/local/bin/protdist');
175             # my $protpars_factory = Bio::Tools::Run::Phylo::Phylip::ProtPars->new(@params);
176             #
177              
178              
179             BEGIN {
180 1     1   2 @PROTPARS_PARAMS = qw(THRESHOLD JUMBLE OUTGROUP);
181 1         1 @OTHER_SWITCHES = qw(QUIET);
182 1         2 foreach my $attr(@PROTPARS_PARAMS,@OTHER_SWITCHES) {
183 4         739 $OK_FIELD{$attr}++;
184             }
185             }
186              
187             =head2 program_name
188              
189             Title : program_name
190             Usage : >program_name()
191             Function: holds the program name
192             Returns: string
193             Args : None
194              
195             =cut
196              
197             sub program_name {
198 6     6 1 22 return 'protpars';
199             }
200              
201             =head2 program_dir
202              
203             Title : program_dir
204             Usage : ->program_dir()
205             Function: returns the program directory, obtained from ENV variable.
206             Returns: string
207             Args :
208              
209             =cut
210              
211             sub program_dir {
212 3 50   3 1 14 return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};
213             }
214              
215             sub new {
216 1     1 1 92 my ($class,@args) = @_;
217 1         10 my $self = $class->SUPER::new(@args);
218              
219 1         33 my ($attr, $value);
220 1         3 while (@args) {
221 4         3 $attr = shift @args;
222 4         4 $value = shift @args;
223 4 50       8 next if( $attr =~ /^-/ ); # don't want named parameters
224 4 100       9 if ($attr =~ /IDLENGTH/i){
225 1         2 $self->idlength($value);
226 1         2 next;
227             }
228 3         16 $self->$attr($value);
229             }
230 1         3 return $self;
231             }
232              
233             sub AUTOLOAD {
234 3     3   3 my $self = shift;
235 3         3 my $attr = $AUTOLOAD;
236 3         8 $attr =~ s/.*:://;
237 3         5 $attr = uc $attr;
238 3 50       6 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
239 3 50       8 $self->{$attr} = shift if @_;
240 3         7 return $self->{$attr};
241             }
242              
243             =head2 idlength
244              
245             Title : idlength
246             Usage : $obj->idlength ($newval)
247             Function:
248             Returns : value of idlength
249             Args : newvalue (optional)
250              
251              
252             =cut
253              
254             sub idlength{
255 1     1 1 2 my $self = shift;
256 1 50       3 if( @_ ) {
257 1         1 my $value = shift;
258 1         3 $self->{'idlength'} = $value;
259             }
260 1         1 return $self->{'idlength'};
261              
262             }
263              
264              
265             =head2 run
266              
267             Title : run
268             Usage :
269             $inputfilename = 't/data/prot.phy';
270             $tree = $factory->run($inputfilename);
271             or
272             $seq_array_ref = \@seq_array; @seq_array is array of Seq objs
273             $aln = $factory->run($seq_array_ref);
274             $tree = $treefactory->run($aln);
275             Function: Create a protpars tree from a SimpleAlign object
276             Example :
277             Returns : L object
278             Args : Name of a file containing a multiple alignment in Phylip format
279             or an SimpleAlign object
280              
281             Throws an exception if argument is not either a string (eg a
282             filename) or a Bio::SimpleAlign object. If
283             argument is string, throws exception if file corresponding to string
284             name can not be found.
285              
286             =cut
287              
288             sub run{
289              
290 0     0 1   my ($self,$input) = @_;
291 0           my ($infilename);
292              
293             # Create input file pointer
294 0           $infilename = $self->_setinput($input);
295 0 0         if (!$infilename) {$self->throw("Problems setting up for protpars. Probably bad input data in $input !");}
  0            
296              
297             # Create parameter string to pass to protpars program
298 0           my $param_string = $self->_setparams();
299              
300             # run protpars
301 0           my $aln = $self->_run($infilename,$param_string);
302             }
303              
304             =head2 create_tree
305              
306             Title : create_tree
307             Usage :
308             $inputfilename = 't/data/prot.phy';
309             $tree = $factory->create_tree($inputfilename);
310             or
311             $seq_array_ref = \@seq_array; @seq_array is array of Seq objs
312             $aln = $factory->align($seq_array_ref);
313             $tree = $treefactory->create_tree($aln);
314             Function: Create a protpars tree from a SimpleAlign object
315             Example :
316             Returns : L object
317             Args : Name of a file containing a multiple alignment in Phylip format
318             or an SimpleAlign object
319              
320             Throws an exception if argument is not either a string (eg a
321             filename) or a Bio::SimpleAlign object. If
322             argument is string, throws exception if file corresponding to string
323             name can not be found.
324              
325             =cut
326              
327             sub create_tree{
328 0     0 1   return shift->run(@_);
329             }
330              
331             #################################################
332              
333             =head2 _run
334              
335             Title : _run
336             Usage : Internal function, not to be called directly
337             Function: makes actual system call to protpars program
338             Example :
339             Returns : Bio::Tree object
340             Args : Name of a file containing a set of multiple alignments
341             in Phylip format and a parameter string to be passed to protpars
342              
343              
344             =cut
345              
346             sub _run {
347 0     0     my ($self,$infile,$param_string) = @_;
348 0           my $instring;
349 0           my $curpath = cwd;
350 0 0         unless( File::Spec->file_name_is_absolute($infile) ) {
351 0           $infile = $self->io->catfile($curpath,$infile);
352             }
353 0           $instring = $infile."\n$param_string";
354 0           $self->debug( "Program ".$self->executable."\n");
355 0           chdir($self->tempdir);
356             #open a pipe to run protpars to bypass interactive menus
357 0 0 0       if ($self->quiet() || $self->verbose() < 0) {
358 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
359 0           open(PROTPARS,"|".$self->executable.">$null");
360             }
361             else {
362 0           open(PROTPARS,"|".$self->executable);
363             }
364 0           print PROTPARS $instring;
365 0           close(PROTPARS);
366 0           chdir($curpath);
367             #get the results
368 0           my $outfile = $self->io->catfile($self->tempdir,$self->outfile);
369 0           my $treefile = $self->io->catfile($self->tempdir,$self->treefile);
370            
371 0 0         $self->throw("Protpars did not create treefile correctly")
372             unless (-e $treefile);
373              
374             #create the tree
375 0           my $in = Bio::TreeIO->new(-file => $treefile, '-format' => 'newick');
376 0           my $tree = $in->next_tree();
377              
378 0 0         unless ( $self->save_tempfiles ) {
379             # Clean up the temporary files created along the way...
380 0           unlink $treefile;
381 0           unlink $outfile;
382             }
383 0           return $tree;
384             }
385              
386              
387             =head2 _setinput()
388              
389             Title : _setinput
390             Usage : Internal function, not to be called directly
391             Function: Create input file for protpars program
392             Example :
393             Returns : name of file containing a multiple alignment in Phylip format
394             Args : SimpleAlign object reference or input file name
395              
396              
397             =cut
398              
399             sub _setinput {
400 0     0     my ($self, $input, $suffix) = @_;
401 0           my ($alnfilename,$infilename, $temp, $tfh,$input_tmp,$input_fh);
402              
403             # If $input is not a reference it better be the name of a
404             # file with the sequence/
405              
406             # a phy formatted alignment file
407 0 0         unless (ref $input) {
408             # check that file exists or throw
409 0           $alnfilename= $input;
410 0 0         unless (-e $input) {return 0;}
  0            
411 0           return $alnfilename;
412             }
413              
414             # $input may be a SimpleAlign Object
415 0 0         if ($input->isa("Bio::Align::AlignI")) {
416             # Open temporary file for both reading & writing of BioSeq array
417 0           ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
418 0           my $alnIO = Bio::AlignIO->new(-fh => $tfh, -format=>'phylip',idlength=>$self->idlength());
419 0           $alnIO->write_aln($input);
420 0           $alnIO->close();
421 0           close($tfh);
422 0           $tfh = undef;
423 0           return $alnfilename;
424             }
425 0           return 0;
426             }
427              
428             =head2 _setparams()
429              
430             Title : _setparams
431             Usage : Internal function, not to be called directly
432             Function: Create parameter inputs for protpars program
433             Example :
434             Returns : parameter string to be passed to protpars
435             Args : name of calling object
436              
437             =cut
438              
439             sub _setparams {
440 0     0     my ($attr, $value, $self);
441              
442 0           $self = shift;
443 0           my $param_string = "";
444            
445 0           my %menu = %{$Menu{$self->version}->{'PROTPARS'}};
  0            
446              
447 0           for $attr ( @PROTPARS_PARAMS) {
448 0           $value = $self->$attr();
449 0 0         next unless (defined $value);
450 0 0         if ($attr =~/JUMBLE/i){
451 0           my ($seed,$itr) = split(",",$value);
452 0           $param_string .=$menu{'JUMBLE'}."$seed\n$itr\n";
453             }
454             else {
455 0           $param_string .= $menu{uc $attr}."$value\n";
456             }
457             }
458 0           $param_string .="Y\n";
459              
460 0           return $param_string;
461             }
462              
463             1; # Needed to keep compiler happy