File Coverage

blib/lib/Bio/Tools/Run/Alignment/MAFFT.pm
Criterion Covered Total %
statement 80 190 42.1
branch 24 92 26.0
condition 4 26 15.3
subroutine 16 23 69.5
pod 10 10 100.0
total 134 341 39.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Alignment::MAFFT
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Alignment::MAFFT - run the MAFFT alignment tools
17              
18             =head1 SYNOPSIS
19              
20             # Build a MAFFT alignment factory
21             $factory = Bio::Tools::Run::Alignment::MAFFT->new(@params);
22              
23             # Pass the factory a list of sequences to be aligned.
24             $inputfilename = 't/cysprot.fa';
25             # $aln is a SimpleAlign object.
26             $aln = $factory->align($inputfilename);
27              
28             # or where @seq_array is an array of Bio::Seq objects
29             $seq_array_ref = \@seq_array;
30             $aln = $factory->align($seq_array_ref);
31              
32             #There are various additional options available.
33              
34             =head1 DESCRIPTION
35              
36             You can get MAFFT from L.
37             "fftnsi" is the default method for Mafft version 4 in this
38             implementation.
39              
40             See Bio::Tools::Run::Alignment::Clustalw for a description on how to
41             specify parameters to the underlying alignment program. See the MAFFT
42             manual page for a description of the MAFFT parameters.
43              
44             =head1 FEEDBACK
45              
46             =head2 Mailing Lists
47              
48             User feedback is an integral part of the evolution of this and other
49             Bioperl modules. Send your comments and suggestions preferably to one
50             of the Bioperl mailing lists. Your participation is much appreciated.
51              
52             bioperl-l@bioperl.org - General discussion
53             http://bioperl.org/MailList.html - About the mailing lists
54              
55             =head2 Support
56              
57             Please direct usage questions or support issues to the mailing list:
58              
59             I
60              
61             rather than to the module maintainer directly. Many experienced and
62             reponsive experts will be able look at the problem and quickly
63             address it. Please include a thorough description of the problem
64             with code and data examples if at all possible.
65              
66             =head2 Reporting Bugs
67              
68             Report bugs to the Bioperl bug tracking system to help us keep track
69             the bugs and their resolution. Bug reports can be submitted via the web:
70              
71             http://redmine.open-bio.org/projects/bioperl/
72              
73             =head1 AUTHOR - Jason Stajich
74              
75             Email jason-at-bioperl.org
76              
77             =head1 APPENDIX
78              
79             The rest of the documentation details each of the object
80             methods. Internal methods are usually preceded with a _
81              
82             =cut
83              
84             package Bio::Tools::Run::Alignment::MAFFT;
85              
86 1         243 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
87             @MAFFT4_PARAMS @MAFFT4_SWITCHES @OTHER_SWITCHES %OK_FIELD
88             @MAFFT_ALN_METHODS @MAFFT6_PARAMS @MAFFT6_SWITCHES %OK_FIELD6
89 1     1   101095 );
  1         1  
90 1     1   14 use strict;
  1         2  
  1         24  
91 1     1   775 use Bio::Seq;
  1         47162  
  1         25  
92 1     1   463 use Bio::SeqIO;
  1         20302  
  1         29  
93 1     1   711 use Bio::SimpleAlign;
  1         29757  
  1         45  
94 1     1   586 use Bio::AlignIO;
  1         1066  
  1         27  
95              
96 1         422 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
97 1     1   5 Bio::Factory::ApplicationFactoryI);
  1         1  
98              
99             BEGIN {
100 1     1   228 %DEFAULTS = ( 'OUTPUT' => 'fasta',
101             'METHOD' => 'fftnsi',
102             'CYCLES' => 2);
103 1         2 @MAFFT4_PARAMS =qw( METHOD CYCLES );
104 1         2 @MAFFT4_SWITCHES = qw( NJ ALL_POSITIVE);
105              
106             # NB: Mafft6 options are case-sensitive (eg. --lop and --LOP is different)
107 1         6 @MAFFT6_PARAMS = qw( weighti retree maxiterate partsize groupsize
108             op ep lop lep lexp LOP LEXP bl jtt tm aamatrix fmodel seed );
109 1         3 @MAFFT6_SWITCHES = qw( auto 6merpair globalpair localpair genafpair
110             fastapair fft nofft noscore memsave parttree dpparttree fastaparttree
111             clustalout inputorder reorder treeout nuc amino
112             );
113              
114 1         1 @OTHER_SWITCHES = qw(QUIET ALIGN OUTPUT OUTFILE);
115 1         1 @MAFFT_ALN_METHODS = qw(fftnsi fftns nwnsi nwns fftnsrough nwnsrough);
116             #@MAFFT6_ALN_METHODS = qw(linsi ginsi einsi fftnsi fftns nwnsi nwns)
117             # Authorize attribute fields
118 1         3 foreach my $attr ( @MAFFT4_SWITCHES,@MAFFT4_PARAMS,@OTHER_SWITCHES ) {
119 8         12 $OK_FIELD{$attr}++;
120             }
121 1         2 foreach my $attr ( @MAFFT6_PARAMS, @MAFFT6_SWITCHES ) {
122 37         1383 $OK_FIELD6{$attr}++
123             }
124             }
125              
126             =head2 program_name
127              
128             Title : program_name
129             Usage : $factory->program_name()
130             Function: holds the program name
131             Returns: string
132             Args : None
133              
134             =cut
135              
136             sub program_name {
137 2     2 1 3 return 'mafft';
138             }
139              
140             =head2 executable
141              
142             Title : executable
143             Usage : my $exe = $blastfactory->executable('blastall');
144             Function: Finds the full path to the 'codeml' executable
145             Returns : string representing the full path to the exe
146             Args : [optional] name of executable to set path to
147             [optional] boolean flag whether or not warn when exe is not found
148              
149              
150             =cut
151              
152             sub executable {
153 2     2 1 621 my ($self, $exename, $exe,$warn) = @_;
154 2 50       9 $exename = $self->program_name unless (defined $exename );
155              
156 2 50 33     5 if( defined $exe && -x $exe ) {
157 0         0 $self->{'_pathtoexe'}->{$exename} = $exe;
158             }
159 2 50       5 unless( defined $self->{'_pathtoexe'}->{$exename} ) {
160 2         3 my $f = $self->program_path($exename);
161 2 50 33     69 $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e $f && -x $f );
162              
163             # This is how I meant to split up these conditionals --jason
164             # if exe is null we will execute this (handle the case where
165             # PROGRAMDIR pointed to something invalid)
166 2 50       5 unless( $exe ) { # we didn't find it in that last conditional
167 2 50 33     13 if( ($exe = $self->io->exists_exe($exename)) && -x $exe ) {
168 0         0 $self->{'_pathtoexe'}->{$exename} = $exe;
169             } else {
170 2 50       330 $self->warn("Cannot find executable for $exename") if $warn;
171 2         8 $self->{'_pathtoexe'}->{$exename} = undef;
172             }
173             }
174             }
175 2         18 return $self->{'_pathtoexe'}->{$exename};
176             }
177              
178              
179             =head2 program_path
180              
181             Title : program_path
182             Usage : my $path = $factory->program_path();
183             Function: Builds path for executable
184             Returns : string representing the full path to the exe
185             Args : none
186              
187             =cut
188              
189             sub program_path {
190 2     2 1 3 my ($self,$program_name) = @_;
191 2         2 my @path;
192 2 50       6 push @path, $self->program_dir if $self->program_dir;
193 2 50       12 push @path, $program_name .($^O =~ /mswin/i ?'.exe':'');
194              
195 2         13 return Bio::Root::IO->catfile(@path);
196             }
197              
198             =head2 program_dir
199              
200             Title : program_dir
201             Usage : $factory->program_dir(@params)
202             Function: returns the program directory, obtained from ENV variable.
203             Returns: string
204             Args :
205              
206             =cut
207              
208             sub program_dir {
209 2 50   2 1 5 return File::Spec->rel2abs($ENV{MAFFTDIR}) if $ENV{MAFFTDIR};
210             }
211              
212             sub new {
213 1     1 1 129 my ($class,@args) = @_;
214 1         14 my $self = $class->SUPER::new(@args);
215 1         48 my ($attr, $value);
216              
217 1         4 while (@args) {
218 2         3 $attr = shift @args;
219 2         3 $value = shift @args;
220 2 100       9 next if( $attr =~ /^-/); # don't want named parameters
221 1         8 $self->$attr($value);
222             }
223              
224 1 50       17 $self->output($DEFAULTS{'OUTPUT'}) unless( $self->output );
225 1 50       2 if ( ! $self->_version6 ) {
226 1 50       7 $self->method($DEFAULTS{'METHOD'}) unless( $self->method );
227             }
228 1         3 return $self;
229             }
230              
231             sub AUTOLOAD {
232 4     4   23 my $self = shift;
233 4         6 my $attr = $AUTOLOAD;
234 4         15 $attr =~ s/.*:://;
235              
236             # NB: Mafft6 options are case-sensitive
237 4 50       8 if ( $self->_version6 ) {
238 0 0       0 if ( $OK_FIELD6{ $attr } ) {
239             # Don't want the attrs to clash with bioperl attributes
240 0 0       0 $self->{version6attrs}{$attr} = shift if @_;
241 0         0 return $self->{version6attrs}{$attr};
242             }
243             }
244 4         5 $attr = uc $attr;
245             # aliasing
246 4 50       8 $attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME';
247 4 50       9 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
248              
249 4 100       8 $self->{$attr} = shift if @_;
250 4         18 return $self->{$attr};
251             }
252              
253             =head2 error_string
254              
255             Title : error_string
256             Usage : $obj->error_string($newval)
257             Function: Where the output from the last analysis run is stored.
258             Returns : value of error_string
259             Args : newvalue (optional)
260              
261              
262             =cut
263              
264             sub error_string{
265 0     0 1 0 my ($self,$value) = @_;
266 0 0       0 if( defined $value) {
267 0         0 $self->{'error_string'} = $value;
268             }
269 0         0 return $self->{'error_string'};
270              
271             }
272              
273             =head2 version
274              
275             Title : version
276             Usage : exit if $prog->version() < 1.8
277             Function: Determine the version number of the program
278             Example :
279             Returns : float or undef
280             Args : none
281              
282             =cut
283              
284             sub version {
285 1     1 1 2 my ($self) = @_;
286 1         1 my $exe;
287 1 50       3 return unless $exe = $self->executable;
288             # this is a bit of a hack, but MAFFT is just a gawk script
289             # so we are actually grepping the scriptfile
290             # UPDATE (Torsten Seemann)
291             # it now seems to be a 'sh' script and the format has changed
292             # slightly. i've tried to make the change compatible with both...
293             # version="v5.860 (2006/06/12)"; export version
294              
295 0 0       0 if( open(my $NAME, "grep 'export version' $exe | ") ) {
296 0         0 while(<$NAME>) {
297 0 0       0 if( /version.*?([\d.a-z]+)\s+/ ) {
298 0         0 return $1;
299             }
300             }
301 0         0 $self->warn("No version found");
302 0         0 close($NAME);
303             } else {
304 0         0 $self->warn("$!");
305             }
306 0         0 return;
307             }
308              
309             =head2 run
310              
311             Title : run
312             Usage : my $output = $application->run(\@seqs);
313             Function: Generic run of an application
314             Returns : Bio::SimpleAlign object
315             Args : array ref of Bio::PrimarySeqI objects OR
316             filename of sequences to run with
317              
318             =cut
319              
320             sub run {
321 0     0 1 0 my ($self,$seqs) = @_;
322 0         0 return $self->align($seqs);
323             }
324              
325             =head2 align
326              
327             Title : align
328             Usage :
329             $inputfilename = 't/data/cysprot.fa';
330             $aln = $factory->align($inputfilename);
331             or
332             $seq_array_ref = \@seq_array;
333             # @seq_array is an array of Seq objs
334             $aln = $factory->align($seq_array_ref);
335             Function: Perform a multiple sequence alignment
336             Returns : Reference to a SimpleAlign object containing the
337             sequence alignment.
338             Args : Name of a file containing a set of unaligned fasta sequences
339             or else an array of references to Bio::Seq objects.
340              
341             Throws an exception if argument is not either a string (eg a
342             filename) or a reference to an array of Bio::Seq objects. If
343             argument is string, throws exception if file corresponding to string
344             name can not be found. If argument is Bio::Seq array, throws
345             exception if less than two sequence objects are in array.
346              
347             =cut
348              
349             sub align {
350 0     0 1 0 my ($self,$input) = @_;
351             # Create input file pointer
352 0         0 $self->io->_io_cleanup();
353 0         0 my ($infilename,$type) = $self->_setinput($input);
354 0 0       0 if (! $infilename) {
355 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
356             }
357              
358 0         0 my ($param_string,$outstr) = $self->_setparams();
359              
360             # run mafft
361 0         0 return $self->_run($infilename, $param_string,$outstr);
362             }
363              
364             =head2 _run
365              
366             Title : _run
367             Usage : Internal function, not to be called directly
368             Function: makes actual system call to tcoffee program
369             Example :
370             Returns : nothing; tcoffee output is written to a
371             temporary file OR specified output file
372             Args : Name of a file containing a set of unaligned fasta sequences
373             and hash of parameters to be passed to tcoffee
374              
375              
376             =cut
377              
378             sub _run {
379 0     0   0 my ($self,$infilename,$paramstr,$outstr) = @_;
380 0         0 my $commandstring = $self->executable()." $paramstr $infilename $outstr";
381              
382 0         0 $self->debug( "mafft command = $commandstring \n");
383              
384 0         0 my $status = system($commandstring);
385 0         0 my $outfile = $self->outfile();
386 0 0 0     0 if( !-e $outfile || -z $outfile ) {
387 0         0 $self->warn( "MAFFT call crashed: $? [command $commandstring]\n");
388 0         0 return;
389             }
390              
391 0         0 my $in = Bio::AlignIO->new('-file' => $outfile,
392             '-format' => $self->output);
393 0         0 my $aln = $in->next_aln();
394 0         0 return $aln;
395             }
396              
397              
398             =head2 _setinput
399              
400             Title : _setinput
401             Usage : Internal function, not to be called directly
402             Function: Create input file for mafft programs
403             Example :
404             Returns : name of file containing mafft data input
405             Args : Seq or Align object reference or input file name
406              
407              
408             =cut
409              
410             sub _setinput {
411 0     0   0 my ($self,$input) = @_;
412 0         0 my ($infilename, $seq, $temp, $tfh);
413 0 0       0 if (! ref $input) {
    0          
414             # check that file exists or throw
415 0         0 $infilename = $input;
416 0 0       0 unless (-e $input) {return 0;}
  0         0  
417 0         0 return ($infilename);
418             } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
419             # array of BioSeq objects...
420             # Open temporary file for both reading & writing of array
421 0         0 ($tfh,$infilename) = $self->io->tempfile();
422 0 0       0 if( ! ref($input->[0]) ) {
    0          
423 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
424 0         0 return;
425             } elsif ( $input->[0]->isa('Bio::PrimarySeqI') ) {
426 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
427             '-format' => 'fasta');
428 0         0 my $ct = 1;
429 0         0 foreach $seq (@$input) {
430 0 0 0     0 return 0 unless ( ref($seq) &&
431             $seq->isa("Bio::PrimarySeqI") );
432 0 0 0     0 if( ! defined $seq->display_id ||
433             $seq->display_id =~ /^\s+$/
434             ) {
435 0         0 $seq->display_id( "Seq".$ct++);
436             }
437 0         0 $temp->write_seq($seq);
438             }
439 0         0 $temp->close();
440 0         0 undef $temp;
441 0         0 close($tfh);
442 0         0 $tfh = undef;
443             } else {
444 0         0 $self->warn( "got an array ref with 1st entry ".
445             $input->[0].
446             " and don't know what to do with it\n");
447             }
448              
449 0         0 return ($infilename);
450             } else {
451 0         0 $self->warn("Got $input and don't know what to do with it\n");
452             }
453 0         0 return 0;
454             }
455              
456              
457             =head2 _setparams
458              
459             Title : _setparams
460             Usage : Internal function, not to be called directly
461             Function: Create parameter inputs for mafft program
462             Example :
463             Returns : parameter string to be passed to mafft program
464             Args : name of calling object
465              
466             =cut
467              
468             sub _setparams {
469 0     0   0 my ($self) = @_;
470 0         0 my ($outfile,$param_string) = ('','');
471              
472             # Set default output file if no explicit output file selected
473 0 0       0 unless (defined($outfile = $self->outfile) ) {
474 0         0 my $tfh;
475 0         0 ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
476 0         0 close($tfh);
477 0         0 undef $tfh;
478 0         0 $self->outfile($outfile);
479             }
480 0         0 my ($attr,$value);
481              
482 0 0       0 if ( $self->_version6 ) {
483 0         0 for $attr ( @MAFFT6_SWITCHES) {
484 0         0 $value = $self->$attr();
485 0 0       0 next unless defined $value;
486 0         0 my $attr_key = lc $attr; #put switches in format expected by mafft
487 0         0 $attr_key = ' --'.$attr_key;
488 0         0 $param_string .= $attr_key ;
489             }
490 0         0 for $attr ( @MAFFT6_PARAMS ) {
491 0         0 $value = $self->$attr();
492 0 0       0 next unless (defined $value);
493 0         0 my $attr_key = lc $attr;
494 0         0 $attr_key = ' --'.$attr_key;
495 0         0 $param_string .= $attr_key .' '.$value;
496             }
497 0 0       0 if ( ! $self->no_param_checks ) {
498 0         0 my @incompatible = qw/auto 6merpair globalpair localpair genafpair
499             fastapair/;
500 0         0 my @set = grep { $self->$_ } @incompatible;
  0         0  
501 0 0       0 if ( @set > 1 ) {
502 0         0 $self->throw("You can't specify more than one of @set");
503             }
504             }
505             }
506             else {
507 0         0 for $attr ( @MAFFT4_SWITCHES) {
508 0         0 $value = $self->$attr();
509 0 0       0 next unless defined $value;
510 0         0 my $attr_key = lc $attr; #put switches in format expected by mafft
511 0         0 $attr_key = ' --'.$attr_key;
512 0         0 $param_string .= $attr_key ;
513             }
514             # Method is a version 4 option
515 0         0 my $method = $self->method;
516 0 0       0 $self->throw("no method ") unless defined $method;
517 0 0 0     0 if( $method !~ /(rough|nsi)$/ &&
518             defined $self->cycles) {
519 0         0 $param_string .= " ".$self->cycles;
520             }
521             }
522              
523 0         0 my $outputstr = " 1>$outfile" ;
524              
525 0 0 0     0 if ($self->quiet() || $self->verbose < 0) {
526 0         0 $param_string .= " --quiet";
527 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
528 0         0 $outputstr .= " 2> $null";
529             }
530 0         0 return ($param_string, $outputstr);
531             }
532              
533             =head2 methods
534              
535             Title : methods
536             Usage : my @methods = $self->methods()
537             Function: Get/Set Alignment methods - NOT VALIDATED
538             Returns : array of strings
539             Args : arrayref of strings
540              
541              
542             =cut
543              
544             sub methods {
545 0     0 1 0 my ($self) = shift;
546 0         0 return @MAFFT_ALN_METHODS;
547             }
548              
549              
550             =head2 _version6
551              
552             Title : _version6
553             Usage : Internal function, not to be called directly
554             Function: Check if the version of MAFFT is 6
555             Example :
556             Returns : Boolean
557             Args : None
558              
559             =cut
560              
561             sub _version6 {
562 5     5   5 my $self = shift;
563 5 100       10 if ( ! defined $self->{_version6} ) {
564 1   50     3 my $version = $self->version || '';
565 1 50       3 if ( $version =~ /^v6/ ) {
566 0         0 $self->{_version6} = 1;
567             }
568             else {
569 1         3 $self->{_version6} = '';
570             }
571             }
572 5         10 return $self->{_version6};
573             }
574              
575              
576             =head1 Bio::Tools::Run::BaseWrapper methods
577              
578             =cut
579              
580             =head2 no_param_checks
581              
582             Title : no_param_checks
583             Usage : $obj->no_param_checks($newval)
584             Function: Boolean flag as to whether or not we should
585             trust the sanity checks for parameter values
586             Returns : value of no_param_checks
587             Args : newvalue (optional)
588              
589              
590             =cut
591              
592             =head2 save_tempfiles
593              
594             Title : save_tempfiles
595             Usage : $obj->save_tempfiles($newval)
596             Function:
597             Returns : value of save_tempfiles
598             Args : newvalue (optional)
599              
600              
601             =cut
602              
603             =head2 outfile_name
604              
605             Title : outfile_name
606             Usage : my $outfile = $mafft->outfile_name();
607             Function: Get/Set the name of the output file for this run
608             (if you wanted to do something special)
609             Returns : string
610             Args : [optional] string to set value to
611              
612              
613             =cut
614              
615              
616             =head2 tempdir
617              
618             Title : tempdir
619             Usage : my $tmpdir = $self->tempdir();
620             Function: Retrieve a temporary directory name (which is created)
621             Returns : string which is the name of the temporary directory
622             Args : none
623              
624              
625             =cut
626              
627             =head2 cleanup
628              
629             Title : cleanup
630             Usage : $mafft->cleanup();
631             Function: Will cleanup the tempdir directory
632             Returns : none
633             Args : none
634              
635              
636             =cut
637              
638             =head2 io
639              
640             Title : io
641             Usage : $obj->io($newval)
642             Function: Gets a L object
643             Returns : L
644             Args : none
645              
646              
647             =cut
648              
649             1; # Needed to keep compiler happy