File Coverage

blib/lib/Bio/Tools/Run/Alignment/Amap.pm
Criterion Covered Total %
statement 49 146 33.5
branch 4 52 7.6
condition 0 11 0.0
subroutine 15 22 68.1
pod 7 7 100.0
total 75 238 31.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Alignment::Amap
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Albert Vilella
7             #
8             #
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Alignment::Amap - Object for the calculation of an
17             iterative multiple sequence alignment from a set of unaligned
18             sequences or alignments using the Amap (2.0) program
19              
20             =head1 SYNOPSIS
21              
22             # Build a muscle alignment factory
23             $factory = Bio::Tools::Run::Alignment::Amap->new(@params);
24              
25             # Pass the factory a list of sequences to be aligned.
26             $inputfilename = 't/cysprot.fa';
27             # $aln is a SimpleAlign object.
28             $aln = $factory->align($inputfilename);
29              
30             # or where @seq_array is an array of Bio::Seq objects
31             $seq_array_ref = \@seq_array;
32             $aln = $factory->align($seq_array_ref);
33              
34             # Or one can pass the factory a pair of (sub)alignments
35             #to be aligned against each other, e.g.:
36              
37             #There are various additional options and input formats available.
38             #See the DESCRIPTION section that follows for additional details.
39              
40             #To run amap with training, try something like:
41              
42             #First round to generate train.params
43             $factory = Bio::Tools::Run::Alignment::Amap->new
44             (
45             'iterative-refinement' => '1000',
46             'consistency' => '5',
47             'pre-training' => '20',
48             'emissions' => '',
49             'verbose' => '',
50             'train' => "$dir/$subdir/$outdir/train.params",
51             );
52             $factory->outfile_name("$dir/$subdir/$outdir/train.params");
53              
54             #Second round to use train.params to get a high qual alignment
55              
56             $seq_array_ref = \@seq_array;
57             $aln = $factory->align($seq_array_ref);
58             $aln = '';
59             $factory = '';
60              
61             $factory = Bio::Tools::Run::Alignment::Amap->new
62             (
63             'iterative-refinement' => '1000',
64             'consistency' => '5',
65             'pre-training' => '20',
66             'verbose' => '',
67             'paramfile' => "$dir/$subdir/$outdir/train.params",
68             );
69             $factory->outfile_name("$dir/$subdir/$outdir/outfile.afa");
70             $aln = $factory->align($seq_array_ref);
71              
72             =head1 DESCRIPTION
73              
74             Amap uses a Sequence Annealing algorithm which is an incremental
75             method for building multiple alignments. You can get it and see
76             information about it at this URL http://bio.math.berkeley.edu/amap/
77              
78              
79             =head2 Helping the module find your executable
80              
81             FIXME: Amap uses the same parameters as Probcons, plus a few others. I
82             haven't had time to check all the changes from the Probcons.pm
83             runnable. Feel free to do it.
84              
85             You will need to enable Amap to find the amap program. This can be
86             done in (at least) three ways:
87              
88             1. Make sure the amap executable is in your path (i.e.
89             'which amap' returns a valid program
90             2. define an environmental variable AMAPDIR which points to a
91             directory containing the 'amap' app:
92             In bash
93             export AMAPDIR=/home/progs/amap or
94             In csh/tcsh
95             setenv AMAPDIR /home/progs/amap
96              
97             3. include a definition of an environmental variable AMAPDIR
98             in every script that will
99             BEGIN {$ENV{AMAPDIR} = '/home/progs/amap'; }
100             use Bio::Tools::Run::Alignment::Amap;
101              
102             =head1 FEEDBACK
103              
104             =head2 Mailing Lists
105              
106             User feedback is an integral part of the evolution of this and other
107             Bioperl modules. Send your comments and suggestions preferably to one
108             of the Bioperl mailing lists. Your participation is much appreciated.
109              
110             bioperl-l@bioperl.org - General discussion
111             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
112              
113             =head2 Support
114              
115             Please direct usage questions or support issues to the mailing list:
116              
117             I
118              
119             rather than to the module maintainer directly. Many experienced and
120             reponsive experts will be able look at the problem and quickly
121             address it. Please include a thorough description of the problem
122             with code and data examples if at all possible.
123              
124             =head2 Reporting Bugs
125              
126             Report bugs to the Bioperl bug tracking system to help us keep track
127             the bugs and their resolution. Bug reports can be submitted via the web:
128              
129             http://redmine.open-bio.org/projects/bioperl/
130              
131             =head1 AUTHOR - Albert Vilella
132              
133             Email foo@bar.com
134              
135             =head1 APPENDIX
136              
137             The rest of the documentation details each of the object
138             methods. Internal methods are usually preceded with a _
139              
140             =cut
141              
142             package Bio::Tools::Run::Alignment::Amap;
143              
144 1         83 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
145             @AMAP_PARAMS @AMAP_SWITCHES @OTHER_SWITCHES
146             %OK_FIELD
147 1     1   107426 );
  1         3  
148 1     1   5 use strict;
  1         2  
  1         17  
149 1     1   390 use Bio::Seq;
  1         48674  
  1         37  
150 1     1   489 use Bio::SeqIO;
  1         20813  
  1         33  
151 1     1   494 use Bio::SimpleAlign;
  1         31177  
  1         58  
152 1     1   587 use Bio::AlignIO;
  1         1228  
  1         27  
153 1     1   6 use Bio::Root::Root;
  1         2  
  1         17  
154 1     1   5 use Bio::Root::IO;
  1         2  
  1         15  
155 1     1   272 use Bio::Factory::ApplicationFactoryI;
  1         160  
  1         21  
156 1     1   387 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         84  
157             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
158             Bio::Factory::ApplicationFactoryI);
159              
160              
161             BEGIN {
162 1     1   5 %DEFAULTS = ( 'AFORMAT' => 'fasta' );
163 1         3 @AMAP_PARAMS = qw (CONSISTENCY ITERATIVE-REFINEMENT
164             PRE-TRAINING ANNOT TRAIN PARAMFILE MATRIXFILE
165             CLUSTALW PAIRS VITERBI VERBOSE EMISSIONS EDGE-WEIGHT-THRESHOLD GAP-FACTOR);
166             #FIXME: Last line are switches, dunno how to set them,
167             #gave as params
168 1         3 @AMAP_SWITCHES = qw();
169 1         2 @OTHER_SWITCHES = qw(PROGRESSIVE NOREORDER ALIGNMENT-ORDER MAXSTEP PRINT-POSTERIORS);
170              
171             # Authorize attribute fields
172 1         2 foreach my $attr ( @AMAP_PARAMS, @OTHER_SWITCHES ) {
173 19         1073 $OK_FIELD{$attr}++; }
174             }
175              
176             =head2 program_name
177              
178             Title : program_name
179             Usage : $factory->program_name()
180             Function: holds the program name
181             Returns: string
182             Args : None
183              
184             =cut
185              
186             sub program_name {
187 7     7 1 38 return 'amap';
188             }
189              
190             =head2 program_dir
191              
192             Title : program_dir
193             Usage : $factory->program_dir(@params)
194             Function: returns the program directory, obtained from ENV variable.
195             Returns: string
196             Args :
197              
198             =cut
199              
200             sub program_dir {
201 4 50   4 1 394 return Bio::Root::IO->catfile($ENV{AMAPDIR}) if $ENV{AMAPDIR};
202             }
203              
204             =head2 new
205              
206             Title : new
207             Usage : my $amap = Bio::Tools::Run::Alignment::Amap->new();
208             Function: Constructor
209             Returns : Bio::Tools::Run::Alignment::Amap
210             Args : -outfile_name => $outname
211              
212              
213             =cut
214              
215             sub new{
216 1     1 1 755 my ($class,@args) = @_;
217 1         13 my $self = $class->SUPER::new(@args);
218 1         22 my ($on) = $self->SUPER::_rearrange([qw(OUTFILE_NAME)], @args);
219 1 50       11 $self->outfile_name($on) if $on;
220 1         3 my ($attr, $value);
221 1         7 $self->aformat($DEFAULTS{'AFORMAT'});
222            
223 1         6 while ( @args) {
224 0         0 $attr = shift @args;
225 0         0 $value = shift @args;
226 0 0       0 next if( $attr =~ /^-/); # don't want named parameters
227 0         0 $self->$attr($value);
228             }
229 1         5 return $self;
230             }
231              
232             sub AUTOLOAD {
233 0     0   0 my $self = shift;
234 0         0 my $attr = $AUTOLOAD;
235 0         0 $attr =~ s/.*:://;
236 0         0 $attr = uc $attr;
237             # aliasing
238 0 0       0 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
239              
240 0 0       0 $self->{$attr} = shift if @_;
241 0         0 return $self->{$attr};
242             }
243              
244             =head2 version
245              
246             Title : version
247             Usage : exit if $prog->version() < 1.8
248             Function: Determine the version number of the program
249             Example :
250             Returns : float or undef
251             Args : none
252              
253             =cut
254              
255             sub version {
256 0     0 1 0 my ($self) = @_;
257 0         0 my $exe;
258 0 0       0 return undef unless $exe = $self->executable;
259 0         0 my $string = `$exe 2>&1` ;
260             #AMAP version 1.09 - align multiple protein sequences and print to standard output
261 0         0 $string =~ /AMAP\s+version.+(\d+\.\d+)/m;
262 0   0     0 return $1 || undef;
263             }
264              
265             =head2 run
266              
267             Title : run
268             Usage : my $output = $application->run(\@seqs);
269             Function: Generic run of an application
270             Returns : Bio::SimpleAlign object
271             Args : Arrayref of Bio::PrimarySeqI objects or
272             a filename to run on
273              
274             =cut
275              
276             sub run {
277 0     0 1 0 my $self = shift;
278 0         0 return $self->align(shift);
279             }
280              
281             =head2 align
282              
283             Title : align
284             Usage :
285             $inputfilename = 't/data/cysprot.fa';
286             $aln = $factory->align($inputfilename);
287             or
288             $seq_array_ref = \@seq_array;
289             # @seq_array is array of Seq objs
290             $aln = $factory->align($seq_array_ref);
291             Function: Perform a multiple sequence alignment
292             Returns : Reference to a SimpleAlign object containing the
293             sequence alignment.
294             Args : Name of a file containing a set of unaligned fasta sequences
295             or else an array of references to Bio::Seq objects.
296              
297             Throws an exception if argument is not either a string (eg a
298             filename) or a reference to an array of Bio::Seq objects. If
299             argument is string, throws exception if file corresponding to string
300             name can not be found. If argument is Bio::Seq array, throws
301             exception if less than two sequence objects are in array.
302              
303             =cut
304              
305             sub align {
306 0     0 1 0 my ($self,$input) = @_;
307             # Create input file pointer
308 0         0 $self->io->_io_cleanup();
309 0         0 my ($infilename) = $self->_setinput($input);
310 0 0       0 if (! $infilename) {
311 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
312             }
313              
314 0         0 my $param_string = $self->_setparams();
315              
316             # run amap
317 0         0 return &_run($self, $infilename, $param_string);
318             }
319              
320             =head2 _run
321              
322             Title : _run
323             Usage : Internal function, not to be called directly
324             Function: makes actual system call to amap program
325             Example :
326             Returns : nothing; amap output is written to a
327             temporary file OR specified output file
328             Args : Name of a file containing a set of unaligned fasta sequences
329             and hash of parameters to be passed to amap
330              
331              
332             =cut
333              
334             sub _run {
335 0     0   0 my ($self,$infilename,$params) = @_;
336 0         0 my $commandstring = $self->executable." $infilename $params";
337            
338 0         0 $self->debug( "amap command = $commandstring \n");
339              
340 0         0 my $status = system($commandstring);
341 0         0 my $outfile = $self->outfile_name();
342              
343 0 0 0     0 if( !-e $outfile || -z $outfile ) {
344 0         0 $self->warn( "Amap call crashed: $? [command $commandstring]\n");
345 0         0 return undef;
346             }
347              
348 0         0 my $in = Bio::AlignIO->new('-file' => $outfile,
349             '-format' => $self->aformat);
350 0         0 my $aln = $in->next_aln();
351 0         0 return $aln;
352             }
353              
354              
355             =head2 _setinput
356              
357             Title : _setinput
358             Usage : Internal function, not to be called directly
359             Function: Create input file for amap program
360             Example :
361             Returns : name of file containing amap data input AND
362             Args : Arrayref of Seqs or input file name
363              
364              
365             =cut
366              
367             sub _setinput {
368 0     0   0 my ($self,$input) = @_;
369 0         0 my ($infilename, $seq, $temp, $tfh);
370 0 0       0 if (! ref $input) {
    0          
371             # check that file exists or throw
372 0         0 $infilename = $input;
373 0 0       0 unless (-e $input) {return 0;}
  0         0  
374             # let's peek and guess
375 0 0       0 open(IN,$infilename) || $self->throw("Cannot open $infilename");
376 0         0 my $header;
377 0         0 while( defined ($header = ) ) {
378 0 0       0 last if $header !~ /^\s+$/;
379             }
380 0         0 close(IN);
381 0 0       0 if ( $header !~ /^>\s*\S+/ ){
382 0         0 $self->throw("Need to provide a FASTA format file to amap!");
383             }
384 0         0 return ($infilename);
385             } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
386             # array of BioSeq objects...
387             # Open temporary file for both reading & writing of array
388 0         0 ($tfh,$infilename) = $self->io->tempfile();
389 0 0       0 if( ! ref($input->[0]) ) {
    0          
390 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
391 0         0 return undef;
392             } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
393 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
394             '-format' => 'fasta');
395 0         0 my $ct = 1;
396 0         0 foreach $seq (@$input) {
397 0 0 0     0 return 0 unless ( ref($seq) &&
398             $seq->isa("Bio::PrimarySeqI") );
399 0 0 0     0 if( ! defined $seq->display_id ||
400             $seq->display_id =~ /^\s+$/) {
401 0         0 $seq->display_id( "Seq".$ct++);
402             }
403 0         0 $temp->write_seq($seq);
404             }
405 0         0 $temp->close();
406 0         0 undef $temp;
407 0         0 close($tfh);
408 0         0 $tfh = undef;
409             } else {
410 0         0 $self->warn( "got an array ref with 1st entry ".
411             $input->[0].
412             " and don't know what to do with it\n");
413             }
414 0         0 return ($infilename);
415             } else {
416 0         0 $self->warn("Got $input and don't know what to do with it\n");
417             }
418 0         0 return 0;
419             }
420              
421              
422             =head2 _setparams
423              
424             Title : _setparams
425             Usage : Internal function, not to be called directly
426             Function: Create parameter inputs for amap program
427             Example :
428             Returns : parameter string to be passed to amap
429             during align or profile_align
430             Args : name of calling object
431              
432             =cut
433              
434             sub _setparams {
435 0     0   0 my ($self) = @_;
436 0         0 my ($attr, $value,$param_string);
437 0         0 $param_string = '';
438 0         0 my $laststr;
439 0         0 for $attr ( @AMAP_PARAMS ) {
440 0         0 $value = $self->$attr();
441 0 0       0 next unless (defined $value);
442 0         0 my $attr_key = lc $attr;
443 0 0       0 $attr_key = ' --'.$attr_key unless ($attr eq 'ANNOT');
444 0 0       0 $attr_key = ' -'.$attr_key if ($attr eq 'ANNOT');
445 0         0 $param_string .= $attr_key .' '.$value;
446              
447             }
448 0         0 for $attr ( @AMAP_SWITCHES) {
449 0         0 $value = $self->$attr();
450 0 0       0 next unless ($value);
451 0         0 my $attr_key = lc $attr; #put switches in format expected by Amap
452 0         0 $attr_key = ' -'.$attr_key;
453 0         0 $param_string .= $attr_key ;
454             }
455             # Set default output file if no explicit output file selected
456 0 0       0 unless ($self->outfile_name ) {
457 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
458 0         0 close($tfh);
459 0         0 undef $tfh;
460 0         0 $self->outfile_name($outfile);
461             }
462             #FIXME: This may be only for *nixes. Double check in other OSes
463 0         0 $param_string .= " > ".$self->outfile_name;
464            
465 0 0       0 if ($self->verbose < 0) {
466 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
467 0         0 $param_string .= " 2> $null";
468             }
469 0         0 return $param_string;
470             }
471              
472             =head2 aformat
473              
474             Title : aformat
475             Usage : my $alignmentformat = $self->aformat();
476             Function: Get/Set alignment format
477             Returns : string
478             Args : string
479              
480              
481             =cut
482              
483             sub aformat{
484 2     2 1 5 my $self = shift;
485 2 100       21 $self->{'_aformat'} = shift if @_;
486 2         11 return $self->{'_aformat'};
487             }
488              
489             =head1 Bio::Tools::Run::BaseWrapper methods
490              
491             =cut
492              
493             =head2 no_param_checks
494              
495             Title : no_param_checks
496             Usage : $obj->no_param_checks($newval)
497             Function: Boolean flag as to whether or not we should
498             trust the sanity checks for parameter values
499             Returns : value of no_param_checks
500             Args : newvalue (optional)
501              
502              
503             =cut
504              
505             =head2 save_tempfiles
506              
507             Title : save_tempfiles
508             Usage : $obj->save_tempfiles($newval)
509             Function:
510             Returns : value of save_tempfiles
511             Args : newvalue (optional)
512              
513              
514             =cut
515              
516             =head2 outfile_name
517              
518             Title : outfile_name
519             Usage : my $outfile = $amap->outfile_name();
520             Function: Get/Set the name of the output file for this run
521             (if you wanted to do something special)
522             Returns : string
523             Args : [optional] string to set value to
524              
525              
526             =cut
527              
528              
529             =head2 tempdir
530              
531             Title : tempdir
532             Usage : my $tmpdir = $self->tempdir();
533             Function: Retrieve a temporary directory name (which is created)
534             Returns : string which is the name of the temporary directory
535             Args : none
536              
537              
538             =cut
539              
540             =head2 cleanup
541              
542             Title : cleanup
543             Usage : $amap->cleanup();
544             Function: Will cleanup the tempdir directory
545             Returns : none
546             Args : none
547              
548              
549             =cut
550              
551             =head2 io
552              
553             Title : io
554             Usage : $obj->io($newval)
555             Function: Gets a L object
556             Returns : L
557             Args : none
558              
559              
560             =cut
561              
562             1; # Needed to keep compiler happy