File Coverage

blib/lib/Bio/Tools/Run/Alignment/Probalign.pm
Criterion Covered Total %
statement 49 150 32.6
branch 2 52 3.8
condition 1 13 7.6
subroutine 15 23 65.2
pod 8 8 100.0
total 75 246 30.4


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Alignment::Probalign
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Albert Vilella
6             #
7             #
8             #
9             # You may distribute this module under the same terms as perl itself
10             #
11             # POD documentation - main docs before the code
12              
13             =head1 NAME
14              
15             Bio::Tools::Run::Alignment::Probalign - Object for the calculation of
16             a multiple sequence alignment from a set of unaligned sequences or
17             alignments using the Probalign program
18              
19             =head1 SYNOPSIS
20              
21             # Build a muscle alignment factory
22             $factory = Bio::Tools::Run::Alignment::Probalign->new(@params);
23              
24             # Pass the factory a list of sequences to be aligned.
25             $inputfilename = 't/cysprot.fa';
26             # $aln is a SimpleAlign object.
27             $aln = $factory->align($inputfilename);
28              
29             # or where @seq_array is an array of Bio::Seq objects
30             $seq_array_ref = \@seq_array;
31             $aln = $factory->align($seq_array_ref);
32              
33             # Or one can pass the factory a pair of (sub)alignments
34             #to be aligned against each other, e.g.:
35              
36             #There are various additional options and input formats available.
37             #See the DESCRIPTION section that follows for additional details.
38              
39             $factory = Bio::Tools::Run::Alignment::Probalign->new();
40             $factory->outfile_name("$dir/$subdir/$outdir/outfile.afa");
41             $aln = $factory->align($seq_array_ref);
42              
43             =head1 DESCRIPTION
44              
45              
46             Probalign: multiple sequence alignment using partition function
47             posterior probabilities.
48              
49             Probalign uses partition function posterior probability estimates to
50             compute maximum expected accuracy multiple sequence alignments.
51             You can get it and see information about it at this URL
52             http://www.cs.njit.edu/usman/probalign
53              
54              
55             =head2 Helping the module find your executable
56              
57             You will need to enable Probalign to find the probalign program. This can be
58             done in (at least) three ways:
59              
60             1. Make sure the probalign executable is in your path (i.e.
61             'which probalign' returns a valid program
62             2. define an environmental variable PROBALIGNDIR which points to a
63             directory containing the 'probalign' app:
64             In bash
65             export PROBALIGNDIR=/home/progs/probalign or
66             In csh/tcsh
67             setenv PROBALIGNDIR /home/progs/probalign
68              
69             3. include a definition of an environmental variable PROBALIGNDIR
70             in every script that will
71             BEGIN {$ENV{PROBALIGNDIR} = '/home/progs/probalign'; }
72             use Bio::Tools::Run::Alignment::Probalign;
73              
74              
75             =head1 FEEDBACK
76              
77             =head2 Mailing Lists
78              
79             User feedback is an integral part of the evolution of this and other
80             Bioperl modules. Send your comments and suggestions preferably to one
81             of the Bioperl mailing lists. Your participation is much appreciated.
82              
83             bioperl-l@bioperl.org - General discussion
84             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85              
86             =head2 Support
87              
88             Please direct usage questions or support issues to the mailing list:
89              
90             I
91              
92             rather than to the module maintainer directly. Many experienced and
93             reponsive experts will be able look at the problem and quickly
94             address it. Please include a thorough description of the problem
95             with code and data examples if at all possible.
96              
97             =head2 Reporting Bugs
98              
99             Report bugs to the Bioperl bug tracking system to help us keep track
100             the bugs and their resolution. Bug reports can be submitted via the web:
101              
102             http://redmine.open-bio.org/projects/bioperl/
103              
104             =head1 AUTHOR - Albert Vilella
105              
106             Email avilella-at-gmail-dot-com
107              
108             =head1 APPENDIX
109              
110             The rest of the documentation details each of the object
111             methods. Internal methods are usually preceded with a _
112              
113             =cut
114              
115             package Bio::Tools::Run::Alignment::Probalign;
116              
117 1         79 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
118             @PROBALIGN_PARAMS @PROBALIGN_SWITCHES @OTHER_SWITCHES
119             %OK_FIELD
120 1     1   102426 );
  1         1  
121 1     1   5 use strict;
  1         1  
  1         15  
122 1     1   520 use Bio::Seq;
  1         50671  
  1         26  
123 1     1   542 use Bio::SeqIO;
  1         19500  
  1         29  
124 1     1   674 use Bio::SimpleAlign;
  1         26625  
  1         36  
125 1     1   449 use Bio::AlignIO;
  1         1064  
  1         22  
126 1     1   5 use Bio::Root::Root;
  1         1  
  1         13  
127 1     1   3 use Bio::Root::IO;
  1         1  
  1         14  
128 1     1   382 use Bio::Factory::ApplicationFactoryI;
  1         104  
  1         17  
129 1     1   402 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         70  
130             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
131             Bio::Factory::ApplicationFactoryI);
132              
133              
134             BEGIN {
135 1     1   3 %DEFAULTS = ( 'AFORMAT' => 'fasta' );
136 1         2 @PROBALIGN_PARAMS = qw (TEMPERATURE SCORE_MATRIX GAP-OPEN GAP-EXTENSION);
137 1         1 @PROBALIGN_SWITCHES = qw(CLUSTALW VERBOSE ALIGNMENT-ORDER NUC PROT);
138 1         1 @OTHER_SWITCHES = qw();
139              
140             # Authorize attribute fields
141 1         2 foreach my $attr ( @PROBALIGN_PARAMS, @PROBALIGN_SWITCHES, @OTHER_SWITCHES ) {
142 9         966 $OK_FIELD{$attr}++; }
143             }
144              
145             =head2 program_name
146              
147             Title : program_name
148             Usage : $factory->program_name()
149             Function: holds the program name
150             Returns: string
151             Args : None
152              
153             =cut
154              
155             sub program_name {
156 6     6 1 22 return 'probalign';
157             }
158              
159             =head2 program_dir
160              
161             Title : program_dir
162             Usage : $factory->program_dir(@params)
163             Function: returns the program directory, obtained from ENV variable.
164             Returns: string
165             Args :
166              
167             =cut
168              
169             sub program_dir {
170 3 50   3 1 12 return Bio::Root::IO->catfile($ENV{PROBALIGNDIR}) if $ENV{PROBALIGNDIR};
171             }
172              
173             =head2 new
174              
175             Title : new
176             Usage : my $probalign = Bio::Tools::Run::Alignment::Probalign->new();
177             Function: Constructor
178             Returns : Bio::Tools::Run::Alignment::Probalign
179             Args : -outfile_name => $outname
180              
181              
182             =cut
183              
184             sub new{
185 1     1 1 70 my ($class,@args) = @_;
186 1         10 my $self = $class->SUPER::new(@args);
187 1         20 my ($on) = $self->SUPER::_rearrange([qw(OUTFILE_NAME)], @args);
188 1   50     15 $self->outfile_name($on || '');
189 1         1 my ($attr, $value);
190 1         4 $self->aformat($DEFAULTS{'AFORMAT'});
191              
192 1         3 while ( @args) {
193 0         0 $attr = shift @args;
194 0         0 $value = shift @args;
195 0 0       0 next if( $attr =~ /^-/); # don't want named parameters
196 0         0 $self->$attr($value);
197             }
198 1         2 return $self;
199             }
200              
201             sub AUTOLOAD {
202 0     0   0 my $self = shift;
203 0         0 my $attr = $AUTOLOAD;
204 0         0 $attr =~ s/.*:://;
205 0         0 $attr = uc $attr;
206             # aliasing
207 0 0       0 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
208              
209 0 0       0 $self->{$attr} = shift if @_;
210 0         0 return $self->{$attr};
211             }
212              
213             =head2 error_string
214              
215             Title : error_string
216             Usage : $obj->error_string($newval)
217             Function: Where the output from the last analysus run is stored.
218             Returns : value of error_string
219             Args : newvalue (optional)
220              
221              
222             =cut
223              
224             sub error_string{
225 0     0 1 0 my ($self,$value) = @_;
226 0 0       0 if( defined $value) {
227 0         0 $self->{'error_string'} = $value;
228             }
229 0         0 return $self->{'error_string'};
230              
231             }
232              
233             =head2 version
234              
235             Title : version
236             Usage : exit if $prog->version() < 1.8
237             Function: Determine the version number of the program
238             Example :
239             Returns : float or undef
240             Args : none
241              
242             =cut
243              
244             sub version {
245 0     0 1 0 my ($self) = @_;
246 0         0 my $exe;
247 0 0       0 return undef unless $exe = $self->executable;
248 0         0 my $string = `$exe 2>&1` ;
249             #PROBALIGN version 1.09 - align multiple protein sequences and print to standard output
250 0         0 $string =~ /PROBALIGN\s+Beta\s+Version\s+(\d+\.\d+)/m;
251 0   0     0 return $1 || undef;
252             }
253              
254             =head2 run
255              
256             Title : run
257             Usage : my $output = $application->run(\@seqs);
258             Function: Generic run of an application
259             Returns : Bio::SimpleAlign object
260             Args : Arrayref of Bio::PrimarySeqI objects or
261             a filename to run on
262              
263             =cut
264              
265             sub run {
266 0     0 1 0 my $self = shift;
267 0         0 return $self->align(shift);
268             }
269              
270             =head2 align
271              
272             Title : align
273             Usage :
274             $inputfilename = 't/data/cysprot.fa';
275             $aln = $factory->align($inputfilename);
276             or
277             $seq_array_ref = \@seq_array;
278             # @seq_array is array of Seq objs
279             $aln = $factory->align($seq_array_ref);
280             Function: Perform a multiple sequence alignment
281             Returns : Reference to a SimpleAlign object containing the
282             sequence alignment.
283             Args : Name of a file containing a set of unaligned fasta sequences
284             or else an array of references to Bio::Seq objects.
285              
286             Throws an exception if argument is not either a string (eg a
287             filename) or a reference to an array of Bio::Seq objects. If
288             argument is string, throws exception if file corresponding to string
289             name can not be found. If argument is Bio::Seq array, throws
290             exception if less than two sequence objects are in array.
291              
292             =cut
293              
294             sub align {
295 0     0 1 0 my ($self,$input) = @_;
296             # Create input file pointer
297 0         0 $self->io->_io_cleanup();
298 0         0 my ($infilename) = $self->_setinput($input);
299 0 0       0 if (! $infilename) {
300 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
301             }
302              
303 0         0 my $param_string = $self->_setparams();
304              
305             # run probalign
306 0         0 return &_run($self, $infilename, $param_string);
307             }
308              
309             =head2 _run
310              
311             Title : _run
312             Usage : Internal function, not to be called directly
313             Function: makes actual system call to probalign program
314             Example :
315             Returns : nothing; probalign output is written to a
316             temporary file OR specified output file
317             Args : Name of a file containing a set of unaligned fasta sequences
318             and hash of parameters to be passed to probalign
319              
320              
321             =cut
322              
323             sub _run {
324 0     0   0 my ($self,$infilename,$params) = @_;
325 0         0 my $commandstring = $self->executable." $infilename $params";
326              
327 0         0 $self->debug( "probalign command = $commandstring \n");
328              
329 0         0 my $status = system($commandstring);
330 0         0 my $outfile = $self->outfile_name();
331              
332 0 0 0     0 if( !-e $outfile || -z $outfile ) {
333 0         0 $self->warn( "Probalign call crashed: $? [command $commandstring]\n");
334 0         0 return undef;
335             }
336              
337 0         0 my $in = Bio::AlignIO->new('-file' => $outfile,
338             '-format' => $self->aformat);
339 0         0 my $aln = $in->next_aln();
340 0         0 return $aln;
341             }
342              
343              
344             =head2 _setinput
345              
346             Title : _setinput
347             Usage : Internal function, not to be called directly
348             Function: Create input file for probalign program
349             Example :
350             Returns : name of file containing probalign data input AND
351             Args : Arrayref of Seqs or input file name
352              
353              
354             =cut
355              
356             sub _setinput {
357 0     0   0 my ($self,$input) = @_;
358 0         0 my ($infilename, $seq, $temp, $tfh);
359 0 0       0 if (! ref $input) {
    0          
360             # check that file exists or throw
361 0         0 $infilename = $input;
362 0 0       0 unless (-e $input) {return 0;}
  0         0  
363             # let's peek and guess
364 0 0       0 open(IN,$infilename) || $self->throw("Cannot open $infilename");
365 0         0 my $header;
366 0         0 while( defined ($header = ) ) {
367 0 0       0 last if $header !~ /^\s+$/;
368             }
369 0         0 close(IN);
370 0 0       0 if ( $header !~ /^>\s*\S+/ ){
371 0         0 $self->throw("Need to provide a FASTA format file to probalign!");
372             }
373 0         0 return ($infilename);
374             } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
375             # array of BioSeq objects...
376             # Open temporary file for both reading & writing of array
377 0         0 ($tfh,$infilename) = $self->io->tempfile();
378 0 0       0 if( ! ref($input->[0]) ) {
    0          
379 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
380 0         0 return undef;
381             } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
382 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
383             '-format' => 'fasta');
384 0         0 my $ct = 1;
385 0         0 foreach $seq (@$input) {
386 0 0 0     0 return 0 unless ( ref($seq) &&
387             $seq->isa("Bio::PrimarySeqI") );
388 0 0 0     0 if( ! defined $seq->display_id ||
389             $seq->display_id =~ /^\s+$/) {
390 0         0 $seq->display_id( "Seq".$ct++);
391             }
392 0         0 $temp->write_seq($seq);
393             }
394 0         0 $temp->close();
395 0         0 undef $temp;
396 0         0 close($tfh);
397 0         0 $tfh = undef;
398             } else {
399 0         0 $self->warn( "got an array ref with 1st entry ".
400             $input->[0].
401             " and don't know what to do with it\n");
402             }
403 0         0 return ($infilename);
404             } else {
405 0         0 $self->warn("Got $input and don't know what to do with it\n");
406             }
407 0         0 return 0;
408             }
409              
410              
411             =head2 _setparams
412              
413             Title : _setparams
414             Usage : Internal function, not to be called directly
415             Function: Create parameter inputs for probalign program
416             Example :
417             Returns : parameter string to be passed to probalign
418             during align or profile_align
419             Args : name of calling object
420              
421             =cut
422              
423             sub _setparams {
424 0     0   0 my ($self) = @_;
425 0         0 my ($attr, $value,$param_string);
426 0         0 $param_string = '';
427 0         0 my $laststr;
428 0         0 for $attr ( @PROBALIGN_PARAMS ) {
429 0         0 $value = $self->$attr();
430 0 0       0 next unless (defined $value);
431 0         0 my $attr_key = lc $attr;
432 0 0       0 $attr_key = ' --'.$attr_key unless ($attr eq 'ANNOT');
433 0 0       0 $attr_key = ' -'.$attr_key if ($attr eq 'ANNOT');
434 0         0 $param_string .= $attr_key .' '.$value;
435              
436             }
437 0         0 for $attr ( @PROBALIGN_SWITCHES) {
438 0         0 $value = $self->$attr();
439 0 0       0 next unless ($value);
440 0         0 my $attr_key = lc $attr; #put switches in format expected by tcoffee
441 0         0 $attr_key = ' -'.$attr_key;
442 0         0 $param_string .= $attr_key ;
443             }
444             # Set default output file if no explicit output file selected
445 0 0       0 unless ($self->outfile_name ) {
446 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
447 0         0 close($tfh);
448 0         0 undef $tfh;
449 0         0 $self->outfile_name($outfile);
450             }
451             #FIXME: This may be only for *nixes. Double check in other OSes
452 0         0 $param_string .= " > ".$self->outfile_name;
453              
454 0 0       0 if ($self->verbose < 0) {
455 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
456 0         0 $param_string .= " 2> $null";
457             }
458 0         0 return $param_string;
459             }
460              
461             =head2 aformat
462              
463             Title : aformat
464             Usage : my $alignmentformat = $self->aformat();
465             Function: Get/Set alignment format
466             Returns : string
467             Args : string
468              
469              
470             =cut
471              
472             sub aformat{
473 1     1 1 2 my $self = shift;
474 1 50       2 $self->{'_aformat'} = shift if @_;
475 1         1 return $self->{'_aformat'};
476             }
477              
478             =head1 Bio::Tools::Run::BaseWrapper methods
479              
480             =cut
481              
482             =head2 no_param_checks
483              
484             Title : no_param_checks
485             Usage : $obj->no_param_checks($newval)
486             Function: Boolean flag as to whether or not we should
487             trust the sanity checks for parameter values
488             Returns : value of no_param_checks
489             Args : newvalue (optional)
490              
491              
492             =cut
493              
494             =head2 save_tempfiles
495              
496             Title : save_tempfiles
497             Usage : $obj->save_tempfiles($newval)
498             Function:
499             Returns : value of save_tempfiles
500             Args : newvalue (optional)
501              
502              
503             =cut
504              
505             =head2 outfile_name
506              
507             Title : outfile_name
508             Usage : my $outfile = $probalign->outfile_name();
509             Function: Get/Set the name of the output file for this run
510             (if you wanted to do something special)
511             Returns : string
512             Args : [optional] string to set value to
513              
514              
515             =cut
516              
517              
518             =head2 tempdir
519              
520             Title : tempdir
521             Usage : my $tmpdir = $self->tempdir();
522             Function: Retrieve a temporary directory name (which is created)
523             Returns : string which is the name of the temporary directory
524             Args : none
525              
526              
527             =cut
528              
529             =head2 cleanup
530              
531             Title : cleanup
532             Usage : $probalign->cleanup();
533             Function: Will cleanup the tempdir directory
534             Returns : none
535             Args : none
536              
537              
538             =cut
539              
540             =head2 io
541              
542             Title : io
543             Usage : $obj->io($newval)
544             Function: Gets a L object
545             Returns : L
546             Args : none
547              
548              
549             =cut
550              
551             1; # Needed to keep compiler happy