| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Bio::Tools::Run::Alignment::Clustalw; | 
| 2 |  |  |  |  |  |  | $Bio::Tools::Run::Alignment::Clustalw::VERSION = '1.7.2'; | 
| 3 | 1 |  |  | 1 |  | 238216 | use utf8; | 
|  | 1 |  |  |  |  | 6 |  | 
|  | 1 |  |  |  |  | 12 |  | 
| 4 | 1 |  |  | 1 |  | 43 | use strict; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 33 |  | 
| 5 | 1 |  |  | 1 |  | 8 | use warnings; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 50 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 1 |  |  | 1 |  | 9 | use Bio::Seq; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 8 | 1 |  |  | 1 |  | 8 | use Bio::SeqIO; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 9 | 1 |  |  | 1 |  | 8 | use Bio::SimpleAlign; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 76 |  | 
| 10 | 1 |  |  | 1 |  | 9 | use Bio::AlignIO; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 30 |  | 
| 11 | 1 |  |  | 1 |  | 416 | use Bio::TreeIO; | 
|  | 1 |  |  |  |  | 20554 |  | 
|  | 1 |  |  |  |  | 37 |  | 
| 12 | 1 |  |  | 1 |  | 9 | use Bio::Root::IO; | 
|  | 1 |  |  |  |  | 10 |  | 
|  | 1 |  |  |  |  | 21 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 1 |  |  | 1 |  | 8 | use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 368 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | # ABSTRACT: Object for the calculation of a multiple sequence alignment from a set of unaligned sequences or alignments using the Clustalw program | 
| 17 |  |  |  |  |  |  | # AUTHOR: Peter Schattner | 
| 18 |  |  |  |  |  |  | # OWNER: Peter Schattner | 
| 19 |  |  |  |  |  |  | # LICENSE: Perl_5 | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # AUTHOR: Jason Stajich | 
| 22 |  |  |  |  |  |  | # AUTHOR: Sendu Bala | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | our @CLUSTALW_PARAMS = qw(output ktuple topdiags window pairgap fixedgap | 
| 26 |  |  |  |  |  |  | floatgap matrix type transit dnamatrix outfile | 
| 27 |  |  |  |  |  |  | gapopen gapext maxdiv gapdist hgapresidues pwmatrix | 
| 28 |  |  |  |  |  |  | pwdnamatrix pwgapopen pwgapext score transweight | 
| 29 |  |  |  |  |  |  | seed helixgap outorder strandgap loopgap terminalgap | 
| 30 |  |  |  |  |  |  | helixendin helixendout strandendin strandendout program | 
| 31 |  |  |  |  |  |  | reps outputtree seed bootlabels bootstrap); | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | our @CLUSTALW_SWITCHES = qw(help check options negative noweights endgaps | 
| 34 |  |  |  |  |  |  | nopgap nohgap novgap kimura tossgaps | 
| 35 |  |  |  |  |  |  | kimura tossgaps njtree); | 
| 36 |  |  |  |  |  |  | our @OTHER_SWITCHES = qw(quiet); | 
| 37 |  |  |  |  |  |  | our $PROGRAM_NAME = 'clustalw'; | 
| 38 |  |  |  |  |  |  | our $PROGRAM_DIR = $ENV{'CLUSTALDIR'} || $ENV{'CLUSTALWDIR'}; | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub program_name { | 
| 42 | 7 |  |  | 7 | 1 | 2100 | return $PROGRAM_NAME; | 
| 43 |  |  |  |  |  |  | } | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | sub program_dir { | 
| 47 | 4 |  |  | 4 | 1 | 1397 | return $PROGRAM_DIR; | 
| 48 |  |  |  |  |  |  | } | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub new { | 
| 51 | 1 |  |  | 1 | 1 | 7359 | my ($class,@args) = @_; | 
| 52 | 1 |  |  |  |  | 23 | my $self = $class->SUPER::new(@args); | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 1 |  |  |  |  | 170 | $self->_set_from_args(\@args, -methods => [@CLUSTALW_PARAMS, | 
| 55 |  |  |  |  |  |  | @CLUSTALW_SWITCHES, | 
| 56 |  |  |  |  |  |  | @OTHER_SWITCHES], | 
| 57 |  |  |  |  |  |  | -create => 1); | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 1 |  |  |  |  | 11720 | return $self; | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | sub version { | 
| 64 | 1 |  |  | 1 | 1 | 5 | my ($self) = @_; | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 1 | 0 |  |  |  | 15 | return undef unless $self->executable; | 
| 67 | 0 |  |  |  |  |  | my $prog = $self->executable; | 
| 68 | 0 |  |  |  |  |  | my $string = `$prog -- 2>&1` ; | 
| 69 | 0 |  |  |  |  |  | $string =~ /\(?([\d.]+)\)?/xms; | 
| 70 | 0 |  | 0 |  |  |  | return $1 || undef; | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | sub run { | 
| 75 | 0 |  |  | 0 | 1 |  | my ($self,$input) = @_; | 
| 76 | 0 |  |  |  |  |  | my ($temp,$infilename, $seq); | 
| 77 | 0 |  |  |  |  |  | my ($attr, $value, $switch); | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 0 |  |  |  |  |  | $self->io->_io_cleanup(); | 
| 80 |  |  |  |  |  |  | # Create input file pointer | 
| 81 | 0 |  |  |  |  |  | $infilename = $self->_setinput($input); | 
| 82 | 0 | 0 |  |  |  |  | $self->throw("Bad input data (sequences need an id) or less than 2 sequences in $input!") unless $infilename; | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | # Create parameter string to pass to clustalw program | 
| 85 | 0 |  |  |  |  |  | my $param_string = $self->_setparams(); | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | # run clustalw | 
| 88 | 0 |  |  |  |  |  | return $self->_run('both', $infilename, $param_string); | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | sub align { | 
| 93 | 0 |  |  | 0 | 1 |  | my ($self,$input) = @_; | 
| 94 |  |  |  |  |  |  |  | 
| 95 | 0 |  |  |  |  |  | $self->io->_io_cleanup(); | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | # Create input file pointer | 
| 98 | 0 |  |  |  |  |  | my $infilename = $self->_setinput($input); | 
| 99 | 0 | 0 |  |  |  |  | $self->throw("Bad input data (sequences need an id ) or less than 2 sequences in $input !") unless $infilename; | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | # Create parameter string to pass to clustalw program | 
| 102 | 0 |  |  |  |  |  | my $param_string = $self->_setparams(); | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | # run clustalw | 
| 105 | 0 |  |  |  |  |  | my $aln = $self->_run('align', $infilename, $param_string); | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | sub profile_align { | 
| 111 | 0 |  |  | 0 | 1 |  | my ($self,$input1,$input2) = @_; | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 0 |  |  |  |  |  | $self->io->_io_cleanup(); | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | # Create input file pointer | 
| 116 | 0 |  |  |  |  |  | my $infilename1 = $self->_setinput($input1, 1); | 
| 117 | 0 |  |  |  |  |  | my $infilename2 = $self->_setinput($input2, 2); | 
| 118 | 0 | 0 | 0 |  |  |  | if (!$infilename1 || !$infilename2) {$self->throw("Bad input data: $input1 or $input2 !");} | 
|  | 0 |  |  |  |  |  |  | 
| 119 | 0 | 0 | 0 |  |  |  | unless ( -e $infilename1 and -e  $infilename2) {$self->throw("Bad input file: $input1 or $input2 !");} | 
|  | 0 |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | # Create parameter string to pass to clustalw program | 
| 122 | 0 |  |  |  |  |  | my $param_string = $self->_setparams(); | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | # run clustalw | 
| 125 | 0 |  |  |  |  |  | my $aln = $self->_run('profile-aln', $infilename1, $infilename2, $param_string); | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub add_sequences { | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 0 |  |  | 0 | 1 |  | my ($self,$input1,$input2) = @_; | 
| 132 | 0 |  |  |  |  |  | my ($temp,$infilename1,$infilename2,$input,$seq); | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 0 |  |  |  |  |  | $self->io->_io_cleanup(); | 
| 135 |  |  |  |  |  |  | # Create input file pointer | 
| 136 | 0 |  |  |  |  |  | $infilename1 = $self->_setinput($input1,1); | 
| 137 | 0 |  |  |  |  |  | $infilename2 = $self->_setinput($input2,2); | 
| 138 | 0 | 0 | 0 |  |  |  | if (!$infilename1 || !$infilename2) {$self->throw("Bad input data: $input1 or $input2 !");} | 
|  | 0 |  |  |  |  |  |  | 
| 139 | 0 | 0 | 0 |  |  |  | unless ( -e $infilename1 and -e  $infilename2) {$self->throw("Bad input file: $input1 or $input2 !");} | 
|  | 0 |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | # Create parameter string to pass to clustalw program | 
| 143 | 0 |  |  |  |  |  | my $param_string = $self->_setparams(); | 
| 144 |  |  |  |  |  |  | # run clustalw | 
| 145 | 0 |  |  |  |  |  | my $aln = $self->_run('add_sequences', $infilename1, | 
| 146 |  |  |  |  |  |  | $infilename2, $param_string); | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub tree { | 
| 152 | 0 |  |  | 0 | 1 |  | my ($self,$input) = @_; | 
| 153 |  |  |  |  |  |  |  | 
| 154 | 0 |  |  |  |  |  | $self->io->_io_cleanup(); | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | # Create input file pointer | 
| 157 | 0 |  |  |  |  |  | my $infilename = $self->_setinput($input); | 
| 158 |  |  |  |  |  |  |  | 
| 159 | 0 | 0 |  |  |  |  | if (!$infilename) {$self->throw("Bad input data (sequences need an id ) or less than 2 sequences in $input !");} | 
|  | 0 |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | # Create parameter string to pass to clustalw program | 
| 162 | 0 |  |  |  |  |  | my $param_string = $self->_setparams(); | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | # run clustalw | 
| 165 | 0 |  |  |  |  |  | my $tree = $self->_run('tree', $infilename, $param_string); | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | sub footprint { | 
| 170 | 0 |  |  | 0 | 1 |  | my ($self, $in, $slice_size, $deviate) = @_; | 
| 171 |  |  |  |  |  |  |  | 
| 172 | 0 |  |  |  |  |  | my ($simplealn, $tree) = $self->run($in); | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | # total tree length? | 
| 175 | 0 |  |  |  |  |  | my $total_length = $tree->total_branch_length; | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | # tree length along sliding window, picking regions that significantly | 
| 178 |  |  |  |  |  |  | # deviate from the average tree length | 
| 179 | 0 |  | 0 |  |  |  | $slice_size ||= 5; | 
| 180 | 0 |  | 0 |  |  |  | $deviate ||= 33; | 
| 181 | 0 |  |  |  |  |  | my $threshold = $total_length - (($total_length / 100) * $deviate); | 
| 182 | 0 |  |  |  |  |  | my $length = $simplealn->length; | 
| 183 | 0 |  |  |  |  |  | my $below = 0; | 
| 184 | 0 |  |  |  |  |  | my $found_minima = 0; | 
| 185 | 0 |  |  |  |  |  | my $minima = [$threshold, '']; | 
| 186 | 0 |  |  |  |  |  | my @results; | 
| 187 | 0 |  |  |  |  |  | for my $i (1..($length - $slice_size + 1)) { | 
| 188 | 0 |  |  |  |  |  | my $slice = $simplealn->slice($i, ($i + $slice_size - 1), 1); | 
| 189 | 0 |  |  |  |  |  | my $tree = $self->tree($slice); | 
| 190 | 0 | 0 |  |  |  |  | $self->throw("No tree returned") unless defined $tree; | 
| 191 | 0 |  |  |  |  |  | my $slice_length = $tree->total_branch_length; | 
| 192 |  |  |  |  |  |  |  | 
| 193 | 0 | 0 |  |  |  |  | $slice_length <= $threshold ? ($below = 1) : ($below = 0); | 
| 194 | 0 | 0 |  |  |  |  | if ($below) { | 
| 195 | 0 | 0 |  |  |  |  | unless ($found_minima) { | 
| 196 | 0 | 0 |  |  |  |  | if ($slice_length < ${$minima}[0]) { | 
|  | 0 |  |  |  |  |  |  | 
| 197 | 0 |  |  |  |  |  | $minima = [$slice_length, $slice]; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  | else { | 
| 200 | 0 |  |  |  |  |  | push(@results, ${$minima}[1]); | 
|  | 0 |  |  |  |  |  |  | 
| 201 | 0 |  |  |  |  |  | $minima = [$threshold, '']; | 
| 202 | 0 |  |  |  |  |  | $found_minima = 1; | 
| 203 |  |  |  |  |  |  | } | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | else { | 
| 207 | 0 |  |  |  |  |  | $found_minima = 0; | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 0 |  |  |  |  |  | return @results; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | sub _run { | 
| 216 | 0 |  |  | 0 |  |  | my ($self, $command, $infile1, $infile2, $param_string) = @_; | 
| 217 |  |  |  |  |  |  |  | 
| 218 | 0 |  |  |  |  |  | my ($instring, $tree); | 
| 219 | 0 |  | 0 |  |  |  | my $quiet = $self->quiet() || $self->verbose() < 0; | 
| 220 |  |  |  |  |  |  |  | 
| 221 | 0 | 0 |  |  |  |  | if ($command =~ /align|both/) { | 
| 222 | 0 | 0 |  |  |  |  | if ($^O eq 'dec_osf') { | 
| 223 | 0 |  |  |  |  |  | $instring = $infile1; | 
| 224 | 0 |  |  |  |  |  | $command = ''; | 
| 225 |  |  |  |  |  |  | } | 
| 226 |  |  |  |  |  |  | else { | 
| 227 | 0 |  |  |  |  |  | $instring = " -infile=". '"' . $infile1 . '"'; | 
| 228 |  |  |  |  |  |  | } | 
| 229 | 0 |  |  |  |  |  | $param_string .= " $infile2"; | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  |  | 
| 232 | 0 | 0 |  |  |  |  | if ($command =~ /profile/) { | 
| 233 | 0 |  |  |  |  |  | $instring =  "-profile1=$infile1  -profile2=$infile2"; | 
| 234 | 0 |  |  |  |  |  | chmod 0777, $infile1, $infile2; | 
| 235 | 0 |  |  |  |  |  | $command = '-profile'; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 | 0 | 0 |  |  |  |  | if ($command =~ /add_sequences/) { | 
| 239 | 0 |  |  |  |  |  | $instring =  "-profile1=$infile1  -profile2=$infile2"; | 
| 240 | 0 |  |  |  |  |  | chmod 0777, $infile1,$infile2; | 
| 241 | 0 |  |  |  |  |  | $command = '-sequences'; | 
| 242 |  |  |  |  |  |  | } | 
| 243 |  |  |  |  |  |  |  | 
| 244 | 0 | 0 |  |  |  |  | if ($command =~ /tree/) { | 
| 245 | 0 | 0 |  |  |  |  | if( $^O eq 'dec_osf' ) { | 
| 246 | 0 |  |  |  |  |  | $instring =  $infile1; | 
| 247 | 0 |  |  |  |  |  | $command = ''; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  | else { | 
| 250 | 0 |  |  |  |  |  | $instring = " -infile=". '"' . $infile1 . '"'; | 
| 251 |  |  |  |  |  |  | } | 
| 252 | 0 |  |  |  |  |  | $param_string .= " $infile2"; | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 0 |  |  |  |  |  | $self->debug( "Program ".$self->executable."\n"); | 
| 255 | 0 |  |  |  |  |  | my $commandstring = $self->executable."$instring"."$param_string"; | 
| 256 | 0 | 0 |  |  |  |  | my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; | 
| 257 | 0 | 0 |  |  |  |  | $commandstring .= " 1>$null" if $quiet; | 
| 258 | 0 |  |  |  |  |  | $self->debug( "clustal command = $commandstring"); | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  |  | my $status = system($commandstring); | 
| 261 | 0 | 0 |  |  |  |  | unless( $status == 0 ) { | 
| 262 | 0 |  |  |  |  |  | $self->warn( "Clustalw call ($commandstring) crashed: $? \n"); | 
| 263 | 0 |  |  |  |  |  | return undef; | 
| 264 |  |  |  |  |  |  | } | 
| 265 |  |  |  |  |  |  |  | 
| 266 | 0 |  |  |  |  |  | return $self->_get_tree($infile1, $param_string); | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  |  | 
| 269 | 0 |  | 0 |  |  |  | my $output = $self->output || 'gcg'; | 
| 270 | 0 |  |  |  |  |  | $self->debug( "Program ".$self->executable."\n"); | 
| 271 | 0 |  |  |  |  |  | my $commandstring = $self->executable." $command"." $instring"." -output=$output". " $param_string"; | 
| 272 | 0 |  |  |  |  |  | $self->debug( "clustal command = $commandstring\n"); | 
| 273 |  |  |  |  |  |  |  | 
| 274 | 0 | 0 |  |  |  |  | open(my $pipe, "$commandstring |") || $self->throw("ClustalW call ($commandstring) failed to start: $? | $!"); | 
| 275 | 0 |  |  |  |  |  | my $score; | 
| 276 | 0 |  |  |  |  |  | while (<$pipe>) { | 
| 277 | 0 | 0 |  |  |  |  | print unless $quiet; | 
| 278 |  |  |  |  |  |  | # Kevin Brown suggested the following regex, though it matches multiple | 
| 279 |  |  |  |  |  |  | # times: we pick up the last one | 
| 280 | 0 | 0 |  |  |  |  | $score = $1 if ($_ =~ /Score:(\d+)/); | 
| 281 |  |  |  |  |  |  | # This one is printed at the end and seems the most appropriate to pick | 
| 282 |  |  |  |  |  |  | # up; we include the above regex incase 'Alignment Score' isn't given | 
| 283 | 0 | 0 |  |  |  |  | $score = $1 if ($_ =~ /Alignment Score (-?\d+)/); | 
| 284 |  |  |  |  |  |  | } | 
| 285 | 0 | 0 |  |  |  |  | close($pipe) || ($self->throw("ClustalW call ($commandstring) crashed: $?")); | 
| 286 |  |  |  |  |  |  |  | 
| 287 | 0 |  |  |  |  |  | my $outfile = $self->outfile(); | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | # retrieve alignment (Note: MSF format for AlignIO = GCG format of clustalw) | 
| 290 | 0 | 0 |  |  |  |  | my $format = $output =~ /gcg/i ? 'msf' : $output; | 
| 291 | 0 | 0 |  |  |  |  | if ($format =~ /clustal/i) { | 
| 292 | 0 |  |  |  |  |  | $format = 'clustalw'; # force clustalw incase 'clustal' is requested | 
| 293 |  |  |  |  |  |  | } | 
| 294 | 0 |  |  |  |  |  | my $in  = Bio::AlignIO->new(-file  => $outfile, -format=> $format); | 
| 295 | 0 |  |  |  |  |  | my $aln = $in->next_aln(); | 
| 296 | 0 |  |  |  |  |  | $in->close; | 
| 297 | 0 |  |  |  |  |  | $aln->score($score); | 
| 298 |  |  |  |  |  |  |  | 
| 299 | 0 | 0 |  |  |  |  | if ($command eq 'both') { | 
| 300 | 0 |  |  |  |  |  | $tree = $self->_get_tree($infile1, $param_string); | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | # Clean up the temporary files created along the way... | 
| 304 |  |  |  |  |  |  | # Replace file suffix with dnd to find name of dendrogram file(s) to delete | 
| 305 | 0 | 0 |  |  |  |  | unless ( $self->save_tempfiles ) { | 
| 306 | 0 |  |  |  |  |  | foreach my $f ($infile1, $infile2) { | 
| 307 | 0 |  |  |  |  |  | $f =~ s/\.[^\.]*$// ; | 
| 308 | 0 | 0 |  |  |  |  | unlink $f .'.dnd' if ($f ne ''); | 
| 309 |  |  |  |  |  |  | } | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  |  | 
| 312 | 0 | 0 |  |  |  |  | if ($command eq 'both') { | 
| 313 | 0 |  |  |  |  |  | return ($aln, $tree); | 
| 314 |  |  |  |  |  |  | } | 
| 315 | 0 |  |  |  |  |  | return $aln; | 
| 316 |  |  |  |  |  |  | } | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | sub _get_tree { | 
| 319 | 0 |  |  | 0 |  |  | my ($self, $treefile, $param_string) = @_; | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 0 |  |  |  |  |  | $treefile =~ s/\.[^\.]*$// ; | 
| 322 |  |  |  |  |  |  |  | 
| 323 | 0 | 0 |  |  |  |  | if ($param_string =~ /-bootstrap/) { | 
|  |  | 0 |  |  |  |  |  | 
| 324 | 0 |  |  |  |  |  | $treefile .= '.phb'; | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  | elsif ($param_string =~ /-tree/) { | 
| 327 | 0 |  |  |  |  |  | $treefile .= '.ph'; | 
| 328 |  |  |  |  |  |  | } | 
| 329 |  |  |  |  |  |  | else { | 
| 330 | 0 |  |  |  |  |  | $treefile .= '.dnd'; | 
| 331 |  |  |  |  |  |  | } | 
| 332 |  |  |  |  |  |  |  | 
| 333 | 0 |  |  |  |  |  | my $in = Bio::TreeIO->new('-file'  => $treefile, | 
| 334 |  |  |  |  |  |  | '-format'=> 'newick'); | 
| 335 |  |  |  |  |  |  |  | 
| 336 | 0 |  |  |  |  |  | my $tree = $in->next_tree; | 
| 337 | 0 | 0 |  |  |  |  | unless ( $self->save_tempfiles ) { | 
| 338 | 0 |  |  |  |  |  | foreach my $f ( $treefile ) { | 
| 339 | 0 | 0 |  |  |  |  | unlink $f if( $f ne '' ); | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  | } | 
| 342 |  |  |  |  |  |  |  | 
| 343 | 0 |  |  |  |  |  | return $tree; | 
| 344 |  |  |  |  |  |  | } | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | sub _setinput { | 
| 348 | 0 |  |  | 0 |  |  | my ($self, $input, $suffix) = @_; | 
| 349 | 0 |  |  |  |  |  | my ($infilename, $seq, $temp, $tfh); | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | # suffix is used to distinguish alignment files If $input is not a | 
| 352 |  |  |  |  |  |  | # reference it better be the name of a file with the sequence/ | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | # alignment data... | 
| 355 | 0 | 0 |  |  |  |  | unless (ref $input) { | 
| 356 |  |  |  |  |  |  | # check that file exists or throw | 
| 357 | 0 |  |  |  |  |  | $infilename = $input; | 
| 358 | 0 | 0 |  |  |  |  | return unless -e $input; | 
| 359 | 0 |  |  |  |  |  | return $infilename; | 
| 360 |  |  |  |  |  |  | } | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | # $input may be an array of BioSeq objects... | 
| 363 | 0 | 0 | 0 |  |  |  | if (ref($input) eq "ARRAY") { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | #  Open temporary file for both reading & writing of BioSeq array | 
| 365 | 0 |  |  |  |  |  | ($tfh,$infilename) = $self->io->tempfile(-dir=>$self->tempdir); | 
| 366 | 0 |  |  |  |  |  | $temp = Bio::SeqIO->new('-fh'=>$tfh, '-format' =>'Fasta'); | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | # Need at least 2 seqs for alignment | 
| 369 | 0 | 0 |  |  |  |  | return unless (scalar(@$input) > 1); | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 0 |  |  |  |  |  | foreach $seq (@$input) { | 
| 372 | 0 | 0 | 0 |  |  |  | return unless (defined $seq && $seq->isa("Bio::PrimarySeqI") and $seq->id()); | 
|  |  |  | 0 |  |  |  |  | 
| 373 | 0 |  |  |  |  |  | $temp->write_seq($seq); | 
| 374 |  |  |  |  |  |  | } | 
| 375 | 0 |  |  |  |  |  | $temp->close(); | 
| 376 | 0 |  |  |  |  |  | close($tfh); | 
| 377 | 0 |  |  |  |  |  | undef $tfh; | 
| 378 | 0 |  |  |  |  |  | return $infilename; | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | # $input may be a SimpleAlign object. | 
| 382 |  |  |  |  |  |  | elsif (ref($input) eq "Bio::SimpleAlign") { | 
| 383 |  |  |  |  |  |  | # Open temporary file for both reading & writing of SimpleAlign object | 
| 384 | 0 |  |  |  |  |  | ($tfh,$infilename) = $self->io->tempfile(-dir=>$self->tempdir); | 
| 385 | 0 |  |  |  |  |  | $temp = Bio::AlignIO->new('-fh'=> $tfh, '-format' => 'fasta'); | 
| 386 | 0 |  |  |  |  |  | $temp->write_aln($input); | 
| 387 | 0 |  |  |  |  |  | close($tfh); | 
| 388 | 0 |  |  |  |  |  | undef $tfh; | 
| 389 | 0 |  |  |  |  |  | return $infilename; | 
| 390 |  |  |  |  |  |  | } | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | # or $input may be a single BioSeq object (to be added to a previous alignment) | 
| 393 |  |  |  |  |  |  | elsif (ref($input) && $input->isa("Bio::PrimarySeqI") && $suffix==2) { | 
| 394 |  |  |  |  |  |  | # Open temporary file for both reading & writing of BioSeq object | 
| 395 | 0 |  |  |  |  |  | ($tfh,$infilename) = $self->io->tempfile(); | 
| 396 | 0 |  |  |  |  |  | $temp = Bio::SeqIO->new(-fh=> $tfh, '-format' =>'Fasta'); | 
| 397 | 0 |  |  |  |  |  | $temp->write_seq($input); | 
| 398 | 0 |  |  |  |  |  | close($tfh); | 
| 399 | 0 |  |  |  |  |  | undef $tfh; | 
| 400 | 0 |  |  |  |  |  | return $infilename; | 
| 401 |  |  |  |  |  |  | } | 
| 402 |  |  |  |  |  |  |  | 
| 403 | 0 |  |  |  |  |  | return; | 
| 404 |  |  |  |  |  |  | } | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | sub _setparams { | 
| 408 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 409 |  |  |  |  |  |  |  | 
| 410 | 0 |  |  |  |  |  | my $param_string = $self->SUPER::_setparams(-params => \@CLUSTALW_PARAMS, | 
| 411 |  |  |  |  |  |  | -switches => \@CLUSTALW_SWITCHES, | 
| 412 |  |  |  |  |  |  | -dash => 1, | 
| 413 |  |  |  |  |  |  | -lc => 1, | 
| 414 |  |  |  |  |  |  | -join => '='); | 
| 415 |  |  |  |  |  |  |  | 
| 416 |  |  |  |  |  |  | # Set default output file if no explicit output file selected | 
| 417 | 0 | 0 |  |  |  |  | unless ($param_string =~ /outfile/) { | 
| 418 | 0 |  |  |  |  |  | my ($tfh, $outfile) = $self->io->tempfile(-dir => $self->tempdir()); | 
| 419 | 0 |  |  |  |  |  | close($tfh); | 
| 420 | 0 |  |  |  |  |  | undef $tfh; | 
| 421 | 0 |  |  |  |  |  | $self->outfile($outfile); | 
| 422 | 0 |  |  |  |  |  | $param_string .= " -outfile=\"$outfile\"" ; | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  |  | 
| 425 | 0 |  |  |  |  |  | $param_string .= ' 2>&1'; | 
| 426 |  |  |  |  |  |  |  | 
| 427 | 0 |  |  |  |  |  | return $param_string; | 
| 428 |  |  |  |  |  |  | } | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | 1; | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | __END__ |