line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Tools::Run::Alignment::TCoffee; |
2
|
|
|
|
|
|
|
$Bio::Tools::Run::Alignment::TCoffee::VERSION = '1.7.2'; |
3
|
1
|
|
|
1
|
|
186956
|
use utf8; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
9
|
|
4
|
1
|
|
|
1
|
|
43
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
5
|
1
|
|
|
1
|
|
9
|
use warnings; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
60
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
|
|
137
|
use vars qw($AUTOLOAD @ISA $PROGRAM_NAME $PROGRAM_DIR %DEFAULTS |
8
|
|
|
|
|
|
|
@TCOFFEE_PARAMS @TCOFFEE_SWITCHES @OTHER_SWITCHES %OK_FIELD |
9
|
1
|
|
|
1
|
|
9
|
); |
|
1
|
|
|
|
|
4
|
|
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
9
|
use Cwd; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
83
|
|
12
|
1
|
|
|
1
|
|
8
|
use Bio::Seq; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
13
|
1
|
|
|
1
|
|
8
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
30
|
|
14
|
1
|
|
|
1
|
|
8
|
use Bio::SimpleAlign; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
15
|
1
|
|
|
1
|
|
7
|
use Bio::AlignIO; |
|
1
|
|
|
|
|
9
|
|
|
1
|
|
|
|
|
37
|
|
16
|
1
|
|
|
1
|
|
15
|
use Bio::Root::IO; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
38
|
|
17
|
1
|
|
|
1
|
|
378
|
use Bio::Factory::ApplicationFactoryI; |
|
1
|
|
|
|
|
174
|
|
|
1
|
|
|
|
|
34
|
|
18
|
1
|
|
|
1
|
|
444
|
use Bio::Tools::Run::WrapperBase; |
|
1
|
|
|
|
|
2033
|
|
|
1
|
|
|
|
|
227
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
@ISA = qw(Bio::Tools::Run::WrapperBase |
21
|
|
|
|
|
|
|
Bio::Factory::ApplicationFactoryI); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# ABSTRACT: Object for the calculation of a multiple sequence alignment from a set of unaligned sequences or alignments using the TCoffee program |
24
|
|
|
|
|
|
|
# AUTHOR: Jason Stajich |
25
|
|
|
|
|
|
|
# AUTHOR: Peter Schattner |
26
|
|
|
|
|
|
|
# OWNER: Jason Stajich |
27
|
|
|
|
|
|
|
# OWNER: Peter Schattner |
28
|
|
|
|
|
|
|
# LICENSE: Perl_5 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# You will need to enable TCoffee to find the tcoffee program. This can be done |
32
|
|
|
|
|
|
|
# in (at least) twp ways: |
33
|
|
|
|
|
|
|
# 1. define an environmental variable TCOFFEE: |
34
|
|
|
|
|
|
|
# export TCOFFEEDIR=/home/progs/tcoffee or |
35
|
|
|
|
|
|
|
# 2. include a definition of an environmental variable TCOFFEEDIR |
36
|
|
|
|
|
|
|
# in every script that will |
37
|
|
|
|
|
|
|
# use Bio::Tools::Run::Alignment::TCoffee.pm. |
38
|
|
|
|
|
|
|
# BEGIN {$ENV{TCOFFEEDIR} = '/home/progs/tcoffee'; } |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
BEGIN { |
41
|
1
|
|
|
1
|
|
5
|
$PROGRAM_NAME = 't_coffee'; |
42
|
1
|
|
|
|
|
5
|
$PROGRAM_DIR = $ENV{'TCOFFEEDIR'}; |
43
|
1
|
|
|
|
|
7
|
%DEFAULTS = ( 'MATRIX' => 'blosum', |
44
|
|
|
|
|
|
|
'OUTPUT' => 'clustalw', |
45
|
|
|
|
|
|
|
'AFORMAT'=> 'msf', |
46
|
|
|
|
|
|
|
'METHODS' => [qw(lalign_id_pair clustalw_pair)] |
47
|
|
|
|
|
|
|
); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
1
|
|
|
|
|
7
|
@TCOFFEE_PARAMS = qw(IN TYPE PARAMETERS DO_NORMALISE EXTEND |
51
|
|
|
|
|
|
|
DP_MODE KTUPLE NDIAGS DIAG_MODE SIM_MATRIX |
52
|
|
|
|
|
|
|
MATRIX GAPOPEN GAPEXT COSMETIC_PENALTY TG_MODE |
53
|
|
|
|
|
|
|
WEIGHT SEQ_TO_ALIGN NEWTREE USETREE TREE_MODE |
54
|
|
|
|
|
|
|
OUTFILE OUTPUT CASE CPU OUT_LIB OUTORDER SEQNOS |
55
|
|
|
|
|
|
|
RUN_NAME CONVERT |
56
|
|
|
|
|
|
|
); |
57
|
|
|
|
|
|
|
|
58
|
1
|
|
|
|
|
4
|
@TCOFFEE_SWITCHES = qw(QUICKTREE); |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
|
|
4
|
@OTHER_SWITCHES = qw(QUIET ALIGN KEEPDND); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Authorize attribute fields |
63
|
1
|
|
|
|
|
4
|
foreach my $attr ( @TCOFFEE_PARAMS, @TCOFFEE_SWITCHES, @OTHER_SWITCHES ) { |
64
|
33
|
|
|
|
|
3431
|
$OK_FIELD{$attr}++; } |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub program_name { |
69
|
6
|
|
|
6
|
1
|
413
|
return $PROGRAM_NAME; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub program_dir { |
74
|
3
|
|
|
3
|
1
|
95
|
return $PROGRAM_DIR; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub new { |
78
|
1
|
|
|
1
|
1
|
128
|
my ($class,@args) = @_; |
79
|
1
|
|
|
|
|
17
|
my $self = $class->SUPER::new(@args); |
80
|
1
|
|
|
|
|
13
|
my ($attr, $value); |
81
|
|
|
|
|
|
|
|
82
|
1
|
|
|
|
|
5
|
while (@args) { |
83
|
0
|
|
|
|
|
0
|
$attr = shift @args; |
84
|
0
|
|
|
|
|
0
|
$value = shift @args; |
85
|
0
|
0
|
|
|
|
0
|
next if( $attr =~ /^-/); # don't want named parameters |
86
|
0
|
|
|
|
|
0
|
$self->$attr($value); |
87
|
|
|
|
|
|
|
} |
88
|
1
|
50
|
|
|
|
14
|
$self->matrix($DEFAULTS{'MATRIX'}) unless( $self->matrix ); |
89
|
1
|
50
|
|
|
|
63
|
$self->output($DEFAULTS{'OUTPUT'}) unless( $self->output ); |
90
|
1
|
50
|
|
|
|
4
|
$self->methods($DEFAULTS{'METHODS'}) unless $self->methods; |
91
|
|
|
|
|
|
|
# $self->aformat($DEFAULTS{'AFORMAT'}) unless $self->aformat; |
92
|
|
|
|
|
|
|
|
93
|
1
|
|
|
|
|
3
|
return $self; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub AUTOLOAD { |
97
|
7
|
|
|
7
|
|
3766
|
my $self = shift; |
98
|
7
|
|
|
|
|
17
|
my $attr = $AUTOLOAD; |
99
|
7
|
|
|
|
|
41
|
$attr =~ s/.*:://; |
100
|
7
|
|
|
|
|
23
|
$attr = uc $attr; |
101
|
|
|
|
|
|
|
# aliasing |
102
|
7
|
50
|
|
|
|
22
|
$attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME'; |
103
|
7
|
50
|
|
|
|
26
|
$self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; |
104
|
|
|
|
|
|
|
|
105
|
7
|
100
|
|
|
|
24
|
$self->{$attr} = shift if @_; |
106
|
7
|
|
|
|
|
99
|
return $self->{$attr}; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub error_string{ |
111
|
0
|
|
|
0
|
1
|
0
|
my ($self,$value) = @_; |
112
|
0
|
0
|
|
|
|
0
|
if( defined $value) { |
113
|
0
|
|
|
|
|
0
|
$self->{'error_string'} = $value; |
114
|
|
|
|
|
|
|
} |
115
|
0
|
|
|
|
|
0
|
return $self->{'error_string'}; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub version { |
121
|
1
|
|
|
1
|
1
|
649
|
my ($self) = @_; |
122
|
1
|
|
|
|
|
5
|
my $exe; |
123
|
1
|
0
|
|
|
|
10
|
return undef unless $exe = $self->executable; |
124
|
0
|
|
|
|
|
0
|
my $string = `$exe -quiet=stdout 2>&1` ; |
125
|
0
|
|
|
|
|
0
|
$string =~ /Version_([\d.]+)/; |
126
|
0
|
|
0
|
|
|
0
|
return $1 || undef; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub run{ |
132
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
133
|
0
|
|
|
|
|
0
|
my ($type,$seq,$profile) = $self->_rearrange([qw(TYPE |
134
|
|
|
|
|
|
|
SEQ |
135
|
|
|
|
|
|
|
PROFILE)], |
136
|
|
|
|
|
|
|
@args); |
137
|
0
|
0
|
|
|
|
0
|
if( $type =~ /align/i ) { |
|
|
0
|
|
|
|
|
|
138
|
0
|
|
|
|
|
0
|
return $self->align($seq); |
139
|
|
|
|
|
|
|
} elsif( $type =~ /profile/i ) { |
140
|
0
|
|
|
|
|
0
|
return $self->profile_align($profile,$seq); |
141
|
|
|
|
|
|
|
} else { |
142
|
0
|
|
|
|
|
0
|
$self->warn("unrecognized alignment type $type\n"); |
143
|
|
|
|
|
|
|
} |
144
|
0
|
|
|
|
|
0
|
return undef; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub align { |
149
|
0
|
|
|
0
|
1
|
0
|
my ($self,$input) = @_; |
150
|
|
|
|
|
|
|
# Create input file pointer |
151
|
0
|
|
|
|
|
0
|
$self->io->_io_cleanup(); |
152
|
0
|
|
|
|
|
0
|
my ($infilename,$type) = $self->_setinput($input); |
153
|
0
|
0
|
|
|
|
0
|
if (!$infilename) { |
154
|
0
|
|
|
|
|
0
|
$self->throw("Bad input data or less than 2 sequences in $input !"); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
0
|
|
|
|
|
0
|
my $param_string = $self->_setparams(); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# run tcoffee |
160
|
0
|
|
|
|
|
0
|
return $self->_run('align', [$infilename,$type], $param_string); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
################################################# |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub profile_align { |
167
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
168
|
0
|
|
|
|
|
0
|
my $input1 = shift; |
169
|
0
|
|
|
|
|
0
|
my $input2 = shift; |
170
|
|
|
|
|
|
|
|
171
|
0
|
|
|
|
|
0
|
my ($temp,$infilename1,$infilename2,$type1,$type2,$input,$seq); |
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
0
|
$self->io->_io_cleanup(); |
174
|
|
|
|
|
|
|
# Create input file pointers |
175
|
0
|
|
|
|
|
0
|
($infilename1,$type1) = $self->_setinput($input1); |
176
|
0
|
|
|
|
|
0
|
($infilename2,$type2) = $self->_setinput($input2); |
177
|
0
|
0
|
|
|
|
0
|
unless ($type1) { |
178
|
0
|
|
|
|
|
0
|
$self->throw("Unknown type for first argument"); |
179
|
|
|
|
|
|
|
} |
180
|
0
|
0
|
|
|
|
0
|
unless ($type2) { |
181
|
0
|
|
|
|
|
0
|
$self->throw("Unknown type for second argument") |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
0
|
0
|
0
|
|
|
0
|
if (!$infilename1 || !$infilename2) { |
185
|
0
|
|
|
|
|
0
|
$self->throw("Bad input data: $input1 or $input2 !"); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
0
|
my $param_string = $self->_setparams(); |
189
|
|
|
|
|
|
|
# run tcoffee |
190
|
0
|
|
|
|
|
0
|
my $aln = $self->_run('profile-aln', |
191
|
|
|
|
|
|
|
[$infilename1,$type1], |
192
|
|
|
|
|
|
|
[$infilename2,$type2], |
193
|
|
|
|
|
|
|
$param_string) |
194
|
|
|
|
|
|
|
; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
################################################# |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
sub _run { |
200
|
0
|
|
|
0
|
|
0
|
my ($infilename, $infile1,$infile2) = ('','',''); |
201
|
0
|
|
|
|
|
0
|
my $self = shift; |
202
|
0
|
|
|
|
|
0
|
my $command = shift; |
203
|
0
|
|
|
|
|
0
|
my $instring; |
204
|
0
|
0
|
|
|
|
0
|
if ($command =~ /align/) { |
205
|
0
|
|
|
|
|
0
|
my $infile = shift ; |
206
|
0
|
|
|
|
|
0
|
my $type; |
207
|
0
|
|
|
|
|
0
|
($infilename,$type) = @$infile; |
208
|
0
|
|
|
|
|
0
|
$instring = '-in='.join(',',($infilename, 'X'.$self->matrix, |
209
|
|
|
|
|
|
|
$self->methods)); |
210
|
|
|
|
|
|
|
} |
211
|
0
|
0
|
|
|
|
0
|
if ($command =~ /profile/) { |
212
|
0
|
|
|
|
|
0
|
my $in1 = shift ; |
213
|
0
|
|
|
|
|
0
|
my $in2 = shift ; |
214
|
0
|
|
|
|
|
0
|
my ($type1,$type2); |
215
|
0
|
|
|
|
|
0
|
($infile1,$type1) = @$in1; |
216
|
0
|
|
|
|
|
0
|
($infile2,$type2) = @$in2; |
217
|
|
|
|
|
|
|
# in later versions (tested on 5.72 and 7.54) the API for profile |
218
|
|
|
|
|
|
|
# alignment changed. This attempts to do the right thing for older |
219
|
|
|
|
|
|
|
# versions but corrects for newer ones |
220
|
0
|
0
|
0
|
|
|
0
|
if ($self->version && $self->version < 5) { |
221
|
|
|
|
|
|
|
# this breaks severely on newer TCoffee (>= v5) |
222
|
0
|
0
|
0
|
|
|
0
|
unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i) ) { |
223
|
|
|
|
|
|
|
$instring = '-in='.join(',', |
224
|
|
|
|
|
|
|
($type2.$infile2), |
225
|
|
|
|
|
|
|
'X'.$self->matrix, |
226
|
0
|
|
|
|
|
0
|
(map {'M'.$_} $self->methods) |
|
0
|
|
|
|
|
0
|
|
227
|
|
|
|
|
|
|
); |
228
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.$infile1; |
229
|
|
|
|
|
|
|
} else { |
230
|
|
|
|
|
|
|
$instring = '-in='.join(',',( |
231
|
|
|
|
|
|
|
$type1.$infile1, |
232
|
|
|
|
|
|
|
$type2.$infile2, |
233
|
0
|
|
|
|
|
0
|
(map {'M'.$_} $self->methods) |
|
0
|
|
|
|
|
0
|
|
234
|
|
|
|
|
|
|
) |
235
|
|
|
|
|
|
|
); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} else { |
238
|
0
|
0
|
|
|
|
0
|
if ($type2 eq 'S') { |
239
|
|
|
|
|
|
|
# second infile is a sequence, not an alignment |
240
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.join(',',$infile1); |
241
|
0
|
|
|
|
|
0
|
$instring .= ' -seq='.join(',',$infile2); |
242
|
|
|
|
|
|
|
} else { |
243
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.join(',',$infile1,$infile2); |
244
|
|
|
|
|
|
|
} |
245
|
0
|
0
|
0
|
|
|
0
|
$instring .= ' -matrix='.$self->matrix unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i)) ; |
246
|
0
|
0
|
|
|
|
0
|
$instring .= ' -method='.join(',',$self->methods) if ($self->methods) ; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
0
|
|
|
|
|
0
|
my $param_string = shift; |
250
|
|
|
|
|
|
|
# my ($paramfh,$parameterFile) = $self->io->tempfile; |
251
|
|
|
|
|
|
|
# print $paramfh ( "$instring\n-output=gcg$param_string") ; |
252
|
|
|
|
|
|
|
# close($paramfh); |
253
|
|
|
|
|
|
|
# my $commandstring = "t_coffee -output=gcg -parameters $parameterFile" ; ##MJL |
254
|
|
|
|
|
|
|
|
255
|
0
|
|
|
|
|
0
|
my $commandstring = $self->executable." $instring $param_string"; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
#$self->debug( "tcoffee command = $commandstring \n"); |
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
|
|
0
|
my $status = system($commandstring); |
260
|
0
|
|
|
|
|
0
|
my $outfile = $self->outfile(); |
261
|
|
|
|
|
|
|
|
262
|
0
|
0
|
0
|
|
|
0
|
if( !-e $outfile || -z $outfile ) { |
263
|
0
|
|
|
|
|
0
|
$self->warn( "TCoffee call crashed: $? [command $commandstring]\n"); |
264
|
0
|
|
|
|
|
0
|
return undef; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# retrieve alignment (Note: MSF format for AlignIO = GCG format of |
268
|
|
|
|
|
|
|
# tcoffee) |
269
|
|
|
|
|
|
|
|
270
|
0
|
|
|
|
|
0
|
my $in = Bio::AlignIO->new('-file' => $outfile, '-format' => |
271
|
|
|
|
|
|
|
$self->output); |
272
|
0
|
|
|
|
|
0
|
my $aln = $in->next_aln(); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Replace file suffix with dnd to find name of dendrogram file(s) to delete |
275
|
0
|
0
|
|
|
|
0
|
if( ! $self->keepdnd ) { |
276
|
0
|
|
|
|
|
0
|
foreach my $f ( $infilename, $infile1, $infile2 ) { |
277
|
0
|
0
|
0
|
|
|
0
|
next if( !defined $f || $f eq ''); |
278
|
0
|
|
|
|
|
0
|
$f =~ s/\.[^\.]*$// ; |
279
|
|
|
|
|
|
|
# because TCoffee writes these files to the CWD |
280
|
0
|
0
|
|
|
|
0
|
if( $Bio::Root::IO::PATHSEP ) { |
281
|
0
|
|
|
|
|
0
|
my @line = split(/$Bio::Root::IO::PATHSEP/, $f); |
282
|
0
|
|
|
|
|
0
|
$f = pop @line; |
283
|
|
|
|
|
|
|
} else { |
284
|
0
|
|
|
|
|
0
|
(undef, undef, $f) = File::Spec->splitpath($f); |
285
|
|
|
|
|
|
|
} |
286
|
0
|
0
|
|
|
|
0
|
unlink $f .'.dnd' if( $f ne '' ); |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} |
289
|
0
|
|
|
|
|
0
|
return $aln; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub _setinput { |
295
|
0
|
|
|
0
|
|
0
|
my ($self,$input) = @_; |
296
|
0
|
|
|
|
|
0
|
my ($infilename, $seq, $temp, $tfh); |
297
|
|
|
|
|
|
|
# If $input is not a reference it better be the name of a |
298
|
|
|
|
|
|
|
# file with the sequence/ alignment data... |
299
|
0
|
|
|
|
|
0
|
my $type = ''; |
300
|
0
|
0
|
|
|
|
0
|
if (! ref $input) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# check that file exists or throw |
302
|
0
|
|
|
|
|
0
|
$infilename = $input; |
303
|
0
|
0
|
|
|
|
0
|
unless (-e $input) {return 0;} |
|
0
|
|
|
|
|
0
|
|
304
|
|
|
|
|
|
|
# let's peek and guess |
305
|
0
|
0
|
|
|
|
0
|
open(my $IN,$infilename) || $self->throw("Cannot open $infilename"); |
306
|
0
|
|
|
|
|
0
|
my $header = <$IN>; |
307
|
0
|
0
|
0
|
|
|
0
|
if( $header =~ /^\s+\d+\s+\d+/ || |
|
|
|
0
|
|
|
|
|
308
|
|
|
|
|
|
|
$header =~ /Pileup/i || |
309
|
|
|
|
|
|
|
$header =~ /clustal/i ) { # phylip |
310
|
0
|
|
|
|
|
0
|
$type = 'A'; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# On some systems, having filenames with / in them (ie. a file in a |
314
|
|
|
|
|
|
|
# directory) causes t-coffee to completely fail. It warns on all systems. |
315
|
|
|
|
|
|
|
# The -no_warning option solves this, but there is still some strange |
316
|
|
|
|
|
|
|
# bug when doing certain profile-related things. This is magically solved |
317
|
|
|
|
|
|
|
# by copying the profile file to a temp file in the current directory, so |
318
|
|
|
|
|
|
|
# it its filename supplied to t-coffee contains no / |
319
|
|
|
|
|
|
|
# (It's messy here - I just do this to /all/ input files to most easily |
320
|
|
|
|
|
|
|
# catch all variants of providing a profile - it may only be the last |
321
|
|
|
|
|
|
|
# form (isa("Bio::PrimarySeqI")) that causes a problem?) |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
0
|
my (undef, undef, $adjustedfilename) = File::Spec->splitpath($infilename); |
324
|
0
|
0
|
|
|
|
0
|
if ($adjustedfilename ne $infilename) { |
325
|
0
|
|
|
|
|
0
|
my ($fh, $tempfile) = $self->io->tempfile(-dir => cwd()); |
326
|
0
|
|
|
|
|
0
|
seek($IN, 0, 0); |
327
|
0
|
|
|
|
|
0
|
while (<$IN>) { |
328
|
0
|
|
|
|
|
0
|
print $fh $_; |
329
|
|
|
|
|
|
|
} |
330
|
0
|
|
|
|
|
0
|
close($fh); |
331
|
0
|
|
|
|
|
0
|
(undef, undef, $tempfile) = File::Spec->splitpath($tempfile); |
332
|
0
|
|
|
|
|
0
|
$infilename = $tempfile; |
333
|
0
|
|
|
|
|
0
|
$type = 'S'; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
0
|
close($IN); |
337
|
0
|
|
|
|
|
0
|
return ($infilename,$type); |
338
|
|
|
|
|
|
|
} elsif (ref($input) =~ /ARRAY/i ) { |
339
|
|
|
|
|
|
|
# $input may be an array of BioSeq objects... |
340
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of array |
341
|
0
|
|
|
|
|
0
|
($tfh,$infilename) = $self->io->tempfile(-dir => cwd()); |
342
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
343
|
0
|
0
|
|
|
|
0
|
if( ! ref($input->[0]) ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
$self->warn("passed an array ref which did not contain objects to _setinput"); |
345
|
0
|
|
|
|
|
0
|
return undef; |
346
|
|
|
|
|
|
|
} elsif( $input->[0]->isa('Bio::PrimarySeqI') ) { |
347
|
0
|
|
|
|
|
0
|
$temp = Bio::SeqIO->new('-fh' => $tfh, |
348
|
|
|
|
|
|
|
'-format' => 'fasta'); |
349
|
0
|
|
|
|
|
0
|
my $ct = 1; |
350
|
0
|
|
|
|
|
0
|
foreach $seq (@$input) { |
351
|
0
|
0
|
0
|
|
|
0
|
return 0 unless ( ref($seq) && |
352
|
|
|
|
|
|
|
$seq->isa("Bio::PrimarySeqI") ); |
353
|
0
|
0
|
0
|
|
|
0
|
if( ! defined $seq->display_id || |
354
|
|
|
|
|
|
|
$seq->display_id =~ /^\s+$/) { |
355
|
0
|
|
|
|
|
0
|
$seq->display_id( "Seq".$ct++); |
356
|
|
|
|
|
|
|
} |
357
|
0
|
|
|
|
|
0
|
$temp->write_seq($seq); |
358
|
|
|
|
|
|
|
} |
359
|
0
|
|
|
|
|
0
|
$temp->close(); |
360
|
0
|
|
|
|
|
0
|
undef $temp; |
361
|
0
|
|
|
|
|
0
|
close($tfh); |
362
|
0
|
|
|
|
|
0
|
$tfh = undef; |
363
|
0
|
|
|
|
|
0
|
$type = 'S'; |
364
|
|
|
|
|
|
|
} elsif( $input->[0]->isa('Bio::Align::AlignI' ) ) { |
365
|
0
|
|
|
|
|
0
|
$temp = Bio::AlignIO->new('-fh' => $tfh, |
366
|
|
|
|
|
|
|
'-format' => $self->aformat); |
367
|
0
|
|
|
|
|
0
|
foreach my $aln (@$input) { |
368
|
0
|
0
|
0
|
|
|
0
|
next unless ( ref($aln) && |
369
|
|
|
|
|
|
|
$aln->isa("Bio::Align::AlignI") ); |
370
|
0
|
|
|
|
|
0
|
$temp->write_aln($aln); |
371
|
|
|
|
|
|
|
} |
372
|
0
|
|
|
|
|
0
|
$temp->close(); |
373
|
0
|
|
|
|
|
0
|
undef $temp; |
374
|
0
|
|
|
|
|
0
|
close($tfh); |
375
|
0
|
|
|
|
|
0
|
$tfh = undef; |
376
|
0
|
|
|
|
|
0
|
$type = 'A'; |
377
|
|
|
|
|
|
|
} else { |
378
|
0
|
|
|
|
|
0
|
$self->warn( "got an array ref with 1st entry ". |
379
|
|
|
|
|
|
|
$input->[0]. |
380
|
|
|
|
|
|
|
" and don't know what to do with it\n"); |
381
|
|
|
|
|
|
|
} |
382
|
0
|
|
|
|
|
0
|
return ($infilename,$type); |
383
|
|
|
|
|
|
|
# $input may be a SimpleAlign object. |
384
|
|
|
|
|
|
|
} elsif ( $input->isa("Bio::Align::AlignI") ) { |
385
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of SimpleAlign object |
386
|
0
|
|
|
|
|
0
|
($tfh, $infilename) = $self->io->tempfile(-dir => cwd()); |
387
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
388
|
0
|
|
|
|
|
0
|
$temp = Bio::AlignIO->new(-fh=>$tfh, |
389
|
|
|
|
|
|
|
'-format' => 'clustalw'); |
390
|
0
|
|
|
|
|
0
|
$temp->write_aln($input); |
391
|
0
|
|
|
|
|
0
|
close($tfh); |
392
|
0
|
|
|
|
|
0
|
undef $tfh; |
393
|
0
|
|
|
|
|
0
|
return ($infilename,'A'); |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# or $input may be a single BioSeq object (to be added to |
397
|
|
|
|
|
|
|
# a previous alignment) |
398
|
|
|
|
|
|
|
elsif ( $input->isa("Bio::PrimarySeqI")) { |
399
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of BioSeq object |
400
|
0
|
|
|
|
|
0
|
($tfh,$infilename) = $self->io->tempfile(-dir => cwd()); |
401
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
402
|
0
|
|
|
|
|
0
|
$temp = Bio::SeqIO->new(-fh=> $tfh, '-format' =>'Fasta'); |
403
|
0
|
|
|
|
|
0
|
$temp->write_seq($input); |
404
|
0
|
|
|
|
|
0
|
$temp->close(); |
405
|
0
|
|
|
|
|
0
|
close($tfh); |
406
|
0
|
|
|
|
|
0
|
undef $tfh; |
407
|
0
|
|
|
|
|
0
|
return ($infilename,'S'); |
408
|
|
|
|
|
|
|
} else { |
409
|
0
|
|
|
|
|
0
|
$self->warn("Got $input and don't know what to do with it\n"); |
410
|
|
|
|
|
|
|
} |
411
|
0
|
|
|
|
|
0
|
return 0; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub _setparams { |
417
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
418
|
0
|
|
|
|
|
0
|
my ($attr, $value,$param_string); |
419
|
0
|
|
|
|
|
0
|
$param_string = ''; |
420
|
0
|
|
|
|
|
0
|
my $laststr; |
421
|
0
|
|
|
|
|
0
|
for $attr ( @TCOFFEE_PARAMS ) { |
422
|
0
|
|
|
|
|
0
|
$value = $self->$attr(); |
423
|
0
|
0
|
|
|
|
0
|
next unless (defined $value); |
424
|
0
|
|
|
|
|
0
|
my $attr_key = lc $attr; |
425
|
0
|
0
|
|
|
|
0
|
if( $attr_key =~ /matrix/ ) { |
426
|
0
|
|
|
|
|
0
|
$self->{'_in'} = [ "X".lc($value) ]; |
427
|
|
|
|
|
|
|
} else { |
428
|
0
|
|
|
|
|
0
|
$attr_key = ' -'.$attr_key; |
429
|
0
|
|
|
|
|
0
|
$param_string .= $attr_key .'='.$value; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
} |
432
|
0
|
|
|
|
|
0
|
for $attr ( @TCOFFEE_SWITCHES) { |
433
|
0
|
|
|
|
|
0
|
$value = $self->$attr(); |
434
|
0
|
0
|
|
|
|
0
|
next unless ($value); |
435
|
0
|
|
|
|
|
0
|
my $attr_key = lc $attr; #put switches in format expected by tcoffee |
436
|
0
|
|
|
|
|
0
|
$attr_key = ' -'.$attr_key; |
437
|
0
|
|
|
|
|
0
|
$param_string .= $attr_key ; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# Set default output file if no explicit output file selected |
441
|
0
|
0
|
|
|
|
0
|
unless ($self->outfile ) { |
442
|
0
|
|
|
|
|
0
|
my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir()); |
443
|
0
|
|
|
|
|
0
|
close($tfh); |
444
|
0
|
|
|
|
|
0
|
undef $tfh; |
445
|
0
|
|
|
|
|
0
|
$self->outfile($outfile); |
446
|
0
|
|
|
|
|
0
|
$param_string .= " -outfile=$outfile" ; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
0
|
0
|
0
|
|
|
0
|
if ($self->quiet() || $self->verbose < 0) { $param_string .= ' -quiet';} |
|
0
|
|
|
|
|
0
|
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# -no_warning is required on some systems with certain versions or failure |
452
|
|
|
|
|
|
|
# is guaranteed |
453
|
0
|
0
|
0
|
|
|
0
|
if ($self->version >= 4 && $self->version < 4.7) { |
454
|
0
|
|
|
|
|
0
|
$param_string .= ' -no_warning'; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
0
|
return $param_string; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub aformat{ |
462
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
463
|
0
|
0
|
|
|
|
0
|
$self->{'_aformat'} = shift if @_; |
464
|
0
|
|
|
|
|
0
|
return $self->{'_aformat'}; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub methods{ |
470
|
2
|
|
|
2
|
1
|
9
|
my ($self) = shift; |
471
|
|
|
|
|
|
|
|
472
|
2
|
50
|
|
|
|
6
|
@{$self->{'_methods'}} = @{shift || []} if @_; |
|
1
|
100
|
|
|
|
4
|
|
|
1
|
|
|
|
|
4
|
|
473
|
2
|
100
|
|
|
|
3
|
return @{$self->{'_methods'} || []}; |
|
2
|
|
|
|
|
13
|
|
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
1; |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
__END__ |