line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::Alignment::TCoffee |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich, Peter Schattner |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich, Peter Schattner |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Tools::Run::Alignment::TCoffee - Object for the calculation of a |
17
|
|
|
|
|
|
|
multiple sequence alignment from a set of unaligned sequences or |
18
|
|
|
|
|
|
|
alignments using the TCoffee program |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Build a tcoffee alignment factory |
23
|
|
|
|
|
|
|
@params = ('ktuple' => 2, 'matrix' => 'BLOSUM'); |
24
|
|
|
|
|
|
|
$factory = Bio::Tools::Run::Alignment::TCoffee->new(@params); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Pass the factory a list of sequences to be aligned. |
27
|
|
|
|
|
|
|
$inputfilename = 't/cysprot.fa'; |
28
|
|
|
|
|
|
|
# $aln is a SimpleAlign object. |
29
|
|
|
|
|
|
|
$aln = $factory->align($inputfilename); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# or where @seq_array is an array of Bio::Seq objects |
32
|
|
|
|
|
|
|
$seq_array_ref = \@seq_array; |
33
|
|
|
|
|
|
|
$aln = $factory->align($seq_array_ref); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Or one can pass the factory a pair of (sub)alignments |
36
|
|
|
|
|
|
|
#to be aligned against each other, e.g.: |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# where $aln1 and $aln2 are Bio::SimpleAlign objects. |
39
|
|
|
|
|
|
|
$aln = $factory->profile_align($aln1,$aln2); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Or one can pass the factory an alignment and one or more |
42
|
|
|
|
|
|
|
# unaligned sequences to be added to the alignment. For example: |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# $seq is a Bio::Seq object. |
45
|
|
|
|
|
|
|
$aln = $factory->profile_align($aln1,$seq); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#There are various additional options and input formats available. |
48
|
|
|
|
|
|
|
#See the DESCRIPTION section that follows for additional details. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 DESCRIPTION |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Note: this DESCRIPTION only documents the (Bio)perl interface to |
53
|
|
|
|
|
|
|
TCoffee. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 Helping the module find your executable |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
You will need to enable TCoffee to find the t_coffee program. This |
58
|
|
|
|
|
|
|
can be done in (at least) three ways: |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
1. Make sure the t_coffee executable is in your path so that |
61
|
|
|
|
|
|
|
which t_coffee returns a t_coffee executable on your system. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
2. Define an environmental variable TCOFFEEDIR which is a dir |
64
|
|
|
|
|
|
|
which contains the 't_coffee' app: |
65
|
|
|
|
|
|
|
In bash |
66
|
|
|
|
|
|
|
export TCOFFEEDIR=/home/username/progs/T-COFFEE_distribution_Version_1.37/bin |
67
|
|
|
|
|
|
|
In csh/tcsh |
68
|
|
|
|
|
|
|
setenv TCOFFEEDIR /home/username/progs/T-COFFEE_distribution_Version_1.37/bin |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
3. Include a definition of an environmental variable TCOFFEEDIR in |
71
|
|
|
|
|
|
|
every script that will use this TCoffee wrapper module. |
72
|
|
|
|
|
|
|
BEGIN { $ENV{TCOFFEDIR} = '/home/username/progs/T-COFFEE_distribution_Version_1.37/bin' } |
73
|
|
|
|
|
|
|
use Bio::Tools::Run::Alignment::TCoffee; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
If you are running an application on a webserver make sure the |
76
|
|
|
|
|
|
|
webserver environment has the proper PATH set or use the options 2 or |
77
|
|
|
|
|
|
|
3 to set the variables. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 PARAMETERS FOR ALIGNMENT COMPUTATION |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
There are a number of possible parameters one can pass in TCoffee. |
82
|
|
|
|
|
|
|
One should really read the online manual for the best explanation of |
83
|
|
|
|
|
|
|
all the features. See |
84
|
|
|
|
|
|
|
http://igs-server.cnrs-mrs.fr/~cnotred/Documentation/t_coffee/t_coffee_doc.html |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
These can be specified as parameters when instantiating a new TCoffee |
87
|
|
|
|
|
|
|
object, or through get/set methods of the same name (lowercase). |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 IN |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Title : IN |
92
|
|
|
|
|
|
|
Description : (optional) input filename, this is specified when |
93
|
|
|
|
|
|
|
align so should not use this directly unless one |
94
|
|
|
|
|
|
|
understand TCoffee program very well. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 TYPE |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Title : TYPE |
99
|
|
|
|
|
|
|
Args : [string] DNA, PROTEIN |
100
|
|
|
|
|
|
|
Description : (optional) set the sequence type, guessed automatically |
101
|
|
|
|
|
|
|
so should not use this directly |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 PARAMETERS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Title : PARAMETERS |
106
|
|
|
|
|
|
|
Description : (optional) Indicates a file containing extra parameters |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 EXTEND |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Title : EXTEND |
111
|
|
|
|
|
|
|
Args : 0, 1, or positive value |
112
|
|
|
|
|
|
|
Default : 1 |
113
|
|
|
|
|
|
|
Description : Flag indicating that library extension should be |
114
|
|
|
|
|
|
|
carried out when performing multiple alignments, if set |
115
|
|
|
|
|
|
|
to 0 then extension is not made, if set to 1 extension |
116
|
|
|
|
|
|
|
is made on all pairs in the library. If extension is |
117
|
|
|
|
|
|
|
set to another positive value, the extension is only |
118
|
|
|
|
|
|
|
carried out on pairs having a weigth value superior to |
119
|
|
|
|
|
|
|
the specified limit. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 DP_NORMALISE |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Title : DP_NORMALISE |
124
|
|
|
|
|
|
|
Args : 0 or positive value |
125
|
|
|
|
|
|
|
Default : 1000 |
126
|
|
|
|
|
|
|
Description : When using a value different from 0, this flag sets the |
127
|
|
|
|
|
|
|
score of the highest scoring pair to 1000. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 DP_MODE |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Title : DP_MODE |
132
|
|
|
|
|
|
|
Args : [string] gotoh_pair_wise, myers_miller_pair_wise, |
133
|
|
|
|
|
|
|
fasta_pair_wise cfasta_pair_wise |
134
|
|
|
|
|
|
|
Default : cfast_fair_wise |
135
|
|
|
|
|
|
|
Description : Indicates the type of dynamic programming used by |
136
|
|
|
|
|
|
|
the program |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
gotoh_pair_wise : implementation of the gotoh algorithm |
139
|
|
|
|
|
|
|
(quadratic in memory and time) |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
myers_miller_pair_wise : implementation of the Myers and Miller |
142
|
|
|
|
|
|
|
dynamic programming algorithm ( quadratic in time and linear in |
143
|
|
|
|
|
|
|
space). This algorithm is recommended for very long sequences. It |
144
|
|
|
|
|
|
|
is about 2 time slower than gotoh. It only accepts tg_mode=1. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
fasta_pair_wise: implementation of the fasta algorithm. The |
147
|
|
|
|
|
|
|
sequence is hashed, looking for ktuples words. Dynamic programming |
148
|
|
|
|
|
|
|
is only carried out on the ndiag best scoring diagonals. This is |
149
|
|
|
|
|
|
|
much faster but less accurate than the two previous. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
cfasta_pair_wise : c stands for checked. It is the same |
152
|
|
|
|
|
|
|
algorithm. The dynamic programming is made on the ndiag best |
153
|
|
|
|
|
|
|
diagonals, and then on the 2*ndiags, and so on until the scores |
154
|
|
|
|
|
|
|
converge. Complexity will depend on the level of divergence of the |
155
|
|
|
|
|
|
|
sequences, but will usually be L*log(L), with an accuracy |
156
|
|
|
|
|
|
|
comparable to the two first mode ( this was checked on BaliBase). |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 KTUPLE |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Title : KTUPLE |
161
|
|
|
|
|
|
|
Args : numeric value |
162
|
|
|
|
|
|
|
Default : 1 or 2 (1 for protein, 2 for DNA ) |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Description : Indicates the ktuple size for cfasta_pair_wise dp_mode |
165
|
|
|
|
|
|
|
and fasta_pair_wise. It is set to 1 for proteins, and 2 |
166
|
|
|
|
|
|
|
for DNA. The alphabet used for protein is not the 20 |
167
|
|
|
|
|
|
|
letter code, but a mildly degenerated version, where |
168
|
|
|
|
|
|
|
some residues are grouped under one letter, based on |
169
|
|
|
|
|
|
|
physicochemical properties: |
170
|
|
|
|
|
|
|
rk, de, qh, vilm, fy (the other residues are |
171
|
|
|
|
|
|
|
not degenerated). |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 NDIAGS |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Title : NDIAGS |
176
|
|
|
|
|
|
|
Args : numeric value |
177
|
|
|
|
|
|
|
Default : 0 |
178
|
|
|
|
|
|
|
Description : Indicates the number of diagonals used by the |
179
|
|
|
|
|
|
|
fasta_pair_wise algorithm. When set to 0, |
180
|
|
|
|
|
|
|
n_diag=Log (length of the smallest sequence) |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=head2 DIAG_MODE |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Title : DIAG_MODE |
185
|
|
|
|
|
|
|
Args : numeric value |
186
|
|
|
|
|
|
|
Default : 0 |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Description : Indicates the manner in which diagonals are scored |
190
|
|
|
|
|
|
|
during the fasta hashing. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
0 indicates that the score of a diagonal is equal to the |
193
|
|
|
|
|
|
|
sum of the scores of the exact matches it contains. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1 indicates that this score is set equal to the score of |
197
|
|
|
|
|
|
|
the best uninterrupted segment |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
1 can be useful when dealing with fragments of sequences. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head2 SIM_MATRIX |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Title : SIM_MATRIX |
204
|
|
|
|
|
|
|
Args : string |
205
|
|
|
|
|
|
|
Default : vasiliky |
206
|
|
|
|
|
|
|
Description : Indicates the manner in which the amino acid is being |
207
|
|
|
|
|
|
|
degenerated when hashing. All the substitution matrix |
208
|
|
|
|
|
|
|
are acceptable. Categories will be defined as sub-group |
209
|
|
|
|
|
|
|
of residues all having a positive substitution score |
210
|
|
|
|
|
|
|
(they can overlap). |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
If you wish to keep the non degenerated amino acid |
213
|
|
|
|
|
|
|
alphabet, use 'idmat' |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 MATRIX |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Title : MATRIX |
218
|
|
|
|
|
|
|
Args : |
219
|
|
|
|
|
|
|
Default : |
220
|
|
|
|
|
|
|
Description : This flag is provided for compatibility with |
221
|
|
|
|
|
|
|
ClustalW. Setting matrix = 'blosum' is equivalent to |
222
|
|
|
|
|
|
|
-in=Xblosum62mt , -matrix=pam is equivalent to |
223
|
|
|
|
|
|
|
in=Xpam250mt . Apart from this, the rules are similar |
224
|
|
|
|
|
|
|
to those applying when declaring a matrix with the |
225
|
|
|
|
|
|
|
-in=X fl |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head2 GAPOPEN |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Title : GAPOPEN |
230
|
|
|
|
|
|
|
Args : numeric |
231
|
|
|
|
|
|
|
Default : 0 |
232
|
|
|
|
|
|
|
Description : Indicates the penalty applied for opening a gap. The |
233
|
|
|
|
|
|
|
penalty must be negative. If you provide a positive |
234
|
|
|
|
|
|
|
value, it will automatically be turned into a negative |
235
|
|
|
|
|
|
|
number. We recommend a value of 10 with pam matrices, |
236
|
|
|
|
|
|
|
and a value of 0 when a library is used. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 GAPEXT |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Title : GAPEXT |
241
|
|
|
|
|
|
|
Args : numeric |
242
|
|
|
|
|
|
|
Default : 0 |
243
|
|
|
|
|
|
|
Description : Indicates the penalty applied for extending a gap. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 COSMETIC_PENALTY |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Title : COSMETIC_PENALTY |
249
|
|
|
|
|
|
|
Args : numeric |
250
|
|
|
|
|
|
|
Default : 100 |
251
|
|
|
|
|
|
|
Description : Indicates the penalty applied for opening a gap. This |
252
|
|
|
|
|
|
|
penalty is set to a very low value. It will only have |
253
|
|
|
|
|
|
|
an influence on the portions of the alignment that are |
254
|
|
|
|
|
|
|
unalignable. It will not make them more correct, but |
255
|
|
|
|
|
|
|
only more pleasing to the eye ( i.e. Avoid stretches of |
256
|
|
|
|
|
|
|
lonely residues). |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
The cosmetic penalty is automatically turned off if a |
259
|
|
|
|
|
|
|
substitution matrix is used rather than a library. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 TG_MODE |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Title : TG_MODE |
264
|
|
|
|
|
|
|
Args : 0,1,2 |
265
|
|
|
|
|
|
|
Default : 1 |
266
|
|
|
|
|
|
|
Description : (Terminal Gaps) |
267
|
|
|
|
|
|
|
0: indicates that terminal gaps must be panelized with |
268
|
|
|
|
|
|
|
a gapopen and a gapext penalty. |
269
|
|
|
|
|
|
|
1: indicates that terminal gaps must be penalized only |
270
|
|
|
|
|
|
|
with a gapext penalty |
271
|
|
|
|
|
|
|
2: indicates that terminal gaps must not be penalized. |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head2 WEIGHT |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Title : WEIGHT |
276
|
|
|
|
|
|
|
Args : sim or sim_ or integer value |
277
|
|
|
|
|
|
|
Default : sim |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Description : Weight defines the way alignments are weighted when |
281
|
|
|
|
|
|
|
turned into a library. |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sim indicates that the weight equals the average |
284
|
|
|
|
|
|
|
identity within the match residues. |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sim_matrix_name indicates the average identity with two |
287
|
|
|
|
|
|
|
residues regarded as identical when their |
288
|
|
|
|
|
|
|
substitution value is positive. The valid matrices |
289
|
|
|
|
|
|
|
names are in matrices.h (pam250mt) . Matrices not |
290
|
|
|
|
|
|
|
found in this header are considered to be |
291
|
|
|
|
|
|
|
filenames. See the format section for matrices. For |
292
|
|
|
|
|
|
|
instance, -weight=sim_pam250mt indicates that the |
293
|
|
|
|
|
|
|
grouping used for similarity will be the set of |
294
|
|
|
|
|
|
|
classes with positive substitutions. Other groups |
295
|
|
|
|
|
|
|
include |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sim_clustalw_col ( categories of clustalw |
298
|
|
|
|
|
|
|
marked with :) |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
sim_clustalw_dot ( categories of clustalw |
301
|
|
|
|
|
|
|
marked with .) |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Value indicates that all the pairs found in the |
305
|
|
|
|
|
|
|
alignments must be given the same weight equal to |
306
|
|
|
|
|
|
|
value. This is useful when the alignment one wishes to |
307
|
|
|
|
|
|
|
turn into a library must be given a pre-specified score |
308
|
|
|
|
|
|
|
(for instance if they come from a structure |
309
|
|
|
|
|
|
|
super-imposition program). Value is an integer: |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
-weight=1000 |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Note : Weight only affects methods that return an alignment to |
314
|
|
|
|
|
|
|
T-Coffee, such as ClustalW. On the contrary, the |
315
|
|
|
|
|
|
|
version of Lalign we use here returns a library where |
316
|
|
|
|
|
|
|
weights have already been applied and are therefore |
317
|
|
|
|
|
|
|
insensitive to the -weight flag. |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=head2 SEQ_TO_ALIGN |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
Title : SEQ_TO_ALIGN |
322
|
|
|
|
|
|
|
Args : filename |
323
|
|
|
|
|
|
|
Default : no file - align all the sequences |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Description : You may not wish to align all the sequences brought in |
326
|
|
|
|
|
|
|
by the -in flag. Supplying the seq_to_align flag allows |
327
|
|
|
|
|
|
|
for this, the file is simply a list of names in Fasta |
328
|
|
|
|
|
|
|
format. |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
However, note that library extension will be carried out |
331
|
|
|
|
|
|
|
on all the sequences. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head1 PARAMETERS FOR TREE COMPUTATION AND OUTPUT |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 NEWTREE |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Title : NEWTREE |
338
|
|
|
|
|
|
|
Args : treefile |
339
|
|
|
|
|
|
|
Default : no file |
340
|
|
|
|
|
|
|
Description : Indicates the name of the new tree to compute. The |
341
|
|
|
|
|
|
|
default will be .dnd, or . |
342
|
|
|
|
|
|
|
Format is Phylip/Newick tree format |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=head2 USETREE |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Title : USETREE |
347
|
|
|
|
|
|
|
Args : treefile |
348
|
|
|
|
|
|
|
Default : no file specified |
349
|
|
|
|
|
|
|
Description : This flag indicates that rather than computing a new |
350
|
|
|
|
|
|
|
dendrogram, t_coffee can use a pre-computed one. The |
351
|
|
|
|
|
|
|
tree files are in phylips format and compatible with |
352
|
|
|
|
|
|
|
ClustalW. In most cases, using a pre-computed tree will |
353
|
|
|
|
|
|
|
halve the computation time required by t_coffee. It is |
354
|
|
|
|
|
|
|
also possible to use trees output by ClustalW or |
355
|
|
|
|
|
|
|
Phylips. Format is Phylips tree format |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 TREE_MODE |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Title : TREE_MODE |
360
|
|
|
|
|
|
|
Args : slow, fast, very_fast |
361
|
|
|
|
|
|
|
Default : very_fast |
362
|
|
|
|
|
|
|
Description : This flag indicates the method used for computing the |
363
|
|
|
|
|
|
|
dendrogram. |
364
|
|
|
|
|
|
|
slow : the chosen dp_mode using the extended library, |
365
|
|
|
|
|
|
|
fast : The fasta dp_mode using the extended library. |
366
|
|
|
|
|
|
|
very_fast: The fasta dp_mode using pam250mt. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head2 QUICKTREE |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Title : QUICKTREE |
371
|
|
|
|
|
|
|
Args : |
372
|
|
|
|
|
|
|
Default : |
373
|
|
|
|
|
|
|
Description : This flag is kept for compatibility with ClustalW. |
374
|
|
|
|
|
|
|
It indicates that: -tree_mode=very_fast |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=head1 PARAMETERS FOR ALIGNMENT OUTPUT |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=head2 OUTFILE |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
Title : OUTFILE |
381
|
|
|
|
|
|
|
Args : out_aln file, default, no |
382
|
|
|
|
|
|
|
Default : default ( yourseqfile.aln) |
383
|
|
|
|
|
|
|
Description : indicates name of output alignment file |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head2 OUTPUT |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Title : OUTPUT |
388
|
|
|
|
|
|
|
Args : format1, format2 |
389
|
|
|
|
|
|
|
Default : clustalw |
390
|
|
|
|
|
|
|
Description : Indicated format for outputting outputfile |
391
|
|
|
|
|
|
|
Supported formats are: |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
clustalw_aln, clustalw: ClustalW format. |
394
|
|
|
|
|
|
|
gcg, msf_aln : Msf alignment. |
395
|
|
|
|
|
|
|
pir_aln : pir alignment. |
396
|
|
|
|
|
|
|
fasta_aln : fasta alignment. |
397
|
|
|
|
|
|
|
phylip : Phylip format. |
398
|
|
|
|
|
|
|
pir_seq : pir sequences (no gap). |
399
|
|
|
|
|
|
|
fasta_seq : fasta sequences (no gap). |
400
|
|
|
|
|
|
|
As well as: |
401
|
|
|
|
|
|
|
score_html : causes the output to be a reliability |
402
|
|
|
|
|
|
|
plot in HTML |
403
|
|
|
|
|
|
|
score_pdf : idem in PDF. |
404
|
|
|
|
|
|
|
score_ps : idem in postscript. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
More than one format can be indicated: |
407
|
|
|
|
|
|
|
-output=clustalw,gcg, score_html |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 CASE |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Title : CASE |
412
|
|
|
|
|
|
|
Args : upper, lower |
413
|
|
|
|
|
|
|
Default : upper |
414
|
|
|
|
|
|
|
Description : triggers choice of the case for output |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head2 CPU |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Title : CPU |
419
|
|
|
|
|
|
|
Args : value |
420
|
|
|
|
|
|
|
Default : 0 |
421
|
|
|
|
|
|
|
Description : Indicates the cpu time (micro seconds) that must be |
422
|
|
|
|
|
|
|
added to the t_coffee computation time. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=head2 OUT_LIB |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Title : OUT_LIB |
427
|
|
|
|
|
|
|
Args : name of library, default, no |
428
|
|
|
|
|
|
|
Default : default |
429
|
|
|
|
|
|
|
Description : Sets the name of the library output. Default implies |
430
|
|
|
|
|
|
|
.tc_lib |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=head2 OUTORDER |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Title : OUTORDER |
435
|
|
|
|
|
|
|
Args : input or aligned |
436
|
|
|
|
|
|
|
Default : input |
437
|
|
|
|
|
|
|
Description : Sets the name of the library output. Default implies |
438
|
|
|
|
|
|
|
.tc_lib |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 SEQNOS |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Title : SEQNOS |
443
|
|
|
|
|
|
|
Args : on or off |
444
|
|
|
|
|
|
|
Default : off |
445
|
|
|
|
|
|
|
Description : Causes the output alignment to contain residue numbers |
446
|
|
|
|
|
|
|
at the end of each line: |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head1 PARAMETERS FOR GENERIC OUTPUT |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 RUN_NAME |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Title : RUN_NAME |
453
|
|
|
|
|
|
|
Args : your run name |
454
|
|
|
|
|
|
|
Default : |
455
|
|
|
|
|
|
|
Description : This flag causes the prefix to be |
456
|
|
|
|
|
|
|
replaced by when renaming the default |
457
|
|
|
|
|
|
|
files. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=head2 ALIGN |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
Title : ALIGN |
462
|
|
|
|
|
|
|
Args : |
463
|
|
|
|
|
|
|
Default : |
464
|
|
|
|
|
|
|
Description : Indicates that the program must produce the |
465
|
|
|
|
|
|
|
alignment. This flag is here for compatibility with |
466
|
|
|
|
|
|
|
ClustalW |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head2 QUIET |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Title : QUIET |
471
|
|
|
|
|
|
|
Args : stderr, stdout, or filename, or nothing |
472
|
|
|
|
|
|
|
Default : stderr |
473
|
|
|
|
|
|
|
Description : Redirects the standard output to either a file. |
474
|
|
|
|
|
|
|
-quiet on its own redirect the output to /dev/null. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head2 CONVERT |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Title : CONVERT |
479
|
|
|
|
|
|
|
Args : |
480
|
|
|
|
|
|
|
Default : |
481
|
|
|
|
|
|
|
Description : Indicates that the program must not compute the |
482
|
|
|
|
|
|
|
alignment but simply convert all the sequences, |
483
|
|
|
|
|
|
|
alignments and libraries into the format indicated with |
484
|
|
|
|
|
|
|
-output. This flag can also be used if you simply want |
485
|
|
|
|
|
|
|
to compute a library ( i.e. You have an alignment and |
486
|
|
|
|
|
|
|
you want to turn it into a library). |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=head1 FEEDBACK |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 Mailing Lists |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
493
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
494
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
497
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=head2 Support |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
I |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
506
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
507
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
508
|
|
|
|
|
|
|
with code and data examples if at all possible. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head2 Reporting Bugs |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
513
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the web: |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich, Peter Schattner |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
Email jason-at-bioperl-dot-org, schattner@alum.mit.edu |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head1 APPENDIX |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
524
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=cut |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
package Bio::Tools::Run::Alignment::TCoffee; |
529
|
|
|
|
|
|
|
|
530
|
1
|
|
|
|
|
72
|
use vars qw($AUTOLOAD @ISA $PROGRAM_NAME $PROGRAM_DIR %DEFAULTS |
531
|
|
|
|
|
|
|
@TCOFFEE_PARAMS @TCOFFEE_SWITCHES @OTHER_SWITCHES %OK_FIELD |
532
|
1
|
|
|
1
|
|
101561
|
); |
|
1
|
|
|
|
|
1
|
|
533
|
1
|
|
|
1
|
|
3
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
15
|
|
534
|
1
|
|
|
1
|
|
3
|
use Cwd; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
43
|
|
535
|
1
|
|
|
1
|
|
538
|
use Bio::Seq; |
|
1
|
|
|
|
|
44113
|
|
|
1
|
|
|
|
|
23
|
|
536
|
1
|
|
|
1
|
|
462
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
19348
|
|
|
1
|
|
|
|
|
27
|
|
537
|
1
|
|
|
1
|
|
615
|
use Bio::SimpleAlign; |
|
1
|
|
|
|
|
27076
|
|
|
1
|
|
|
|
|
35
|
|
538
|
1
|
|
|
1
|
|
446
|
use Bio::AlignIO; |
|
1
|
|
|
|
|
1039
|
|
|
1
|
|
|
|
|
24
|
|
539
|
1
|
|
|
1
|
|
5
|
use Bio::Root::IO; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
540
|
1
|
|
|
1
|
|
374
|
use Bio::Factory::ApplicationFactoryI; |
|
1
|
|
|
|
|
112
|
|
|
1
|
|
|
|
|
30
|
|
541
|
1
|
|
|
1
|
|
425
|
use Bio::Tools::Run::WrapperBase; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
102
|
|
542
|
|
|
|
|
|
|
@ISA = qw(Bio::Tools::Run::WrapperBase |
543
|
|
|
|
|
|
|
Bio::Factory::ApplicationFactoryI); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# You will need to enable TCoffee to find the tcoffee program. This can be done |
546
|
|
|
|
|
|
|
# in (at least) twp ways: |
547
|
|
|
|
|
|
|
# 1. define an environmental variable TCOFFEE: |
548
|
|
|
|
|
|
|
# export TCOFFEEDIR=/home/progs/tcoffee or |
549
|
|
|
|
|
|
|
# 2. include a definition of an environmental variable TCOFFEEDIR |
550
|
|
|
|
|
|
|
# in every script that will |
551
|
|
|
|
|
|
|
# use Bio::Tools::Run::Alignment::TCoffee.pm. |
552
|
|
|
|
|
|
|
# BEGIN {$ENV{TCOFFEEDIR} = '/home/progs/tcoffee'; } |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
BEGIN { |
555
|
1
|
|
|
1
|
|
1
|
$PROGRAM_NAME = 't_coffee'; |
556
|
1
|
|
|
|
|
2
|
$PROGRAM_DIR = $ENV{'TCOFFEEDIR'}; |
557
|
1
|
|
|
|
|
4
|
%DEFAULTS = ( 'MATRIX' => 'blosum', |
558
|
|
|
|
|
|
|
'OUTPUT' => 'clustalw', |
559
|
|
|
|
|
|
|
'AFORMAT'=> 'msf', |
560
|
|
|
|
|
|
|
'METHODS' => [qw(lalign_id_pair clustalw_pair)] |
561
|
|
|
|
|
|
|
); |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
|
564
|
1
|
|
|
|
|
6
|
@TCOFFEE_PARAMS = qw(IN TYPE PARAMETERS DO_NORMALISE EXTEND |
565
|
|
|
|
|
|
|
DP_MODE KTUPLE NDIAGS DIAG_MODE SIM_MATRIX |
566
|
|
|
|
|
|
|
MATRIX GAPOPEN GAPEXT COSMETIC_PENALTY TG_MODE |
567
|
|
|
|
|
|
|
WEIGHT SEQ_TO_ALIGN NEWTREE USETREE TREE_MODE |
568
|
|
|
|
|
|
|
OUTFILE OUTPUT CASE CPU OUT_LIB OUTORDER SEQNOS |
569
|
|
|
|
|
|
|
RUN_NAME CONVERT |
570
|
|
|
|
|
|
|
); |
571
|
|
|
|
|
|
|
|
572
|
1
|
|
|
|
|
2
|
@TCOFFEE_SWITCHES = qw(QUICKTREE); |
573
|
|
|
|
|
|
|
|
574
|
1
|
|
|
|
|
1
|
@OTHER_SWITCHES = qw(QUIET ALIGN KEEPDND); |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
# Authorize attribute fields |
577
|
1
|
|
|
|
|
2
|
foreach my $attr ( @TCOFFEE_PARAMS, @TCOFFEE_SWITCHES, @OTHER_SWITCHES ) { |
578
|
33
|
|
|
|
|
1810
|
$OK_FIELD{$attr}++; } |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=head2 program_name |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Title : program_name |
584
|
|
|
|
|
|
|
Usage : $factory->program_name() |
585
|
|
|
|
|
|
|
Function: holds the program name |
586
|
|
|
|
|
|
|
Returns: string |
587
|
|
|
|
|
|
|
Args : None |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=cut |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub program_name { |
592
|
6
|
|
|
6
|
1
|
22
|
return $PROGRAM_NAME; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=head2 program_dir |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Title : program_dir |
598
|
|
|
|
|
|
|
Usage : $factory->program_dir(@params) |
599
|
|
|
|
|
|
|
Function: returns the program directory, obtained from ENV variable. |
600
|
|
|
|
|
|
|
Returns: string |
601
|
|
|
|
|
|
|
Args : |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=cut |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
sub program_dir { |
606
|
3
|
|
|
3
|
1
|
8
|
return $PROGRAM_DIR; |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
sub new { |
610
|
1
|
|
|
1
|
1
|
88
|
my ($class,@args) = @_; |
611
|
1
|
|
|
|
|
10
|
my $self = $class->SUPER::new(@args); |
612
|
1
|
|
|
|
|
9
|
my ($attr, $value); |
613
|
|
|
|
|
|
|
|
614
|
1
|
|
|
|
|
4
|
while (@args) { |
615
|
0
|
|
|
|
|
0
|
$attr = shift @args; |
616
|
0
|
|
|
|
|
0
|
$value = shift @args; |
617
|
0
|
0
|
|
|
|
0
|
next if( $attr =~ /^-/); # don't want named parameters |
618
|
0
|
|
|
|
|
0
|
$self->$attr($value); |
619
|
|
|
|
|
|
|
} |
620
|
1
|
50
|
|
|
|
11
|
$self->matrix($DEFAULTS{'MATRIX'}) unless( $self->matrix ); |
621
|
1
|
50
|
|
|
|
6
|
$self->output($DEFAULTS{'OUTPUT'}) unless( $self->output ); |
622
|
1
|
50
|
|
|
|
4
|
$self->methods($DEFAULTS{'METHODS'}) unless $self->methods; |
623
|
|
|
|
|
|
|
# $self->aformat($DEFAULTS{'AFORMAT'}) unless $self->aformat; |
624
|
|
|
|
|
|
|
|
625
|
1
|
|
|
|
|
2
|
return $self; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
sub AUTOLOAD { |
629
|
7
|
|
|
7
|
|
1084
|
my $self = shift; |
630
|
7
|
|
|
|
|
17
|
my $attr = $AUTOLOAD; |
631
|
7
|
|
|
|
|
19
|
$attr =~ s/.*:://; |
632
|
7
|
|
|
|
|
9
|
$attr = uc $attr; |
633
|
|
|
|
|
|
|
# aliasing |
634
|
7
|
50
|
|
|
|
10
|
$attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME'; |
635
|
7
|
50
|
|
|
|
15
|
$self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; |
636
|
|
|
|
|
|
|
|
637
|
7
|
100
|
|
|
|
13
|
$self->{$attr} = shift if @_; |
638
|
7
|
|
|
|
|
20
|
return $self->{$attr}; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=head2 error_string |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
Title : error_string |
644
|
|
|
|
|
|
|
Usage : $obj->error_string($newval) |
645
|
|
|
|
|
|
|
Function: Where the output from the last analysus run is stored. |
646
|
|
|
|
|
|
|
Returns : value of error_string |
647
|
|
|
|
|
|
|
Args : newvalue (optional) |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub error_string{ |
653
|
0
|
|
|
0
|
1
|
0
|
my ($self,$value) = @_; |
654
|
0
|
0
|
|
|
|
0
|
if( defined $value) { |
655
|
0
|
|
|
|
|
0
|
$self->{'error_string'} = $value; |
656
|
|
|
|
|
|
|
} |
657
|
0
|
|
|
|
|
0
|
return $self->{'error_string'}; |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=head2 version |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
Title : version |
664
|
|
|
|
|
|
|
Usage : exit if $prog->version() < 1.8 |
665
|
|
|
|
|
|
|
Function: Determine the version number of the program |
666
|
|
|
|
|
|
|
Example : |
667
|
|
|
|
|
|
|
Returns : float or undef |
668
|
|
|
|
|
|
|
Args : none |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
=cut |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
sub version { |
673
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
674
|
0
|
|
|
|
|
0
|
my $exe; |
675
|
0
|
0
|
|
|
|
0
|
return undef unless $exe = $self->executable; |
676
|
0
|
|
|
|
|
0
|
my $string = `$exe -quiet=stdout 2>&1` ; |
677
|
0
|
|
|
|
|
0
|
$string =~ /Version_([\d.]+)/; |
678
|
0
|
|
0
|
|
|
0
|
return $1 || undef; |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
} |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head2 run |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
Title : run |
685
|
|
|
|
|
|
|
Usage : my $output = $application->run(-seq => $seq, |
686
|
|
|
|
|
|
|
-profile => $profile, |
687
|
|
|
|
|
|
|
-type => 'profile-aln'); |
688
|
|
|
|
|
|
|
Function: Generic run of an application |
689
|
|
|
|
|
|
|
Returns : Bio::SimpleAlign object |
690
|
|
|
|
|
|
|
Args : key-value parameters allowed for TCoffee runs AND |
691
|
|
|
|
|
|
|
-type => profile-aln or alignment for profile alignments or |
692
|
|
|
|
|
|
|
just multiple sequence alignment |
693
|
|
|
|
|
|
|
-seq => either Bio::PrimarySeqI object OR |
694
|
|
|
|
|
|
|
array ref of Bio::PrimarySeqI objects OR |
695
|
|
|
|
|
|
|
filename of sequences to run with |
696
|
|
|
|
|
|
|
-profile => profile to align to, if this is an array ref |
697
|
|
|
|
|
|
|
will specify the first two entries as the two |
698
|
|
|
|
|
|
|
profiles to align to each other |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=cut |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
sub run{ |
703
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
704
|
0
|
|
|
|
|
0
|
my ($type,$seq,$profile) = $self->_rearrange([qw(TYPE |
705
|
|
|
|
|
|
|
SEQ |
706
|
|
|
|
|
|
|
PROFILE)], |
707
|
|
|
|
|
|
|
@args); |
708
|
0
|
0
|
|
|
|
0
|
if( $type =~ /align/i ) { |
|
|
0
|
|
|
|
|
|
709
|
0
|
|
|
|
|
0
|
return $self->align($seq); |
710
|
|
|
|
|
|
|
} elsif( $type =~ /profile/i ) { |
711
|
0
|
|
|
|
|
0
|
return $self->profile_align($profile,$seq); |
712
|
|
|
|
|
|
|
} else { |
713
|
0
|
|
|
|
|
0
|
$self->warn("unrecognized alignment type $type\n"); |
714
|
|
|
|
|
|
|
} |
715
|
0
|
|
|
|
|
0
|
return undef; |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 align |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
Title : align |
721
|
|
|
|
|
|
|
Usage : |
722
|
|
|
|
|
|
|
$inputfilename = 't/data/cysprot.fa'; |
723
|
|
|
|
|
|
|
$aln = $factory->align($inputfilename); |
724
|
|
|
|
|
|
|
or |
725
|
|
|
|
|
|
|
$seq_array_ref = \@seq_array; |
726
|
|
|
|
|
|
|
# @seq_array is array of Seq objs |
727
|
|
|
|
|
|
|
$aln = $factory->align($seq_array_ref); |
728
|
|
|
|
|
|
|
Function: Perform a multiple sequence alignment |
729
|
|
|
|
|
|
|
Returns : Reference to a SimpleAlign object containing the |
730
|
|
|
|
|
|
|
sequence alignment. |
731
|
|
|
|
|
|
|
Args : Name of a file containing a set of unaligned fasta sequences |
732
|
|
|
|
|
|
|
or else an array of references to Bio::Seq objects. |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
Throws an exception if argument is not either a string (eg a |
735
|
|
|
|
|
|
|
filename) or a reference to an array of Bio::Seq objects. If |
736
|
|
|
|
|
|
|
argument is string, throws exception if file corresponding to string |
737
|
|
|
|
|
|
|
name can not be found. If argument is Bio::Seq array, throws |
738
|
|
|
|
|
|
|
exception if less than two sequence objects are in array. |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
=cut |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub align { |
743
|
0
|
|
|
0
|
1
|
0
|
my ($self,$input) = @_; |
744
|
|
|
|
|
|
|
# Create input file pointer |
745
|
0
|
|
|
|
|
0
|
$self->io->_io_cleanup(); |
746
|
0
|
|
|
|
|
0
|
my ($infilename,$type) = $self->_setinput($input); |
747
|
0
|
0
|
|
|
|
0
|
if (!$infilename) { |
748
|
0
|
|
|
|
|
0
|
$self->throw("Bad input data or less than 2 sequences in $input !"); |
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
|
751
|
0
|
|
|
|
|
0
|
my $param_string = $self->_setparams(); |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
# run tcoffee |
754
|
0
|
|
|
|
|
0
|
return $self->_run('align', [$infilename,$type], $param_string); |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
################################################# |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=head2 profile_align |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
Title : profile_align |
762
|
|
|
|
|
|
|
Usage : |
763
|
|
|
|
|
|
|
Function: Perform an alignment of 2 (sub)alignments |
764
|
|
|
|
|
|
|
Example : |
765
|
|
|
|
|
|
|
Returns : Reference to a SimpleAlign object containing the (super)alignment. |
766
|
|
|
|
|
|
|
Args : Names of 2 files containing the subalignments |
767
|
|
|
|
|
|
|
or references to 2 Bio::SimpleAlign objects. |
768
|
|
|
|
|
|
|
Note : Needs to be updated to run with newer TCoffee code, which |
769
|
|
|
|
|
|
|
allows more than two profile alignments. |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
Throws an exception if arguments are not either strings (eg filenames) |
772
|
|
|
|
|
|
|
or references to SimpleAlign objects. |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=cut |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
sub profile_align { |
777
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
778
|
0
|
|
|
|
|
0
|
my $input1 = shift; |
779
|
0
|
|
|
|
|
0
|
my $input2 = shift; |
780
|
|
|
|
|
|
|
|
781
|
0
|
|
|
|
|
0
|
my ($temp,$infilename1,$infilename2,$type1,$type2,$input,$seq); |
782
|
|
|
|
|
|
|
|
783
|
0
|
|
|
|
|
0
|
$self->io->_io_cleanup(); |
784
|
|
|
|
|
|
|
# Create input file pointers |
785
|
0
|
|
|
|
|
0
|
($infilename1,$type1) = $self->_setinput($input1); |
786
|
0
|
|
|
|
|
0
|
($infilename2,$type2) = $self->_setinput($input2); |
787
|
0
|
0
|
|
|
|
0
|
unless ($type1) { |
788
|
0
|
|
|
|
|
0
|
$self->throw("Unknown type for first argument"); |
789
|
|
|
|
|
|
|
} |
790
|
0
|
0
|
|
|
|
0
|
unless ($type2) { |
791
|
0
|
|
|
|
|
0
|
$self->throw("Unknown type for second argument") |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
0
|
0
|
0
|
|
|
0
|
if (!$infilename1 || !$infilename2) { |
795
|
0
|
|
|
|
|
0
|
$self->throw("Bad input data: $input1 or $input2 !"); |
796
|
|
|
|
|
|
|
} |
797
|
|
|
|
|
|
|
|
798
|
0
|
|
|
|
|
0
|
my $param_string = $self->_setparams(); |
799
|
|
|
|
|
|
|
# run tcoffee |
800
|
0
|
|
|
|
|
0
|
my $aln = $self->_run('profile-aln', |
801
|
|
|
|
|
|
|
[$infilename1,$type1], |
802
|
|
|
|
|
|
|
[$infilename2,$type2], |
803
|
|
|
|
|
|
|
$param_string) |
804
|
|
|
|
|
|
|
; |
805
|
|
|
|
|
|
|
} |
806
|
|
|
|
|
|
|
################################################# |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
=head2 _run |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
Title : _run |
811
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
812
|
|
|
|
|
|
|
Function: makes actual system call to tcoffee program |
813
|
|
|
|
|
|
|
Example : |
814
|
|
|
|
|
|
|
Returns : nothing; tcoffee output is written to a |
815
|
|
|
|
|
|
|
temporary file OR specified output file |
816
|
|
|
|
|
|
|
Args : Name of a file containing a set of unaligned fasta sequences |
817
|
|
|
|
|
|
|
and hash of parameters to be passed to tcoffee |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
=cut |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
sub _run { |
822
|
0
|
|
|
0
|
|
0
|
my ($infilename, $infile1,$infile2) = ('','',''); |
823
|
0
|
|
|
|
|
0
|
my $self = shift; |
824
|
0
|
|
|
|
|
0
|
my $command = shift; |
825
|
0
|
|
|
|
|
0
|
my $instring; |
826
|
0
|
0
|
|
|
|
0
|
if ($command =~ /align/) { |
827
|
0
|
|
|
|
|
0
|
my $infile = shift ; |
828
|
0
|
|
|
|
|
0
|
my $type; |
829
|
0
|
|
|
|
|
0
|
($infilename,$type) = @$infile; |
830
|
0
|
|
|
|
|
0
|
$instring = '-in='.join(',',($infilename, 'X'.$self->matrix, |
831
|
|
|
|
|
|
|
$self->methods)); |
832
|
|
|
|
|
|
|
} |
833
|
0
|
0
|
|
|
|
0
|
if ($command =~ /profile/) { |
834
|
0
|
|
|
|
|
0
|
my $in1 = shift ; |
835
|
0
|
|
|
|
|
0
|
my $in2 = shift ; |
836
|
0
|
|
|
|
|
0
|
my ($type1,$type2); |
837
|
0
|
|
|
|
|
0
|
($infile1,$type1) = @$in1; |
838
|
0
|
|
|
|
|
0
|
($infile2,$type2) = @$in2; |
839
|
|
|
|
|
|
|
# in later versions (tested on 5.72 and 7.54) the API for profile |
840
|
|
|
|
|
|
|
# alignment changed. This attempts to do the right thing for older |
841
|
|
|
|
|
|
|
# versions but corrects for newer ones |
842
|
0
|
0
|
0
|
|
|
0
|
if ($self->version && $self->version < 5) { |
843
|
|
|
|
|
|
|
# this breaks severely on newer TCoffee (>= v5) |
844
|
0
|
0
|
0
|
|
|
0
|
unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i) ) { |
845
|
|
|
|
|
|
|
$instring = '-in='.join(',', |
846
|
|
|
|
|
|
|
($type2.$infile2), |
847
|
|
|
|
|
|
|
'X'.$self->matrix, |
848
|
0
|
|
|
|
|
0
|
(map {'M'.$_} $self->methods) |
|
0
|
|
|
|
|
0
|
|
849
|
|
|
|
|
|
|
); |
850
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.$infile1; |
851
|
|
|
|
|
|
|
} else { |
852
|
|
|
|
|
|
|
$instring = '-in='.join(',',( |
853
|
|
|
|
|
|
|
$type1.$infile1, |
854
|
|
|
|
|
|
|
$type2.$infile2, |
855
|
0
|
|
|
|
|
0
|
(map {'M'.$_} $self->methods) |
|
0
|
|
|
|
|
0
|
|
856
|
|
|
|
|
|
|
) |
857
|
|
|
|
|
|
|
); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
} else { |
860
|
0
|
0
|
|
|
|
0
|
if ($type2 eq 'S') { |
861
|
|
|
|
|
|
|
# second infile is a sequence, not an alignment |
862
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.join(',',$infile1); |
863
|
0
|
|
|
|
|
0
|
$instring .= ' -seq='.join(',',$infile2); |
864
|
|
|
|
|
|
|
} else { |
865
|
0
|
|
|
|
|
0
|
$instring .= ' -profile='.join(',',$infile1,$infile2); |
866
|
|
|
|
|
|
|
} |
867
|
0
|
0
|
0
|
|
|
0
|
$instring .= ' -matrix='.$self->matrix unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i)) ; |
868
|
0
|
0
|
|
|
|
0
|
$instring .= ' -method='.join(',',$self->methods) if ($self->methods) ; |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
0
|
|
|
|
|
0
|
my $param_string = shift; |
872
|
|
|
|
|
|
|
# my ($paramfh,$parameterFile) = $self->io->tempfile; |
873
|
|
|
|
|
|
|
# print $paramfh ( "$instring\n-output=gcg$param_string") ; |
874
|
|
|
|
|
|
|
# close($paramfh); |
875
|
|
|
|
|
|
|
# my $commandstring = "t_coffee -output=gcg -parameters $parameterFile" ; ##MJL |
876
|
|
|
|
|
|
|
|
877
|
0
|
|
|
|
|
0
|
my $commandstring = $self->executable." $instring $param_string"; |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
#$self->debug( "tcoffee command = $commandstring \n"); |
880
|
|
|
|
|
|
|
|
881
|
0
|
|
|
|
|
0
|
my $status = system($commandstring); |
882
|
0
|
|
|
|
|
0
|
my $outfile = $self->outfile(); |
883
|
|
|
|
|
|
|
|
884
|
0
|
0
|
0
|
|
|
0
|
if( !-e $outfile || -z $outfile ) { |
885
|
0
|
|
|
|
|
0
|
$self->warn( "TCoffee call crashed: $? [command $commandstring]\n"); |
886
|
0
|
|
|
|
|
0
|
return undef; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
# retrieve alignment (Note: MSF format for AlignIO = GCG format of |
890
|
|
|
|
|
|
|
# tcoffee) |
891
|
|
|
|
|
|
|
|
892
|
0
|
|
|
|
|
0
|
my $in = Bio::AlignIO->new('-file' => $outfile, '-format' => |
893
|
|
|
|
|
|
|
$self->output); |
894
|
0
|
|
|
|
|
0
|
my $aln = $in->next_aln(); |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
# Replace file suffix with dnd to find name of dendrogram file(s) to delete |
897
|
0
|
0
|
|
|
|
0
|
if( ! $self->keepdnd ) { |
898
|
0
|
|
|
|
|
0
|
foreach my $f ( $infilename, $infile1, $infile2 ) { |
899
|
0
|
0
|
0
|
|
|
0
|
next if( !defined $f || $f eq ''); |
900
|
0
|
|
|
|
|
0
|
$f =~ s/\.[^\.]*$// ; |
901
|
|
|
|
|
|
|
# because TCoffee writes these files to the CWD |
902
|
0
|
0
|
|
|
|
0
|
if( $Bio::Root::IO::PATHSEP ) { |
903
|
0
|
|
|
|
|
0
|
my @line = split(/$Bio::Root::IO::PATHSEP/, $f); |
904
|
0
|
|
|
|
|
0
|
$f = pop @line; |
905
|
|
|
|
|
|
|
} else { |
906
|
0
|
|
|
|
|
0
|
(undef, undef, $f) = File::Spec->splitpath($f); |
907
|
|
|
|
|
|
|
} |
908
|
0
|
0
|
|
|
|
0
|
unlink $f .'.dnd' if( $f ne '' ); |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
} |
911
|
0
|
|
|
|
|
0
|
return $aln; |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=head2 _setinput |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
Title : _setinput |
918
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
919
|
|
|
|
|
|
|
Function: Create input file for tcoffee program |
920
|
|
|
|
|
|
|
Example : |
921
|
|
|
|
|
|
|
Returns : name of file containing tcoffee data input AND |
922
|
|
|
|
|
|
|
type of file (if known, S for sequence, L for sequence library, |
923
|
|
|
|
|
|
|
A for sequence alignment) |
924
|
|
|
|
|
|
|
Args : Seq or Align object reference or input file name |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=cut |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
sub _setinput { |
930
|
0
|
|
|
0
|
|
0
|
my ($self,$input) = @_; |
931
|
0
|
|
|
|
|
0
|
my ($infilename, $seq, $temp, $tfh); |
932
|
|
|
|
|
|
|
# If $input is not a reference it better be the name of a |
933
|
|
|
|
|
|
|
# file with the sequence/ alignment data... |
934
|
0
|
|
|
|
|
0
|
my $type = ''; |
935
|
0
|
0
|
|
|
|
0
|
if (! ref $input) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# check that file exists or throw |
937
|
0
|
|
|
|
|
0
|
$infilename = $input; |
938
|
0
|
0
|
|
|
|
0
|
unless (-e $input) {return 0;} |
|
0
|
|
|
|
|
0
|
|
939
|
|
|
|
|
|
|
# let's peek and guess |
940
|
0
|
0
|
|
|
|
0
|
open(my $IN,$infilename) || $self->throw("Cannot open $infilename"); |
941
|
0
|
|
|
|
|
0
|
my $header = <$IN>; |
942
|
0
|
0
|
0
|
|
|
0
|
if( $header =~ /^\s+\d+\s+\d+/ || |
|
|
|
0
|
|
|
|
|
943
|
|
|
|
|
|
|
$header =~ /Pileup/i || |
944
|
|
|
|
|
|
|
$header =~ /clustal/i ) { # phylip |
945
|
0
|
|
|
|
|
0
|
$type = 'A'; |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
# On some systems, having filenames with / in them (ie. a file in a |
949
|
|
|
|
|
|
|
# directory) causes t-coffee to completely fail. It warns on all systems. |
950
|
|
|
|
|
|
|
# The -no_warning option solves this, but there is still some strange |
951
|
|
|
|
|
|
|
# bug when doing certain profile-related things. This is magically solved |
952
|
|
|
|
|
|
|
# by copying the profile file to a temp file in the current directory, so |
953
|
|
|
|
|
|
|
# it its filename supplied to t-coffee contains no / |
954
|
|
|
|
|
|
|
# (It's messy here - I just do this to /all/ input files to most easily |
955
|
|
|
|
|
|
|
# catch all variants of providing a profile - it may only be the last |
956
|
|
|
|
|
|
|
# form (isa("Bio::PrimarySeqI")) that causes a problem?) |
957
|
|
|
|
|
|
|
|
958
|
0
|
|
|
|
|
0
|
my (undef, undef, $adjustedfilename) = File::Spec->splitpath($infilename); |
959
|
0
|
0
|
|
|
|
0
|
if ($adjustedfilename ne $infilename) { |
960
|
0
|
|
|
|
|
0
|
my ($fh, $tempfile) = $self->io->tempfile(-dir => cwd()); |
961
|
0
|
|
|
|
|
0
|
seek($IN, 0, 0); |
962
|
0
|
|
|
|
|
0
|
while (<$IN>) { |
963
|
0
|
|
|
|
|
0
|
print $fh $_; |
964
|
|
|
|
|
|
|
} |
965
|
0
|
|
|
|
|
0
|
close($fh); |
966
|
0
|
|
|
|
|
0
|
(undef, undef, $tempfile) = File::Spec->splitpath($tempfile); |
967
|
0
|
|
|
|
|
0
|
$infilename = $tempfile; |
968
|
0
|
|
|
|
|
0
|
$type = 'S'; |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
|
971
|
0
|
|
|
|
|
0
|
close($IN); |
972
|
0
|
|
|
|
|
0
|
return ($infilename,$type); |
973
|
|
|
|
|
|
|
} elsif (ref($input) =~ /ARRAY/i ) { |
974
|
|
|
|
|
|
|
# $input may be an array of BioSeq objects... |
975
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of array |
976
|
0
|
|
|
|
|
0
|
($tfh,$infilename) = $self->io->tempfile(-dir => cwd()); |
977
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
978
|
0
|
0
|
|
|
|
0
|
if( ! ref($input->[0]) ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
979
|
0
|
|
|
|
|
0
|
$self->warn("passed an array ref which did not contain objects to _setinput"); |
980
|
0
|
|
|
|
|
0
|
return undef; |
981
|
|
|
|
|
|
|
} elsif( $input->[0]->isa('Bio::PrimarySeqI') ) { |
982
|
0
|
|
|
|
|
0
|
$temp = Bio::SeqIO->new('-fh' => $tfh, |
983
|
|
|
|
|
|
|
'-format' => 'fasta'); |
984
|
0
|
|
|
|
|
0
|
my $ct = 1; |
985
|
0
|
|
|
|
|
0
|
foreach $seq (@$input) { |
986
|
0
|
0
|
0
|
|
|
0
|
return 0 unless ( ref($seq) && |
987
|
|
|
|
|
|
|
$seq->isa("Bio::PrimarySeqI") ); |
988
|
0
|
0
|
0
|
|
|
0
|
if( ! defined $seq->display_id || |
989
|
|
|
|
|
|
|
$seq->display_id =~ /^\s+$/) { |
990
|
0
|
|
|
|
|
0
|
$seq->display_id( "Seq".$ct++); |
991
|
|
|
|
|
|
|
} |
992
|
0
|
|
|
|
|
0
|
$temp->write_seq($seq); |
993
|
|
|
|
|
|
|
} |
994
|
0
|
|
|
|
|
0
|
$temp->close(); |
995
|
0
|
|
|
|
|
0
|
undef $temp; |
996
|
0
|
|
|
|
|
0
|
close($tfh); |
997
|
0
|
|
|
|
|
0
|
$tfh = undef; |
998
|
0
|
|
|
|
|
0
|
$type = 'S'; |
999
|
|
|
|
|
|
|
} elsif( $input->[0]->isa('Bio::Align::AlignI' ) ) { |
1000
|
0
|
|
|
|
|
0
|
$temp = Bio::AlignIO->new('-fh' => $tfh, |
1001
|
|
|
|
|
|
|
'-format' => $self->aformat); |
1002
|
0
|
|
|
|
|
0
|
foreach my $aln (@$input) { |
1003
|
0
|
0
|
0
|
|
|
0
|
next unless ( ref($aln) && |
1004
|
|
|
|
|
|
|
$aln->isa("Bio::Align::AlignI") ); |
1005
|
0
|
|
|
|
|
0
|
$temp->write_aln($aln); |
1006
|
|
|
|
|
|
|
} |
1007
|
0
|
|
|
|
|
0
|
$temp->close(); |
1008
|
0
|
|
|
|
|
0
|
undef $temp; |
1009
|
0
|
|
|
|
|
0
|
close($tfh); |
1010
|
0
|
|
|
|
|
0
|
$tfh = undef; |
1011
|
0
|
|
|
|
|
0
|
$type = 'A'; |
1012
|
|
|
|
|
|
|
} else { |
1013
|
0
|
|
|
|
|
0
|
$self->warn( "got an array ref with 1st entry ". |
1014
|
|
|
|
|
|
|
$input->[0]. |
1015
|
|
|
|
|
|
|
" and don't know what to do with it\n"); |
1016
|
|
|
|
|
|
|
} |
1017
|
0
|
|
|
|
|
0
|
return ($infilename,$type); |
1018
|
|
|
|
|
|
|
# $input may be a SimpleAlign object. |
1019
|
|
|
|
|
|
|
} elsif ( $input->isa("Bio::Align::AlignI") ) { |
1020
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of SimpleAlign object |
1021
|
0
|
|
|
|
|
0
|
($tfh, $infilename) = $self->io->tempfile(-dir => cwd()); |
1022
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
1023
|
0
|
|
|
|
|
0
|
$temp = Bio::AlignIO->new(-fh=>$tfh, |
1024
|
|
|
|
|
|
|
'-format' => 'clustalw'); |
1025
|
0
|
|
|
|
|
0
|
$temp->write_aln($input); |
1026
|
0
|
|
|
|
|
0
|
close($tfh); |
1027
|
0
|
|
|
|
|
0
|
undef $tfh; |
1028
|
0
|
|
|
|
|
0
|
return ($infilename,'A'); |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
# or $input may be a single BioSeq object (to be added to |
1032
|
|
|
|
|
|
|
# a previous alignment) |
1033
|
|
|
|
|
|
|
elsif ( $input->isa("Bio::PrimarySeqI")) { |
1034
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of BioSeq object |
1035
|
0
|
|
|
|
|
0
|
($tfh,$infilename) = $self->io->tempfile(-dir => cwd()); |
1036
|
0
|
|
|
|
|
0
|
(undef, undef, $infilename) = File::Spec->splitpath($infilename); |
1037
|
0
|
|
|
|
|
0
|
$temp = Bio::SeqIO->new(-fh=> $tfh, '-format' =>'Fasta'); |
1038
|
0
|
|
|
|
|
0
|
$temp->write_seq($input); |
1039
|
0
|
|
|
|
|
0
|
$temp->close(); |
1040
|
0
|
|
|
|
|
0
|
close($tfh); |
1041
|
0
|
|
|
|
|
0
|
undef $tfh; |
1042
|
0
|
|
|
|
|
0
|
return ($infilename,'S'); |
1043
|
|
|
|
|
|
|
} else { |
1044
|
0
|
|
|
|
|
0
|
$self->warn("Got $input and don't know what to do with it\n"); |
1045
|
|
|
|
|
|
|
} |
1046
|
0
|
|
|
|
|
0
|
return 0; |
1047
|
|
|
|
|
|
|
} |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
=head2 _setparams |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
Title : _setparams |
1053
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
1054
|
|
|
|
|
|
|
Function: Create parameter inputs for tcoffee program |
1055
|
|
|
|
|
|
|
Example : |
1056
|
|
|
|
|
|
|
Returns : parameter string to be passed to tcoffee |
1057
|
|
|
|
|
|
|
during align or profile_align |
1058
|
|
|
|
|
|
|
Args : name of calling object |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
=cut |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
sub _setparams { |
1063
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
1064
|
0
|
|
|
|
|
0
|
my ($attr, $value,$param_string); |
1065
|
0
|
|
|
|
|
0
|
$param_string = ''; |
1066
|
0
|
|
|
|
|
0
|
my $laststr; |
1067
|
0
|
|
|
|
|
0
|
for $attr ( @TCOFFEE_PARAMS ) { |
1068
|
0
|
|
|
|
|
0
|
$value = $self->$attr(); |
1069
|
0
|
0
|
|
|
|
0
|
next unless (defined $value); |
1070
|
0
|
|
|
|
|
0
|
my $attr_key = lc $attr; |
1071
|
0
|
0
|
|
|
|
0
|
if( $attr_key =~ /matrix/ ) { |
1072
|
0
|
|
|
|
|
0
|
$self->{'_in'} = [ "X".lc($value) ]; |
1073
|
|
|
|
|
|
|
} else { |
1074
|
0
|
|
|
|
|
0
|
$attr_key = ' -'.$attr_key; |
1075
|
0
|
|
|
|
|
0
|
$param_string .= $attr_key .'='.$value; |
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
} |
1078
|
0
|
|
|
|
|
0
|
for $attr ( @TCOFFEE_SWITCHES) { |
1079
|
0
|
|
|
|
|
0
|
$value = $self->$attr(); |
1080
|
0
|
0
|
|
|
|
0
|
next unless ($value); |
1081
|
0
|
|
|
|
|
0
|
my $attr_key = lc $attr; #put switches in format expected by tcoffee |
1082
|
0
|
|
|
|
|
0
|
$attr_key = ' -'.$attr_key; |
1083
|
0
|
|
|
|
|
0
|
$param_string .= $attr_key ; |
1084
|
|
|
|
|
|
|
} |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
# Set default output file if no explicit output file selected |
1087
|
0
|
0
|
|
|
|
0
|
unless ($self->outfile ) { |
1088
|
0
|
|
|
|
|
0
|
my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir()); |
1089
|
0
|
|
|
|
|
0
|
close($tfh); |
1090
|
0
|
|
|
|
|
0
|
undef $tfh; |
1091
|
0
|
|
|
|
|
0
|
$self->outfile($outfile); |
1092
|
0
|
|
|
|
|
0
|
$param_string .= " -outfile=$outfile" ; |
1093
|
|
|
|
|
|
|
} |
1094
|
|
|
|
|
|
|
|
1095
|
0
|
0
|
0
|
|
|
0
|
if ($self->quiet() || $self->verbose < 0) { $param_string .= ' -quiet';} |
|
0
|
|
|
|
|
0
|
|
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
# -no_warning is required on some systems with certain versions or failure |
1098
|
|
|
|
|
|
|
# is guaranteed |
1099
|
0
|
0
|
0
|
|
|
0
|
if ($self->version >= 4 && $self->version < 4.7) { |
1100
|
0
|
|
|
|
|
0
|
$param_string .= ' -no_warning'; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
|
1103
|
0
|
|
|
|
|
0
|
return $param_string; |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
=head2 aformat |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
Title : aformat |
1109
|
|
|
|
|
|
|
Usage : my $alignmentformat = $self->aformat(); |
1110
|
|
|
|
|
|
|
Function: Get/Set alignment format |
1111
|
|
|
|
|
|
|
Returns : string |
1112
|
|
|
|
|
|
|
Args : string |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
=cut |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
sub aformat{ |
1118
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1119
|
0
|
0
|
|
|
|
0
|
$self->{'_aformat'} = shift if @_; |
1120
|
0
|
|
|
|
|
0
|
return $self->{'_aformat'}; |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
=head2 methods |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
Title : methods |
1127
|
|
|
|
|
|
|
Usage : my @methods = $self->methods() |
1128
|
|
|
|
|
|
|
Function: Get/Set Alignment methods - NOT VALIDATED |
1129
|
|
|
|
|
|
|
Returns : array of strings |
1130
|
|
|
|
|
|
|
Args : arrayref of strings |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
=cut |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
sub methods{ |
1136
|
2
|
|
|
2
|
1
|
3
|
my ($self) = shift; |
1137
|
|
|
|
|
|
|
|
1138
|
2
|
50
|
|
|
|
4
|
@{$self->{'_methods'}} = @{shift || []} if @_; |
|
1
|
100
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
1139
|
2
|
100
|
|
|
|
2
|
return @{$self->{'_methods'} || []}; |
|
2
|
|
|
|
|
10
|
|
1140
|
|
|
|
|
|
|
} |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
=head1 Bio::Tools::Run::BaseWrapper methods |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
=cut |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
=head2 no_param_checks |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Title : no_param_checks |
1150
|
|
|
|
|
|
|
Usage : $obj->no_param_checks($newval) |
1151
|
|
|
|
|
|
|
Function: Boolean flag as to whether or not we should |
1152
|
|
|
|
|
|
|
trust the sanity checks for parameter values |
1153
|
|
|
|
|
|
|
Returns : value of no_param_checks |
1154
|
|
|
|
|
|
|
Args : newvalue (optional) |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
=cut |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
=head2 save_tempfiles |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
Title : save_tempfiles |
1162
|
|
|
|
|
|
|
Usage : $obj->save_tempfiles($newval) |
1163
|
|
|
|
|
|
|
Function: |
1164
|
|
|
|
|
|
|
Returns : value of save_tempfiles |
1165
|
|
|
|
|
|
|
Args : newvalue (optional) |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
=cut |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
=head2 outfile_name |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
Title : outfile_name |
1173
|
|
|
|
|
|
|
Usage : my $outfile = $tcoffee->outfile_name(); |
1174
|
|
|
|
|
|
|
Function: Get/Set the name of the output file for this run |
1175
|
|
|
|
|
|
|
(if you wanted to do something special) |
1176
|
|
|
|
|
|
|
Returns : string |
1177
|
|
|
|
|
|
|
Args : [optional] string to set value to |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
=cut |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=head2 tempdir |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
Title : tempdir |
1186
|
|
|
|
|
|
|
Usage : my $tmpdir = $self->tempdir(); |
1187
|
|
|
|
|
|
|
Function: Retrieve a temporary directory name (which is created) |
1188
|
|
|
|
|
|
|
Returns : string which is the name of the temporary directory |
1189
|
|
|
|
|
|
|
Args : none |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
=cut |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 cleanup |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
Title : cleanup |
1197
|
|
|
|
|
|
|
Usage : $tcoffee->cleanup(); |
1198
|
|
|
|
|
|
|
Function: Will cleanup the tempdir directory |
1199
|
|
|
|
|
|
|
Returns : none |
1200
|
|
|
|
|
|
|
Args : none |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=cut |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
=head2 io |
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
Title : io |
1208
|
|
|
|
|
|
|
Usage : $obj->io($newval) |
1209
|
|
|
|
|
|
|
Function: Gets a L object |
1210
|
|
|
|
|
|
|
Returns : L |
1211
|
|
|
|
|
|
|
Args : none |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
=cut |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
1; # Needed to keep compiler happy |