line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::Phylo::Phylip::ProtDist |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Created by |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Shawn Hoon |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Bio::Tools::Run::Phylo::Phylip::ProtDist - Wrapper for the phylip |
14
|
|
|
|
|
|
|
program protdist |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
#Create a SimpleAlign object |
19
|
|
|
|
|
|
|
@params = ('ktuple' => 2, 'matrix' => 'BLOSUM'); |
20
|
|
|
|
|
|
|
$factory = Bio::Tools::Run::Alignment::Clustalw->new(@params); |
21
|
|
|
|
|
|
|
$inputfilename = 't/data/cysprot.fa'; |
22
|
|
|
|
|
|
|
$aln = $factory->run($inputfilename); # $aln is a SimpleAlign object. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Create the Distance Matrix using a default PAM matrix and id name |
26
|
|
|
|
|
|
|
# lengths limit of 30 note to use id name length greater than the |
27
|
|
|
|
|
|
|
# standard 10 in protdist, you will need to modify the protdist source |
28
|
|
|
|
|
|
|
# code |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
@params = ('MODEL' => 'PAM'); |
31
|
|
|
|
|
|
|
$protdist_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new(@params); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my ($matrix) = $protdist_factory->run($aln); # an array of Bio::Matrix::PhylipDist matrix |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#finding the distance between two sequences |
36
|
|
|
|
|
|
|
my $distance = $matrix->get_entry('protein_name_1','protein_name_2'); |
37
|
|
|
|
|
|
|
my @column = $matrix->get_column('protein_name_1'); |
38
|
|
|
|
|
|
|
my @row = $martrix->get_row('protein_name_1'); |
39
|
|
|
|
|
|
|
my @diag = $matrix->get_diagonal(); |
40
|
|
|
|
|
|
|
print $matrix->print_matrix; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
#Alternatively, one can create the matrix by passing in a file |
44
|
|
|
|
|
|
|
#name containing a multiple alignment in phylip format |
45
|
|
|
|
|
|
|
$protdist_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new(@params); |
46
|
|
|
|
|
|
|
my ($matrix) = $protdist_factory->run('/home/shawnh/prot.phy'); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# To prevent PHYLIP from truncating sequence names: |
49
|
|
|
|
|
|
|
# Step 1. Shelf the original names: |
50
|
|
|
|
|
|
|
my ($aln_safe, $ref_name)= # $aln_safe has serial names |
51
|
|
|
|
|
|
|
$aln->set_displayname_safe(); # $ref_name holds original names |
52
|
|
|
|
|
|
|
# Step 2. Run ProtDist and Neighbor: |
53
|
|
|
|
|
|
|
($matrix) = $protdist_factory-> |
54
|
|
|
|
|
|
|
create_distance_matrix($aln_safe); # Use $aln_safe instead of $aln |
55
|
|
|
|
|
|
|
($tree) = $neighbor_factory->run($matrix); |
56
|
|
|
|
|
|
|
# Step 3. Retrieve orgininal OTU names: |
57
|
|
|
|
|
|
|
use Bio::Tree::Tree; |
58
|
|
|
|
|
|
|
my @nodes=$tree->get_nodes(); |
59
|
|
|
|
|
|
|
foreach my $nd (@nodes){ |
60
|
|
|
|
|
|
|
$nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 DESCRIPTION |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Wrapper for protdist Joseph Felsentein for creating a distance matrix |
66
|
|
|
|
|
|
|
comparing protein sequences from a multiple alignment file or a |
67
|
|
|
|
|
|
|
L object and returns a L object; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
VERSION Support |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
This wrapper currently supports v3.5 of phylip. There is also support |
72
|
|
|
|
|
|
|
for v3.6. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 PARAMETERS FOR PROTDIST COMPUTATION |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head2 MODEL |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Title : MODEL |
79
|
|
|
|
|
|
|
Description : (optional) |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
This sets the model of amino acid substitution used |
82
|
|
|
|
|
|
|
in the calculation of the distances. 3 different |
83
|
|
|
|
|
|
|
models are supported: |
84
|
|
|
|
|
|
|
PAM Dayhoff PAM Matrix(default) |
85
|
|
|
|
|
|
|
KIMURA Kimura's Distance CAT |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Categories Distance Usage: @params = |
88
|
|
|
|
|
|
|
('model'=>'X');#where X is one of the values above |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Defaults to PAM For more information on the usage of |
91
|
|
|
|
|
|
|
the different models, please refer to the |
92
|
|
|
|
|
|
|
documentation |
93
|
|
|
|
|
|
|
defaults to Equal |
94
|
|
|
|
|
|
|
(0.25,0.25,0.25,0.25) found in the phylip package. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Additional models in PHYLIP 3.6 |
97
|
|
|
|
|
|
|
PMB - Henikoff/Tillier PMB matrix |
98
|
|
|
|
|
|
|
JTT - Jones/Taylor/Thornton |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head2 MULTIPLE |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Title : MULTIPLE |
103
|
|
|
|
|
|
|
Description: (optional) |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
This allows multiple distance matrices to be generated from multiple |
106
|
|
|
|
|
|
|
MSA. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Usage: @params = ('MULTIPLE'=>100) where the value specifyies the |
109
|
|
|
|
|
|
|
number of aligments given. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 ALL SUBSEQUENT PARAMETERS WILL ONLY WORK IN CONJUNCTION WITH |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
THE Categories Distance MODEL* |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 GENCODE |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Title : GENCODE |
118
|
|
|
|
|
|
|
Description : (optional) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
This option allows the user to select among various |
121
|
|
|
|
|
|
|
nuclear and mitochondrial genetic codes. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Acceptable Values: |
124
|
|
|
|
|
|
|
U Universal |
125
|
|
|
|
|
|
|
M Mitochondrial |
126
|
|
|
|
|
|
|
V Vertebrate mitochondrial |
127
|
|
|
|
|
|
|
F Fly mitochondrial |
128
|
|
|
|
|
|
|
Y Yeast mitochondrial |
129
|
|
|
|
|
|
|
Usage : @params = ('gencode'=>'X'); |
130
|
|
|
|
|
|
|
where X is one of the letters above |
131
|
|
|
|
|
|
|
Defaults to U |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 CATEGORY |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Title : CATEGORY |
136
|
|
|
|
|
|
|
Description : (optional) |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
This option sets the categorization of amino acids |
139
|
|
|
|
|
|
|
all have groups: (Glu Gln Asp Asn), (Lys Arg His), |
140
|
|
|
|
|
|
|
(Phe Tyr Trp) plus: |
141
|
|
|
|
|
|
|
G George/Hunt/Barker: |
142
|
|
|
|
|
|
|
(Cys), (Met Val Leu Ileu), |
143
|
|
|
|
|
|
|
(Gly Ala Ser Thr Pro) |
144
|
|
|
|
|
|
|
C Chemical: |
145
|
|
|
|
|
|
|
(Cys Met), (Val Leu Ileu Gly Ala Ser Thr), |
146
|
|
|
|
|
|
|
(Pro) |
147
|
|
|
|
|
|
|
H Hall: |
148
|
|
|
|
|
|
|
(Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr), |
149
|
|
|
|
|
|
|
(Pro) |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Usage : @params = ('category'=>'X'); |
152
|
|
|
|
|
|
|
where X is one of the letters above |
153
|
|
|
|
|
|
|
Defaults to G |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 PROBCHANGE |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Title : PROBCHANGE |
158
|
|
|
|
|
|
|
Description : (optional) |
159
|
|
|
|
|
|
|
This option sets the ease of changing category of amino |
160
|
|
|
|
|
|
|
acid. (1.0 if no difficulty of changing,less if less |
161
|
|
|
|
|
|
|
easy. Can't be negative) |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Usage : @params = ('probchange'=>X) where 0<=X<=1 |
164
|
|
|
|
|
|
|
Defaults to 0.4570 |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 TRANS |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Title : TRANS |
169
|
|
|
|
|
|
|
Description : (optional) |
170
|
|
|
|
|
|
|
This option sets transition/transversion ratio can be |
171
|
|
|
|
|
|
|
any positive number |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Usage : @params = ('trans'=>X) where X >= 0 |
174
|
|
|
|
|
|
|
Defaults to 2 |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 FREQ |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Title : FREQ |
179
|
|
|
|
|
|
|
Description : (optional) |
180
|
|
|
|
|
|
|
This option sets the frequency of each base (A,C,G,T) |
181
|
|
|
|
|
|
|
The sum of the frequency must sum to 1. |
182
|
|
|
|
|
|
|
For example A,C,G,T = (0.25,0.5,0.125,0.125) |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Usage : @params = ('freq'=>('W','X','Y','Z') |
185
|
|
|
|
|
|
|
where W + X + Y + Z = 1 |
186
|
|
|
|
|
|
|
Defaults to Equal (0.25,0.25,0.25,0.25) |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 FEEDBACK |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head2 Mailing Lists |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
194
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
195
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
198
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 Support |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
I |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
207
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
208
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
209
|
|
|
|
|
|
|
with code and data examples if at all possible. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 Reporting Bugs |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
214
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
215
|
|
|
|
|
|
|
web: |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head1 AUTHOR - Shawn Hoon |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
Email shawnh@fugu-sg.org |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head1 APPENDIX |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
226
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
#' |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
package Bio::Tools::Run::Phylo::Phylip::ProtDist; |
234
|
|
|
|
|
|
|
|
235
|
2
|
|
|
|
|
139
|
use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME |
236
|
|
|
|
|
|
|
@PROTDIST_PARAMS @OTHER_SWITCHES |
237
|
2
|
|
|
2
|
|
204196
|
%OK_FIELD); |
|
2
|
|
|
|
|
2
|
|
238
|
2
|
|
|
2
|
|
6
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
30
|
|
239
|
2
|
|
|
2
|
|
1268
|
use Bio::SimpleAlign; |
|
2
|
|
|
|
|
162155
|
|
|
2
|
|
|
|
|
76
|
|
240
|
2
|
|
|
2
|
|
1061
|
use Bio::AlignIO; |
|
2
|
|
|
|
|
8198
|
|
|
2
|
|
|
|
|
47
|
|
241
|
2
|
|
|
2
|
|
778
|
use Bio::TreeIO; |
|
2
|
|
|
|
|
26913
|
|
|
2
|
|
|
|
|
50
|
|
242
|
2
|
|
|
2
|
|
795
|
use Bio::Tools::Run::Phylo::Phylip::Base; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
53
|
|
243
|
2
|
|
|
2
|
|
9
|
use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu); |
|
2
|
|
|
|
|
1
|
|
|
2
|
|
|
|
|
209
|
|
244
|
2
|
|
|
2
|
|
861
|
use Bio::Tools::Phylo::Phylip::ProtDist; |
|
2
|
|
|
|
|
5286
|
|
|
2
|
|
|
|
|
44
|
|
245
|
2
|
|
|
2
|
|
8
|
use Cwd; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
210
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# inherit from Phylip::Base which has some methods for dealing with |
249
|
|
|
|
|
|
|
# Phylip specifics |
250
|
|
|
|
|
|
|
@ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base); |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# You will need to enable the protdist program. This |
253
|
|
|
|
|
|
|
# can be done in (at least) 3 ways: |
254
|
|
|
|
|
|
|
# |
255
|
|
|
|
|
|
|
# 1. define an environmental variable PHYLIPDIR: |
256
|
|
|
|
|
|
|
# export PHYLIPDIR=/home/shawnh/PHYLIP/bin |
257
|
|
|
|
|
|
|
# |
258
|
|
|
|
|
|
|
# 2. include a definition of an environmental variable CLUSTALDIR in |
259
|
|
|
|
|
|
|
# every script that will use Clustal.pm. |
260
|
|
|
|
|
|
|
# $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin'; |
261
|
|
|
|
|
|
|
# |
262
|
|
|
|
|
|
|
# 3. You can set the path to the program through doing: |
263
|
|
|
|
|
|
|
# my @params('program'=>'/usr/local/bin/protdist'); |
264
|
|
|
|
|
|
|
# my $protdist_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new(@params); |
265
|
|
|
|
|
|
|
# |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
BEGIN { |
269
|
2
|
|
|
2
|
|
5
|
@PROTDIST_PARAMS = qw(MODEL GENCODE CATEGORY PROBCHANGE TRANS WEIGHTS FREQ MULTIPLE); |
270
|
2
|
|
|
|
|
4
|
@OTHER_SWITCHES = qw(QUIET); |
271
|
2
|
|
|
|
|
4
|
foreach my $attr(@PROTDIST_PARAMS,@OTHER_SWITCHES) { |
272
|
18
|
|
|
|
|
2175
|
$OK_FIELD{$attr}++; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 program_name |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Title : program_name |
279
|
|
|
|
|
|
|
Usage : >program_name() |
280
|
|
|
|
|
|
|
Function: holds the program name |
281
|
|
|
|
|
|
|
Returns: string |
282
|
|
|
|
|
|
|
Args : None |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub program_name { |
287
|
6
|
|
|
6
|
1
|
24
|
return 'protdist'; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 program_dir |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Title : program_dir |
293
|
|
|
|
|
|
|
Usage : ->program_dir() |
294
|
|
|
|
|
|
|
Function: returns the program directory, obtained from ENV variable. |
295
|
|
|
|
|
|
|
Returns: string |
296
|
|
|
|
|
|
|
Args : |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=cut |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
sub program_dir { |
301
|
3
|
50
|
|
3
|
1
|
12
|
return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR}; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub new { |
305
|
1
|
|
|
1
|
1
|
110
|
my ($class,@args) = @_; |
306
|
1
|
|
|
|
|
14
|
my $self = $class->SUPER::new(@args); |
307
|
1
|
|
|
|
|
41
|
my ($attr, $value); |
308
|
1
|
|
|
|
|
5
|
while (@args) { |
309
|
7
|
|
|
|
|
7
|
$attr = shift @args; |
310
|
7
|
|
|
|
|
4
|
$value = shift @args; |
311
|
7
|
50
|
|
|
|
14
|
next if( $attr =~ /^-/ ); # don't want named parameters |
312
|
7
|
50
|
|
|
|
10
|
if ($attr =~/PROGRAM/i) { |
313
|
0
|
|
|
|
|
0
|
$self->executable($value); |
314
|
0
|
|
|
|
|
0
|
next; |
315
|
|
|
|
|
|
|
} |
316
|
7
|
100
|
|
|
|
11
|
if ($attr =~ /IDLENGTH/i){ |
317
|
1
|
|
|
|
|
3
|
$self->idlength($value); |
318
|
1
|
|
|
|
|
2
|
next; |
319
|
|
|
|
|
|
|
} |
320
|
6
|
|
|
|
|
34
|
$self->$attr($value); |
321
|
|
|
|
|
|
|
} |
322
|
1
|
|
|
|
|
2
|
return $self; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub AUTOLOAD { |
326
|
6
|
|
|
6
|
|
5
|
my $self = shift; |
327
|
6
|
|
|
|
|
5
|
my $attr = $AUTOLOAD; |
328
|
6
|
|
|
|
|
13
|
$attr =~ s/.*:://; |
329
|
6
|
|
|
|
|
6
|
$attr = uc $attr; |
330
|
6
|
50
|
|
|
|
12
|
$self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; |
331
|
6
|
50
|
|
|
|
12
|
$self->{$attr} = shift if @_; |
332
|
6
|
|
|
|
|
11
|
return $self->{$attr}; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 idlength |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Title : idlength |
338
|
|
|
|
|
|
|
Usage : $obj->idlength ($newval) |
339
|
|
|
|
|
|
|
Function: |
340
|
|
|
|
|
|
|
Returns : value of idlength |
341
|
|
|
|
|
|
|
Args : newvalue (optional) |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=cut |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub idlength{ |
347
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
348
|
1
|
50
|
|
|
|
4
|
if( @_ ) { |
349
|
1
|
|
|
|
|
2
|
my $value = shift; |
350
|
1
|
|
|
|
|
1
|
$self->{'idlength'} = $value; |
351
|
|
|
|
|
|
|
} |
352
|
1
|
|
|
|
|
2
|
return $self->{'idlength'}; |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 run |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Title : run |
360
|
|
|
|
|
|
|
Usage : |
361
|
|
|
|
|
|
|
$inputfilename = 't/data/prot.phy'; |
362
|
|
|
|
|
|
|
$matrix= $prodistfactory->run($inputfilename); |
363
|
|
|
|
|
|
|
or |
364
|
|
|
|
|
|
|
$seq_array_ref = \@seq_array; @seq_array is array of Seq objs |
365
|
|
|
|
|
|
|
$aln = $protdistfactory->align($seq_array_ref); |
366
|
|
|
|
|
|
|
$matrix = $protdistfactory->run($aln); |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
Function: Create a distance matrix from a SimpleAlign object or a multiple alignment file |
369
|
|
|
|
|
|
|
Example : |
370
|
|
|
|
|
|
|
Returns : L |
371
|
|
|
|
|
|
|
Args : Name of a file containing a multiple alignment in Phylip format |
372
|
|
|
|
|
|
|
or an SimpleAlign object |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Throws an exception if argument is not either a string (eg a |
375
|
|
|
|
|
|
|
filename) or a Bio::SimpleAlign object. If |
376
|
|
|
|
|
|
|
argument is string, throws exception if file corresponding to string |
377
|
|
|
|
|
|
|
name can not be found. |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=cut |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub run{ |
382
|
|
|
|
|
|
|
|
383
|
0
|
|
|
0
|
1
|
|
my ($self,$input) = @_; |
384
|
0
|
|
|
|
|
|
my ($infilename); |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Create input file pointer |
387
|
0
|
|
|
|
|
|
$infilename = $self->_setinput($input); |
388
|
0
|
0
|
|
|
|
|
if (!$infilename) {$self->throw("Problems setting up for protdist. Probably bad input data in $input !");} |
|
0
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# Create parameter string to pass to protdist program |
390
|
0
|
|
|
|
|
|
my $param_string = $self->_setparams(); |
391
|
|
|
|
|
|
|
# run protdist |
392
|
0
|
|
|
|
|
|
my @mat = $self->_run($infilename,$param_string); |
393
|
0
|
0
|
|
|
|
|
return wantarray ? @mat:\@mat; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
################################################# |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head2 _run |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Title : _run |
401
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
402
|
|
|
|
|
|
|
Function: makes actual system call to protdist program |
403
|
|
|
|
|
|
|
Example : |
404
|
|
|
|
|
|
|
Returns : Bio::Tree object |
405
|
|
|
|
|
|
|
Args : Name of a file containing a set of multiple alignments in Phylip format |
406
|
|
|
|
|
|
|
and a parameter string to be passed to protdist |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
sub _run { |
412
|
0
|
|
|
0
|
|
|
my ($self,$infile,$param_string) = @_; |
413
|
0
|
|
|
|
|
|
my $instring; |
414
|
0
|
|
|
|
|
|
my $curpath = cwd; |
415
|
0
|
0
|
|
|
|
|
unless( File::Spec->file_name_is_absolute($infile) ) { |
416
|
0
|
|
|
|
|
|
$infile = $self->io->catfile($curpath,$infile); |
417
|
|
|
|
|
|
|
} |
418
|
0
|
|
|
|
|
|
$instring = $infile."\n$param_string"; |
419
|
0
|
|
|
|
|
|
$self->debug( "Program ".$self->executable." $instring\n"); |
420
|
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
chdir($self->tempdir); |
422
|
|
|
|
|
|
|
#open a pipe to run protdist to bypass interactive menus |
423
|
0
|
0
|
0
|
|
|
|
if ($self->quiet() || $self->verbose() < 0) { |
424
|
0
|
0
|
|
|
|
|
my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; |
425
|
0
|
|
|
|
|
|
open(PROTDIST,"|".$self->executable .">$null"); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
else { |
428
|
0
|
|
|
|
|
|
open(PROTDIST,"|".$self->executable); |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
|
print PROTDIST $instring; |
431
|
0
|
|
|
|
|
|
close(PROTDIST); |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# get the results |
434
|
0
|
|
|
|
|
|
my $outfile = $self->io->catfile($self->tempdir,$self->outfile); |
435
|
0
|
|
|
|
|
|
chdir($curpath); |
436
|
0
|
0
|
|
|
|
|
$self->throw("protdist did not create matrix correctly ($outfile)") |
437
|
|
|
|
|
|
|
unless (-e $outfile); |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
#Create the distance matrix here |
440
|
0
|
|
|
|
|
|
my $parser = Bio::Tools::Phylo::Phylip::ProtDist->new(-file=>$outfile); |
441
|
0
|
|
|
|
|
|
my @matrix; |
442
|
0
|
|
|
|
|
|
while (my $mat = $parser->next_matrix){ |
443
|
0
|
|
|
|
|
|
push @matrix, $mat; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# Clean up the temporary files created along the way... |
447
|
0
|
0
|
|
|
|
|
unlink $outfile unless $self->save_tempfiles; |
448
|
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
|
return @matrix; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 create_distance_matrix |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Title : create_distance_matrix |
455
|
|
|
|
|
|
|
Usage : my $file = $app->create_distance_matrix($treefile); |
456
|
|
|
|
|
|
|
Function: This method is deprecated. Please use run method. |
457
|
|
|
|
|
|
|
Returns : L |
458
|
|
|
|
|
|
|
Args : Name of a file containing a multiple alignment in Phylip format |
459
|
|
|
|
|
|
|
or an SimpleAlign object |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
Throws an exception if argument is not either a string (eg a |
462
|
|
|
|
|
|
|
filename) or a Bio::SimpleAlign object. If |
463
|
|
|
|
|
|
|
argument is string, throws exception if file corresponding to string |
464
|
|
|
|
|
|
|
name can not be found. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=cut |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub create_distance_matrix{ |
469
|
0
|
|
|
0
|
1
|
|
return shift->run(@_); |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 _setinput() |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Title : _setinput |
475
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
476
|
|
|
|
|
|
|
Function: Create input file for protdist program |
477
|
|
|
|
|
|
|
Example : |
478
|
|
|
|
|
|
|
Returns : name of file containing a multiple alignment in Phylip format |
479
|
|
|
|
|
|
|
Args : SimpleAlign object reference or input file name |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub _setinput { |
485
|
0
|
|
|
0
|
|
|
my ($self, $input) = @_; |
486
|
0
|
|
|
|
|
|
my ($alnfilename,$tfh); |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# suffix is used to distinguish alignment files from an align obkect |
489
|
|
|
|
|
|
|
#If $input is not a reference it better be the name of a file with the sequence/ |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
# a phy formatted alignment file |
492
|
0
|
0
|
|
|
|
|
unless (ref $input) { |
493
|
|
|
|
|
|
|
# check that file exists or throw |
494
|
0
|
|
|
|
|
|
$alnfilename= $input; |
495
|
0
|
0
|
|
|
|
|
unless (-e $input) {return 0;} |
|
0
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
return $alnfilename; |
497
|
|
|
|
|
|
|
} |
498
|
0
|
0
|
|
|
|
|
my @input = ref $input eq 'ARRAY' ? @{$input} : ($input); |
|
0
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
# $input may be a SimpleAlign Object |
501
|
0
|
|
|
|
|
|
($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir); |
502
|
0
|
|
|
|
|
|
my $alnIO = Bio::AlignIO->new(-fh => $tfh, |
503
|
|
|
|
|
|
|
-format=>'phylip', |
504
|
|
|
|
|
|
|
-idlength=>$self->idlength()); |
505
|
0
|
|
|
|
|
|
my $input_count = 0; |
506
|
0
|
|
|
|
|
|
foreach my $input(@input){ |
507
|
0
|
0
|
|
|
|
|
if ($input->isa("Bio::SimpleAlign")){ |
508
|
|
|
|
|
|
|
# Open temporary file for both reading & writing of BioSeq array |
509
|
0
|
|
|
|
|
|
$alnIO->write_aln($input); |
510
|
|
|
|
|
|
|
} |
511
|
0
|
|
|
|
|
|
$input_count++; |
512
|
|
|
|
|
|
|
} |
513
|
0
|
|
|
|
|
|
$alnIO->close(); |
514
|
0
|
|
|
|
|
|
close($tfh); |
515
|
0
|
|
|
|
|
|
$tfh = undef; |
516
|
0
|
|
|
|
|
|
$self->_input_nbr($input_count); |
517
|
0
|
|
|
|
|
|
return $alnfilename; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
sub _input_nbr { |
521
|
0
|
|
|
0
|
|
|
my ($self,$val) = @_; |
522
|
0
|
0
|
|
|
|
|
if($val){ |
523
|
0
|
|
|
|
|
|
$self->{'_input_nbr'} = $val; |
524
|
|
|
|
|
|
|
} |
525
|
0
|
|
|
|
|
|
return $self->{'_input_nbr'}; |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head2 _setparams() |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
Title : _setparams |
531
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
532
|
|
|
|
|
|
|
Function: Create parameter inputs for protdist program |
533
|
|
|
|
|
|
|
Example : |
534
|
|
|
|
|
|
|
Returns : parameter string to be passed to protdist |
535
|
|
|
|
|
|
|
Args : name of calling object |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=cut |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
sub _setparams { |
540
|
0
|
|
|
0
|
|
|
my ($attr, $value, $self); |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#do nothing for now |
543
|
0
|
|
|
|
|
|
$self = shift; |
544
|
0
|
|
|
|
|
|
my $param_string = ""; |
545
|
0
|
|
|
|
|
|
my $cat = 0; |
546
|
0
|
|
|
|
|
|
my %menu = %{$Menu{$self->version}->{'PROTDIST'}}; |
|
0
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
0
|
|
|
|
|
|
foreach my $attr ( @PROTDIST_PARAMS) { |
549
|
0
|
|
|
|
|
|
$value = $self->$attr(); |
550
|
0
|
0
|
|
|
|
|
next unless (defined $value); |
551
|
0
|
0
|
|
|
|
|
if ($attr =~/MODEL/i){ |
552
|
0
|
0
|
|
|
|
|
if ($value=~/CAT/i){ |
553
|
0
|
|
|
|
|
|
$cat = 1; |
554
|
|
|
|
|
|
|
} |
555
|
0
|
|
|
|
|
|
$param_string .= $menu{'MODEL'}{$value}; |
556
|
|
|
|
|
|
|
} |
557
|
0
|
0
|
|
|
|
|
if($attr=~/MULTIPLE/i){ |
558
|
0
|
|
|
|
|
|
$param_string.=$menu{'MULTIPLE'}."$value\n"; |
559
|
|
|
|
|
|
|
} |
560
|
0
|
0
|
|
|
|
|
if ($cat == 1){ |
561
|
0
|
0
|
|
|
|
|
if($attr =~ /GENCODE/i){ |
562
|
0
|
|
|
|
|
|
my $allowed = $menu{'GENCODE'}{'ALLOWED'}; |
563
|
0
|
0
|
|
|
|
|
$self->throw("Unallowed value for genetic code") unless ($value =~ /[$allowed]/); |
564
|
0
|
|
|
|
|
|
$param_string .= $menu{'GENCODE'}{'OPTION'}."$value\n"; |
565
|
|
|
|
|
|
|
} |
566
|
0
|
0
|
|
|
|
|
if ($attr =~/CATEGORY/i){ |
567
|
0
|
|
|
|
|
|
my $allowed = $menu{'CATEGORY'}{'ALLOWED'}; |
568
|
0
|
0
|
|
|
|
|
$self->throw("Unallowed value for categorization of amino acids") unless ($value =~/[$allowed]/); |
569
|
0
|
|
|
|
|
|
$param_string .= $menu{'CATEGORY'}{'OPTION'}."$value\n"; |
570
|
|
|
|
|
|
|
} |
571
|
0
|
0
|
|
|
|
|
if ($attr =~/PROBCHANGE/i){ |
572
|
0
|
0
|
0
|
|
|
|
if (($value =~ /\d+/)&&($value >= 0) && ($value < 1)){ |
|
|
|
0
|
|
|
|
|
573
|
0
|
|
|
|
|
|
$param_string .= $menu{'PROBCHANGE'}."$value\n"; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
else { |
576
|
0
|
|
|
|
|
|
$self->throw("Unallowed value for probability change category"); |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
} |
579
|
0
|
0
|
|
|
|
|
if ($attr =~/TRANS/i){ |
580
|
0
|
0
|
0
|
|
|
|
if (($value=~/\d+/) && ($value >=0)){ |
581
|
0
|
|
|
|
|
|
$param_string .=$menu{'TRANS'}."$value\n"; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
} |
584
|
0
|
0
|
|
|
|
|
if ($attr =~ /FREQ/i){ |
585
|
0
|
|
|
|
|
|
my @freq = split(",",$value); |
586
|
0
|
0
|
|
|
|
|
if ($freq[0] !~ /\d+/){ #a letter provided (sets frequencies equally to 0.25) |
|
|
0
|
|
|
|
|
|
587
|
0
|
|
|
|
|
|
$param_string .=$menu{'FREQ'}.$freq[0]."\n"; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
elsif ($#freq == 3) {#must have 4 digits for each base |
590
|
0
|
|
|
|
|
|
$param_string .=$menu{'FREQ'}; |
591
|
0
|
|
|
|
|
|
foreach my $f (@freq){ |
592
|
0
|
|
|
|
|
|
$param_string.="$f\n"; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
else { |
596
|
0
|
|
|
|
|
|
$self->throw("Unallowed value for base frequencies"); |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
#set multiple option is not set and there are more than one sequence |
602
|
0
|
0
|
0
|
|
|
|
if (($param_string !~ $menu{'MULTIPLE'}) && (defined ($self->_input_nbr) &&($self->_input_nbr > 1))){ |
|
|
|
0
|
|
|
|
|
603
|
0
|
|
|
|
|
|
$param_string.=$menu{'MULTIPLE'}.$self->_input_nbr."\n"; |
604
|
|
|
|
|
|
|
} |
605
|
0
|
|
|
|
|
|
$param_string .=$menu{'SUBMIT'}; |
606
|
|
|
|
|
|
|
|
607
|
0
|
|
|
|
|
|
return $param_string; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head1 Bio::Tools::Run::Wrapper methods |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=cut |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=head2 no_param_checks |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Title : no_param_checks |
619
|
|
|
|
|
|
|
Usage : $obj->no_param_checks($newval) |
620
|
|
|
|
|
|
|
Function: Boolean flag as to whether or not we should |
621
|
|
|
|
|
|
|
trust the sanity checks for parameter values |
622
|
|
|
|
|
|
|
Returns : value of no_param_checks |
623
|
|
|
|
|
|
|
Args : newvalue (optional) |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=cut |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head2 save_tempfiles |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Title : save_tempfiles |
631
|
|
|
|
|
|
|
Usage : $obj->save_tempfiles($newval) |
632
|
|
|
|
|
|
|
Function: |
633
|
|
|
|
|
|
|
Returns : value of save_tempfiles |
634
|
|
|
|
|
|
|
Args : newvalue (optional) |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=cut |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=head2 outfile_name |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Title : outfile_name |
642
|
|
|
|
|
|
|
Usage : my $outfile = $protdist->outfile_name(); |
643
|
|
|
|
|
|
|
Function: Get/Set the name of the output file for this run |
644
|
|
|
|
|
|
|
(if you wanted to do something special) |
645
|
|
|
|
|
|
|
Returns : string |
646
|
|
|
|
|
|
|
Args : [optional] string to set value to |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=cut |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head2 tempdir |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
Title : tempdir |
655
|
|
|
|
|
|
|
Usage : my $tmpdir = $self->tempdir(); |
656
|
|
|
|
|
|
|
Function: Retrieve a temporary directory name (which is created) |
657
|
|
|
|
|
|
|
Returns : string which is the name of the temporary directory |
658
|
|
|
|
|
|
|
Args : none |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=cut |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=head2 cleanup |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Title : cleanup |
666
|
|
|
|
|
|
|
Usage : $codeml->cleanup(); |
667
|
|
|
|
|
|
|
Function: Will cleanup the tempdir directory after a ProtDist run |
668
|
|
|
|
|
|
|
Returns : none |
669
|
|
|
|
|
|
|
Args : none |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=cut |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=head2 io |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
Title : io |
677
|
|
|
|
|
|
|
Usage : $obj->io($newval) |
678
|
|
|
|
|
|
|
Function: Gets a L object |
679
|
|
|
|
|
|
|
Returns : L |
680
|
|
|
|
|
|
|
Args : none |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=cut |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
1; # Needed to keep compiler happy |