line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::EvolutionaryModels; |
2
|
1
|
|
|
1
|
|
807
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
26
|
|
3
|
1
|
|
|
1
|
|
4
|
use base 'Exporter'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
83
|
|
4
|
1
|
|
|
1
|
|
337
|
use Bio::Phylo::Forest::Tree; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
10
|
|
5
|
1
|
|
|
1
|
|
376
|
use Bio::Phylo::Forest; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
9
|
|
6
|
1
|
|
|
1
|
|
7
|
use Math::CDF qw'qnorm qbeta'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
62
|
|
7
|
1
|
|
|
1
|
|
6
|
use List::Util qw'min max'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
56
|
|
8
|
1
|
|
|
1
|
|
6
|
use POSIX qw'ceil floor'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
9
|
1
|
|
|
1
|
|
1286
|
use Config; #Use to check whether multi-threading is available |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
40
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
BEGIN { |
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
5
|
use Bio::Phylo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
3
|
|
14
|
1
|
|
|
1
|
|
1027
|
our @EXPORT_OK = |
15
|
|
|
|
|
|
|
qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts); |
16
|
|
|
|
|
|
|
} |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these |
21
|
|
|
|
|
|
|
Klaas Hartmann, September 2007 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#For convenience we import the sample routine (so we can write sample(...) instead of |
26
|
|
|
|
|
|
|
#Bio::Phylo::EvolutionaryModels::sample(...). |
27
|
|
|
|
|
|
|
use Bio::Phylo::EvolutionaryModels qw (sample); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
#Example#A###################################################################### |
30
|
|
|
|
|
|
|
#Simulate a single tree with ten species from the constant rate birth model with parameter 0.5 |
31
|
|
|
|
|
|
|
my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
#Example#B###################################################################### |
34
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth model using the b algorithm |
35
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
36
|
|
|
|
|
|
|
tree_size => 10, |
37
|
|
|
|
|
|
|
algorithm => 'b', |
38
|
|
|
|
|
|
|
algorithm_options => {rate => 1}, |
39
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth, |
40
|
|
|
|
|
|
|
model_options => {birth_rate=>.5}); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
#Print a newick string for the 4th sampled tree |
44
|
|
|
|
|
|
|
print $sample->[3]->to_newick."\n"; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
#Example#C###################################################################### |
47
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using |
48
|
|
|
|
|
|
|
#the bd algorithm and two threads (useful for dual core processors) |
49
|
|
|
|
|
|
|
#NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate |
50
|
|
|
|
|
|
|
# and death_rate we are giving the model |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
53
|
|
|
|
|
|
|
tree_size => 10, |
54
|
|
|
|
|
|
|
threads => 2, |
55
|
|
|
|
|
|
|
algorithm => 'bd', |
56
|
|
|
|
|
|
|
algorithm_options => {rate => 1, nstar => 30}, |
57
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death, |
58
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
#Example#D###################################################################### |
61
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using |
62
|
|
|
|
|
|
|
#incomplete taxon sampling |
63
|
|
|
|
|
|
|
# |
64
|
|
|
|
|
|
|
#sampling_probability is set so that the true tree has 10 species with 50% probability, |
65
|
|
|
|
|
|
|
#11 species with 30% probability and 12 species with 20% probability |
66
|
|
|
|
|
|
|
# |
67
|
|
|
|
|
|
|
#NB: we must specify an mstar here this will depend on the model parameters and the |
68
|
|
|
|
|
|
|
# incomplete taxon sampling parameters |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $algorithm_options = {rate => 1, |
71
|
|
|
|
|
|
|
nstar => 30, |
72
|
|
|
|
|
|
|
mstar => 12, |
73
|
|
|
|
|
|
|
sampling_probability => [.5, .3, .2]}; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
76
|
|
|
|
|
|
|
tree_size => 10, |
77
|
|
|
|
|
|
|
algorithm => 'incomplete_sampling_bd', |
78
|
|
|
|
|
|
|
algorithm_options => $algorithm_options, |
79
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death, |
80
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
#Example#E###################################################################### |
83
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#First we define the random function for the shortest pendant edge for a Yule model |
86
|
|
|
|
|
|
|
my $random_pendant_function = sub { |
87
|
|
|
|
|
|
|
%options = @_; |
88
|
|
|
|
|
|
|
return -log(rand)/$options{birth_rate}/$options{tree_size}; |
89
|
|
|
|
|
|
|
}; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
#Then we produce our sample |
92
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
93
|
|
|
|
|
|
|
tree_size => 10, |
94
|
|
|
|
|
|
|
algorithm => 'memoryless_b', |
95
|
|
|
|
|
|
|
algorithm_options => {pendant_dist => $random_pendant_function}, |
96
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth, |
97
|
|
|
|
|
|
|
model_options => {birth_rate=>1}); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
#Example#F####################################################################### |
100
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a constant birth death rate model using the |
101
|
|
|
|
|
|
|
#constant_rate_bd algorithm |
102
|
|
|
|
|
|
|
my ($sample) = sample(sample_size => 5, |
103
|
|
|
|
|
|
|
tree_size => 10, |
104
|
|
|
|
|
|
|
algorithm => 'constant_rate_bd', |
105
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 DESCRIPTION |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
This package contains evolutionary models for phylogenetic trees and |
110
|
|
|
|
|
|
|
algorithms for sampling from these models. It is a non-OO module that |
111
|
|
|
|
|
|
|
optionally exports the 'sample', 'constant_rate_birth' and |
112
|
|
|
|
|
|
|
'constant_rate_birth_death' subroutines into the caller's namespace, |
113
|
|
|
|
|
|
|
using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >> |
114
|
|
|
|
|
|
|
directive. Alternatively, you can call the subroutines as class methods, |
115
|
|
|
|
|
|
|
as in the synopsis. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
The initial set of algorithms available in this package corresponds to those in: |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Sampling trees from evolutionary models |
120
|
|
|
|
|
|
|
Klaas Hartmann, Dennis Wong, Tanja Gernhard |
121
|
|
|
|
|
|
|
Systematic Biology, in press |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Some comments and code refers back to this paper. |
124
|
|
|
|
|
|
|
Further algorithms and evolutionary are encouraged |
125
|
|
|
|
|
|
|
and welcome. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
To make this code as straightforward as possible to read some of the |
128
|
|
|
|
|
|
|
algorithms have been implemented in a less than optimal manner. The code |
129
|
|
|
|
|
|
|
also follows the structure of an earlier version of the manuscript so |
130
|
|
|
|
|
|
|
there is some redundancy (eg. the birth algorithm is just a specific |
131
|
|
|
|
|
|
|
instance of the birth_death algorithm) |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head1 SAMPLING |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
All sampling algorithms should be accessed through the generic sample |
136
|
|
|
|
|
|
|
interface. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 Generic sampling interface: sample() |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Type : Interface |
141
|
|
|
|
|
|
|
Title : sample |
142
|
|
|
|
|
|
|
Usage : see SYNOPSIS |
143
|
|
|
|
|
|
|
Function: Samples phylogenetic trees from an evolutionary model |
144
|
|
|
|
|
|
|
Returns : A sample of phylogenetic trees and statistics from the |
145
|
|
|
|
|
|
|
sampling algorithm |
146
|
|
|
|
|
|
|
Args : Sampling parameters in a hash |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
This method acts as a gateway to the various sampling algorithms. The |
149
|
|
|
|
|
|
|
argument is a single hash containing the options for the sampling run. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Sampling parameters (* denotes optional parameters): |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sample_size The number of trees to return (more trees may be returned) |
155
|
|
|
|
|
|
|
tree_size The size that returned trees should be |
156
|
|
|
|
|
|
|
model The evolutionary model (should be a function reference) |
157
|
|
|
|
|
|
|
model_options A hash pointer for model options (see individual models) |
158
|
|
|
|
|
|
|
algorithm The algorithm to use (omit the preceding sample_) |
159
|
|
|
|
|
|
|
algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details) |
160
|
|
|
|
|
|
|
threads* The number of threads to use (default is 1) |
161
|
|
|
|
|
|
|
output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree) |
162
|
|
|
|
|
|
|
remove_extinct Set to true to remove extinct species |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Available algorithms (algorithm names in the paper are given in brackets): |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
b For all pure birth models (simplified GSA) |
169
|
|
|
|
|
|
|
bd For all birth and death models (GSA) |
170
|
|
|
|
|
|
|
incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA) |
171
|
|
|
|
|
|
|
memoryless_b For memoryless pure birth models (PBMSA) |
172
|
|
|
|
|
|
|
constant_rate_bd For birth and death models with constant rates (BDSA) |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Model |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
If you create your own model it must accept an options hash as its input. |
178
|
|
|
|
|
|
|
This options hash can contain any parameters you desire. Your model should |
179
|
|
|
|
|
|
|
simulate a tree until it becomes extinct or the size/age limit as specified |
180
|
|
|
|
|
|
|
in the options has been reached. Respectively these options are tree_size |
181
|
|
|
|
|
|
|
and tree_age. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Multi-threading |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Multi-thread support is very simplistic. The number of threads you specify |
187
|
|
|
|
|
|
|
are created and each is assigned the task of finding sample_size/threads |
188
|
|
|
|
|
|
|
samples. I had problems with using Bio::Phylo::Forest::Tree in a multi- |
189
|
|
|
|
|
|
|
threaded setting. Hence the sampled trees are returned as newick strings to |
190
|
|
|
|
|
|
|
the main routine where (if required) Tree objects are recreated from the |
191
|
|
|
|
|
|
|
strings. For most applications this overhead seems negligible in contrast |
192
|
|
|
|
|
|
|
to the sampling times. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
From a code perspective this function (sample): |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Checks input arguments |
198
|
|
|
|
|
|
|
Handles multi-threading |
199
|
|
|
|
|
|
|
Calls the individual algorithms to perform sampling |
200
|
|
|
|
|
|
|
Reformats data |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=cut |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
my @threads; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub sample { |
207
|
5
|
|
|
5
|
1
|
107
|
my %options = @_; |
208
|
5
|
|
|
|
|
66
|
my %methods_require = ( |
209
|
|
|
|
|
|
|
b => ['rate'], |
210
|
|
|
|
|
|
|
bd => [ 'rate', 'nstar' ], |
211
|
|
|
|
|
|
|
incomplete_sampling_bd => |
212
|
|
|
|
|
|
|
[ 'rate', 'nstar', 'mstar', 'sampling_probability' ], |
213
|
|
|
|
|
|
|
memoryless_b => ['pendant_dist'], |
214
|
|
|
|
|
|
|
constant_rate_bd => [], |
215
|
|
|
|
|
|
|
); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
#Default is to sample a single tree |
218
|
5
|
50
|
|
|
|
20
|
$options{sample_size} = 1 unless defined $options{sample_size}; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
#Default is to use a single thread |
221
|
5
|
100
|
|
|
|
27
|
$options{threads} = 1 unless defined $options{threads}; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
#Check that multiple threads are actually supported |
224
|
5
|
50
|
33
|
|
|
22
|
if ( $options{threads} > 1 && !$Config{useithreads} ) { |
225
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
226
|
|
|
|
|
|
|
"your perl installation does not support multiple threads" ); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
#Check an algorithm was specified |
230
|
5
|
50
|
|
|
|
19
|
unless ( defined $options{algorithm} ) { |
231
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( |
232
|
|
|
|
|
|
|
'error' => "an algorithm type must be specified" ); |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
#Check the algorithm type is valid |
236
|
5
|
50
|
|
|
|
18
|
unless ( defined $methods_require{ $options{algorithm} } ) { |
237
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
238
|
|
|
|
|
|
|
'error' => "'$options{algorithm}' is not a valid algorithm" ); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
#Check the algorithm options |
242
|
5
|
|
|
|
|
11
|
foreach ( @{ $methods_require{ $options{algorithm} } } ) { |
|
5
|
|
|
|
|
18
|
|
243
|
8
|
50
|
|
|
|
23
|
unless ( defined $options{algorithm_options}->{$_} ) { |
244
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
245
|
|
|
|
|
|
|
"'$_' must be specified for the '$options{algorithm}' algorithm" |
246
|
|
|
|
|
|
|
); |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
#If we are doing incomplete taxon sampling the sampling probability must be specified |
251
|
5
|
0
|
33
|
|
|
17
|
if ( defined $options{incomplete_sampling} |
|
|
|
0
|
|
|
|
|
252
|
|
|
|
|
|
|
&& $options{incomplete_sampling} |
253
|
|
|
|
|
|
|
&& !( defined $options{algorithm_options}->{sampling_probability} ) ) |
254
|
|
|
|
|
|
|
{ |
255
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
256
|
|
|
|
|
|
|
"'sampling_probability' must be specified for post hoc incomplete sampling to be applied" |
257
|
|
|
|
|
|
|
); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
#Check that a model has been specified |
261
|
5
|
50
|
66
|
|
|
47
|
unless ( defined $options{model} |
262
|
|
|
|
|
|
|
|| $options{algorithm} eq 'constant_rate_bd' ) |
263
|
|
|
|
|
|
|
{ |
264
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( |
265
|
|
|
|
|
|
|
'error' => "a model must be specified" ); |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
#Get a function pointer for the algorithm |
269
|
5
|
|
|
|
|
18
|
my $algorithm = 'sample_' . $options{algorithm}; |
270
|
5
|
|
|
|
|
23
|
$algorithm = \&$algorithm; |
271
|
5
|
|
|
|
|
11
|
my @output; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
#Run the algorithm, different method for multiple threads |
274
|
5
|
50
|
|
|
|
15
|
if ( $options{threads} > 1 ) { |
275
|
0
|
|
|
|
|
0
|
require threads; |
276
|
0
|
|
|
|
|
0
|
require threads::shared; |
277
|
0
|
|
|
|
|
0
|
@output = ( [], [] ); |
278
|
|
|
|
|
|
|
$SIG{'KILL'} = sub { |
279
|
0
|
|
|
0
|
|
0
|
foreach (@threads) { $_->kill('KILL')->detach(); } |
|
0
|
|
|
|
|
0
|
|
280
|
0
|
|
|
|
|
0
|
threads->exit(); |
281
|
0
|
|
|
|
|
0
|
}; |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
#Start the threads |
284
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) { |
285
|
0
|
|
|
|
|
0
|
@_ = (); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
#Note the list context of the return argument here determines |
288
|
|
|
|
|
|
|
#the data type returned from the thread. |
289
|
|
|
|
|
|
|
( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options, |
290
|
|
|
|
|
|
|
sample_size => ceil( $options{sample_size} / $options{threads} ) |
291
|
0
|
|
|
|
|
0
|
); |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
#Wait for them to finish and combine the data |
295
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) { |
296
|
0
|
|
|
|
|
0
|
until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); } |
|
0
|
|
|
|
|
0
|
|
297
|
0
|
|
|
|
|
0
|
my @thread_data = $threads[ $_ - 1 ]->join; |
298
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) { |
299
|
0
|
0
|
|
|
|
0
|
$output[$index] = [] if scalar @output < $index; |
300
|
|
|
|
|
|
|
$output[$index] = |
301
|
0
|
|
|
|
|
0
|
[ @{ $output[$index] }, @{ $thread_data[$index] } ]; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
#Turn newick strings back into tree objects |
306
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{output_format} |
307
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' ) |
308
|
|
|
|
|
|
|
{ |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
#Convert to newick trees |
311
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
312
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = Bio::Phylo::IO->parse( |
313
|
|
|
|
|
|
|
-format => 'newick', |
314
|
|
|
|
|
|
|
-string => $output[0]->[$index] |
315
|
|
|
|
|
|
|
)->first; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
else { |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
#Get the samples |
322
|
5
|
|
|
|
|
35
|
@output = &$algorithm(%options); |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
#Turn into newick trees if requested |
325
|
5
|
50
|
66
|
|
|
164
|
if ( defined $options{output_format} |
|
|
100
|
66
|
|
|
|
|
326
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' ) |
327
|
|
|
|
|
|
|
{ |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
#Convert to newick trees |
330
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
331
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = |
332
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 ); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
elsif ( defined $options{output_format} |
336
|
|
|
|
|
|
|
&& $options{output_format} eq 'forest' ) |
337
|
|
|
|
|
|
|
{ |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# save as a forest |
340
|
4
|
|
|
|
|
27
|
my $forest = Bio::Phylo::Forest->new; |
341
|
4
|
|
|
|
|
11
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
24
|
|
|
|
|
63
|
|
342
|
20
|
|
|
|
|
61
|
$forest->insert( $output[0]->[$index] ); |
343
|
|
|
|
|
|
|
} |
344
|
4
|
|
|
|
|
14
|
$output[0] = $forest; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
} |
347
|
5
|
|
|
|
|
75
|
return @output; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=begin comment |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Type : Internal method |
353
|
|
|
|
|
|
|
Title : _sample_newick |
354
|
|
|
|
|
|
|
Usage : ($thread) = threads->new(\&_sample_newick, %options); |
355
|
|
|
|
|
|
|
@thread_output = $thread->join; |
356
|
|
|
|
|
|
|
Function: Wrapper for sampling routines used for multi-threading |
357
|
|
|
|
|
|
|
Returns : Output from sampling algorithms with trees replaced by newick strings |
358
|
|
|
|
|
|
|
Args : %options to pass to sampling algorithm |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=end comment |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=cut |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
sub _sample_newick { |
365
|
0
|
|
|
0
|
|
0
|
my %options = @_; |
366
|
0
|
|
|
|
|
0
|
my $algorithm = 'sample_' . $options{algorithm}; |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Thread 'cancellation' signal handler |
369
|
0
|
|
|
0
|
|
0
|
$SIG{'KILL'} = sub { threads->exit(); }; |
|
0
|
|
|
|
|
0
|
|
370
|
0
|
|
|
|
|
0
|
$algorithm = \&$algorithm; |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
#Perform the sampling |
373
|
0
|
|
|
|
|
0
|
my @output = ( &$algorithm(%options) ); |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
#Convert to newick trees |
376
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
377
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = |
378
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 ); |
379
|
|
|
|
|
|
|
} |
380
|
0
|
|
|
|
|
0
|
return @output; |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 Sampling algorithms |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
These algorithms should be accessed through the sampling interface (sample()). |
386
|
|
|
|
|
|
|
Additional parameters need to be passed to these algorithms as described for |
387
|
|
|
|
|
|
|
each algorithm. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=over |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item sample_b() |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Sample from any birth model |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Type : Sampling algorithm |
396
|
|
|
|
|
|
|
Title : sample_b |
397
|
|
|
|
|
|
|
Usage : see sample |
398
|
|
|
|
|
|
|
Function: Samples trees from a pure birth model |
399
|
|
|
|
|
|
|
Returns : see sample |
400
|
|
|
|
|
|
|
Args : %algorithm_options requires the field: |
401
|
|
|
|
|
|
|
rate => sampling rate |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=cut |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
sub sample_b { |
406
|
1
|
|
|
1
|
1
|
4
|
my %options = @_; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
#The sample of trees |
409
|
1
|
|
|
|
|
3
|
my @sample; |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
#A list of the expected number of samples |
412
|
|
|
|
|
|
|
my @expected_summary; |
413
|
1
|
|
|
|
|
3
|
my $model = $options{model}; |
414
|
1
|
|
|
|
|
2
|
my $rate = $options{algorithm_options}->{rate}; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
#While we have insufficient samples |
417
|
1
|
|
|
|
|
4
|
while ( scalar @sample < $options{sample_size} ) { |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
#Generate a candidate model run |
420
|
14
|
|
|
|
|
58
|
my $candidate = &$model( %{ $options{model_options} }, |
421
|
14
|
|
|
|
|
24
|
tree_size => $options{tree_size} ); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
#Check that the tree has no extinctions |
424
|
14
|
50
|
|
|
|
56
|
unless ( $candidate->is_ultrametric(1e-6) ) { |
425
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
426
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" ); |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
#Get the lineage through time data |
430
|
14
|
|
|
|
|
40
|
my ( $time, $count ) = lineage_through_time($candidate); |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
#The expected number of samples we want |
433
|
14
|
|
|
|
|
30
|
my $expected_samples = $rate * ( $time->[-1] - $time->[-2] ); |
434
|
14
|
|
|
|
|
25
|
push( @expected_summary, $expected_samples ); |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
437
|
14
|
|
|
|
|
31
|
while ( $expected_samples > 0 ) { |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
440
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
441
|
|
|
|
|
|
|
#according to the remaining number of samples |
442
|
16
|
100
|
100
|
|
|
63
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
443
|
5
|
|
|
|
|
16
|
my $tree = copy_tree($candidate); |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
#Truncate the tree at a random point during which it had tree_size species |
446
|
5
|
|
|
|
|
19
|
truncate_tree_time( $tree, |
447
|
|
|
|
|
|
|
( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] ); |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
#Add the tree to our sample |
450
|
5
|
|
|
|
|
12
|
push( @sample, $tree ); |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
453
|
|
|
|
|
|
|
#many samples have been obtained) |
454
|
5
|
50
|
|
|
|
16
|
if ( defined $options{counter} ) { |
455
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
456
|
0
|
|
|
|
|
0
|
&$counter(1); |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
} |
459
|
16
|
|
|
|
|
89
|
$expected_samples--; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
1
|
|
|
|
|
6
|
return ( \@sample, \@expected_summary ); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item sample_bd() |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Sample from any birth and death model for which nstar exists |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Type : Sampling algorithm |
470
|
|
|
|
|
|
|
Title : sample_bd |
471
|
|
|
|
|
|
|
Usage : see sample |
472
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model |
473
|
|
|
|
|
|
|
Returns : see sample |
474
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields: |
475
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be |
476
|
|
|
|
|
|
|
a negligible chance of returning to tree_size species |
477
|
|
|
|
|
|
|
rate => sampling rate |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub sample_bd { |
482
|
1
|
|
|
1
|
1
|
5
|
my %options = @_; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
#The sample of trees |
485
|
1
|
|
|
|
|
3
|
my @sample; |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
#A list of the expected number of samples |
488
|
|
|
|
|
|
|
my @expected_summary; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
#Convenience variables |
491
|
1
|
|
|
|
|
3
|
my $model = $options{model}; |
492
|
1
|
|
|
|
|
2
|
my $nstar = $options{algorithm_options}->{nstar}; |
493
|
1
|
|
|
|
|
3
|
my $rate = $options{algorithm_options}->{rate}; |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
#While we have insufficient samples |
496
|
1
|
|
|
|
|
4
|
while ( scalar @sample < $options{sample_size} ) { |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
#Generate a candidate model run |
499
|
|
|
|
|
|
|
my $candidate = |
500
|
55
|
|
|
|
|
104
|
&$model( %{ $options{model_options} }, tree_size => $nstar ); |
|
55
|
|
|
|
|
308
|
|
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#Get the lineage through time data |
503
|
55
|
|
|
|
|
227
|
my ( $time, $count ) = lineage_through_time($candidate); |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
#Reorganise the lineage through time data |
506
|
|
|
|
|
|
|
#@duration contains the length of the intervals with the right number of species |
507
|
|
|
|
|
|
|
#@start contains the starting times of these intervals |
508
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected |
509
|
|
|
|
|
|
|
#$total_duration contains the sum of the interval lengths |
510
|
55
|
|
|
|
|
140
|
my ( @duration, @start, @prob, $total_duration ); |
511
|
55
|
|
|
|
|
147
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
3492
|
|
|
|
|
4920
|
|
512
|
3437
|
100
|
|
|
|
5144
|
if ( $count->[$index] == $options{tree_size} ) { |
513
|
95
|
|
|
|
|
184
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] ); |
514
|
95
|
|
|
|
|
138
|
push( @start, $time->[$index] ); |
515
|
95
|
|
|
|
|
134
|
$total_duration += $duration[-1]; |
516
|
95
|
|
|
|
|
140
|
push( @prob, $total_duration ); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
1
|
|
|
1
|
|
8
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
520
|
55
|
100
|
|
|
|
353
|
next if $total_duration == 0; |
521
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
306
|
|
522
|
20
|
|
|
|
|
74
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) { |
523
|
95
|
|
|
|
|
163
|
$prob[$index] /= $total_duration; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
#The expected number of samples we want |
527
|
20
|
|
|
|
|
50
|
my $expected_samples = $rate * $total_duration; |
528
|
20
|
|
|
|
|
52
|
push( @expected_summary, $expected_samples ); |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
531
|
20
|
|
|
|
|
68
|
while ( $expected_samples > 0 ) { |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
534
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
535
|
|
|
|
|
|
|
#according to the remaining number of samples |
536
|
21
|
100
|
100
|
|
|
135
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
537
|
5
|
|
|
|
|
26
|
my $tree = copy_tree($candidate); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
#Get a random interval |
540
|
5
|
|
|
|
|
22
|
my $interval_choice = rand(1); |
541
|
5
|
|
|
|
|
11
|
my $interval; |
542
|
5
|
|
|
|
|
38
|
for ( |
543
|
|
|
|
|
|
|
$interval = 0 ; |
544
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ; |
545
|
|
|
|
|
|
|
$interval++ |
546
|
|
|
|
|
|
|
) |
547
|
|
|
|
|
|
|
{ |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval |
551
|
5
|
|
|
|
|
29
|
truncate_tree_time( $tree, |
552
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] ); |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
#Add the tree to our sample |
555
|
5
|
|
|
|
|
20
|
push( @sample, $tree ); |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
558
|
|
|
|
|
|
|
#many samples have been obtained) |
559
|
5
|
50
|
|
|
|
23
|
if ( defined $options{counter} ) { |
560
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
561
|
0
|
|
|
|
|
0
|
&$counter(1); |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
} |
564
|
21
|
|
|
|
|
400
|
$expected_samples--; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
} |
567
|
1
|
|
|
|
|
8
|
return ( \@sample, \@expected_summary ); |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=item sample_incomplete_sampling_bd() |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
Sample from any birth and death model with incomplete taxon sampling |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Type : Sampling algorithm |
575
|
|
|
|
|
|
|
Title : sample_incomplete_sampling_bd |
576
|
|
|
|
|
|
|
Usage : see sample |
577
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model with incomplete taxon sampling |
578
|
|
|
|
|
|
|
Returns : see sample |
579
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields: |
580
|
|
|
|
|
|
|
rate => sampling rate |
581
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be |
582
|
|
|
|
|
|
|
a negligible chance of returning to mstar species |
583
|
|
|
|
|
|
|
mstar => trees with more than mstar species form a negligible |
584
|
|
|
|
|
|
|
contribution to the final sample. |
585
|
|
|
|
|
|
|
sampling_probability => see below. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
sampling_probability |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
vector: must have length (mstar-tree_size+1) The ith element gives the probability |
590
|
|
|
|
|
|
|
of not sampling i species. |
591
|
|
|
|
|
|
|
scalar: the probability of sampling any individual species. Is used to calculate |
592
|
|
|
|
|
|
|
a vector as discussed in the paper. |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=cut |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
sub sample_incomplete_sampling_bd { |
597
|
1
|
|
|
1
|
1
|
5
|
my %options = @_; |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# %options = (%options, %{$options{algorithm_options}}); |
600
|
|
|
|
|
|
|
#The sample of trees |
601
|
1
|
|
|
|
|
3
|
my @sample; |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
#A list of the expected number of samples |
604
|
|
|
|
|
|
|
my @expected_summary; |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
# #Convenience variables |
607
|
1
|
|
|
|
|
2
|
my $model = $options{model}; |
608
|
1
|
|
|
|
|
3
|
my $nstar = $options{algorithm_options}->{nstar}; |
609
|
1
|
|
|
|
|
2
|
my $mstar = $options{algorithm_options}->{mstar}; |
610
|
1
|
|
|
|
|
3
|
my $rate = $options{algorithm_options}->{rate}; |
611
|
|
|
|
|
|
|
my $sampling_probability = |
612
|
1
|
|
|
|
|
2
|
$options{algorithm_options}->{sampling_probability}; |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
#If sampling_probability is a list check it's length |
615
|
1
|
50
|
33
|
|
|
7
|
if ( ref $sampling_probability |
616
|
1
|
|
|
|
|
7
|
&& scalar @{$sampling_probability} != |
617
|
|
|
|
|
|
|
( $mstar - $options{tree_size} + 1 ) ) |
618
|
|
|
|
|
|
|
{ |
619
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
620
|
|
|
|
|
|
|
"'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items" |
621
|
|
|
|
|
|
|
); |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
#If a single sampling probability was given we calculate the |
625
|
|
|
|
|
|
|
#probability of sub-sampling given numbers of species. |
626
|
1
|
50
|
|
|
|
4
|
unless ( ref $sampling_probability ) { |
627
|
0
|
|
|
|
|
0
|
my $p = $sampling_probability; |
628
|
0
|
|
|
|
|
0
|
my @vec; |
629
|
0
|
|
|
|
|
0
|
my $total = 0; |
630
|
0
|
|
|
|
|
0
|
foreach ( ( $options{tree_size} .. $mstar ) ) { |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
#The probability of sampling tree_size species from a tree containing $_ species |
633
|
|
|
|
|
|
|
push( @vec, |
634
|
|
|
|
|
|
|
nchoosek( $_, $options{tree_size} ) * |
635
|
|
|
|
|
|
|
( $p**$options{tree_size} ) * |
636
|
0
|
|
|
|
|
0
|
( ( 1 - $p )**( $_ - $options{tree_size} ) ) ); |
637
|
0
|
|
|
|
|
0
|
$total += $vec[-1]; |
638
|
|
|
|
|
|
|
} |
639
|
0
|
|
|
|
|
0
|
for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; } |
|
0
|
|
|
|
|
0
|
|
640
|
0
|
|
|
|
|
0
|
$sampling_probability = \@vec; |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
#We now normalise the sampling_probability list so that it sums to unity |
644
|
|
|
|
|
|
|
#this allows comparable sampling rates to be used here and in sample_bd |
645
|
1
|
|
|
|
|
2
|
my $total_sp = 0; |
646
|
1
|
|
|
|
|
3
|
foreach ( @{$sampling_probability} ) { $total_sp += $_; } |
|
1
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
7
|
|
647
|
1
|
|
|
1
|
|
6
|
no warnings; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
73
|
|
648
|
1
|
|
|
|
|
8
|
foreach ( my $index = 1 .. scalar @{$sampling_probability} ) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
649
|
1
|
|
|
1
|
|
6
|
no warnings; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
650
|
1
|
|
|
|
|
7
|
$sampling_probability->[ $index - 1 ] /= $total_sp; |
651
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
22
|
|
652
|
|
|
|
|
|
|
} |
653
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
61
|
|
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
#While we have insufficient samples |
656
|
1
|
|
|
|
|
5
|
while ( scalar @sample < $options{sample_size} ) { |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
#Generate a candidate model run |
659
|
|
|
|
|
|
|
my $candidate = |
660
|
20
|
|
|
|
|
38
|
&$model( %{ $options{model_options} }, tree_size => $nstar ); |
|
20
|
|
|
|
|
124
|
|
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
#Get the lineage through time data |
663
|
20
|
|
|
|
|
91
|
my ( $time, $count ) = lineage_through_time($candidate); |
664
|
20
|
|
|
|
|
48
|
my @size_stats; |
665
|
20
|
|
|
|
|
53
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
1778
|
|
|
|
|
2526
|
|
666
|
1
|
|
|
1
|
|
5
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
168
|
|
667
|
1758
|
100
|
|
|
|
2558
|
if ( $count->[$index] >= $options{tree_size} ) { |
668
|
|
|
|
|
|
|
$size_stats[ $count->[$index] - $options{tree_size} ] += |
669
|
|
|
|
|
|
|
$time->[$index] * |
670
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] - |
671
|
1141
|
|
|
|
|
1791
|
$options{tree_size} ]; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
#Reorganise the lineage through time data |
676
|
|
|
|
|
|
|
#@duration contains the length of intervals with more than tree_size species |
677
|
|
|
|
|
|
|
#@start contains the starting times of these intervals |
678
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected |
679
|
|
|
|
|
|
|
#$total_prob contains the sum of @prob before normalisation |
680
|
20
|
|
|
|
|
41
|
my ( @duration, @start, @prob, $total_prob ); |
681
|
20
|
|
|
|
|
37
|
$total_prob = 0; |
682
|
20
|
|
|
|
|
38
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
1778
|
|
|
|
|
2495
|
|
683
|
1758
|
100
|
|
|
|
2457
|
if ( $count->[$index] >= $options{tree_size} ) { |
684
|
1141
|
|
|
|
|
1503
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] ); |
685
|
1141
|
|
|
|
|
1317
|
push( @start, $time->[$index] ); |
686
|
1
|
|
|
1
|
|
9
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7289
|
|
687
|
|
|
|
|
|
|
$total_prob += |
688
|
|
|
|
|
|
|
$duration[-1] * |
689
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] - |
690
|
1141
|
|
|
|
|
1466
|
$options{tree_size} ]; |
691
|
1141
|
|
|
|
|
1505
|
push( @prob, $total_prob ); |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
} |
694
|
20
|
100
|
|
|
|
111
|
next if $total_prob == 0; |
695
|
10
|
|
|
|
|
44
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) { |
696
|
1141
|
|
|
|
|
1604
|
$prob[$index] /= $total_prob; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
#The expected number of samples we want |
700
|
10
|
|
|
|
|
22
|
my $expected_samples = $rate * $total_prob; |
701
|
10
|
|
|
|
|
27
|
push( @expected_summary, $expected_samples ); |
702
|
|
|
|
|
|
|
$expected_samples = $options{sample_size} - scalar @sample |
703
|
10
|
50
|
|
|
|
45
|
if $expected_samples > $options{sample_size} - scalar @sample; |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
706
|
10
|
|
|
|
|
48
|
while ( $expected_samples > 0 ) { |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
709
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
710
|
|
|
|
|
|
|
#according to the remaining number of samples |
711
|
10
|
100
|
66
|
|
|
88
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
712
|
5
|
|
|
|
|
28
|
my $tree = copy_tree($candidate); |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
#Get a random interval |
715
|
5
|
|
|
|
|
24
|
my $interval_choice = rand(1); |
716
|
5
|
|
|
|
|
13
|
my $interval; |
717
|
5
|
|
|
|
|
77
|
for ( |
718
|
|
|
|
|
|
|
$interval = 0 ; |
719
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ; |
720
|
|
|
|
|
|
|
$interval++ |
721
|
|
|
|
|
|
|
) |
722
|
|
|
|
|
|
|
{ |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval |
726
|
5
|
|
|
|
|
38
|
truncate_tree_time( $tree, |
727
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] ); |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
#Remove random species so it has the right size |
730
|
5
|
|
|
|
|
34
|
truncate_tree_size( $tree, $options{tree_size} ); |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
#Add the tree to our sample |
733
|
5
|
|
|
|
|
18
|
push( @sample, $tree ); |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
736
|
|
|
|
|
|
|
#many samples have been obtained) |
737
|
5
|
50
|
|
|
|
29
|
if ( defined $options{counter} ) { |
738
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
739
|
0
|
|
|
|
|
0
|
&$counter(1); |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
} |
742
|
10
|
|
|
|
|
541
|
$expected_samples--; |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
} |
745
|
1
|
|
|
|
|
13
|
return ( \@sample, \@expected_summary ); |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=item sample_memoryless_b() |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
Sample from a memoryless birth model |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Type : Sampling algorithm |
753
|
|
|
|
|
|
|
Title : sample_memoryless_b |
754
|
|
|
|
|
|
|
Usage : see sample |
755
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model |
756
|
|
|
|
|
|
|
Returns : see sample |
757
|
|
|
|
|
|
|
Args : %algorithm_options with fields: |
758
|
|
|
|
|
|
|
pendant_dist => function reference for generating random |
759
|
|
|
|
|
|
|
shortest pendant edges |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
NB: The function pointed to by pendant_dist is given model_options |
762
|
|
|
|
|
|
|
as it's input argument with an added field tree_size. It must return |
763
|
|
|
|
|
|
|
a random value from the probability density for the shortest pendant |
764
|
|
|
|
|
|
|
edges. |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=cut |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
sub sample_memoryless_b { |
769
|
1
|
|
|
1
|
1
|
6
|
my %options = @_; |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
#The sample of trees |
772
|
1
|
|
|
|
|
3
|
my @sample; |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
#A list of the expected number of samples |
775
|
|
|
|
|
|
|
my @expected_summary; |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
#The user specified functions |
778
|
1
|
|
|
|
|
4
|
my $model = $options{model}; |
779
|
1
|
|
|
|
|
2
|
my $pendant_dist = $options{algorithm_options}->{pendant_dist}; |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
#While we have insufficient samples |
782
|
1
|
|
|
|
|
6
|
while ( scalar @sample < $options{sample_size} ) { |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
#Generate a tree ending just after the last speciation event |
785
|
5
|
|
|
|
|
24
|
my $tree = &$model( %{ $options{model_options} }, |
786
|
5
|
|
|
|
|
13
|
tree_size => $options{tree_size} ); |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
#Check that the tree has no extinctions |
789
|
5
|
50
|
|
|
|
31
|
unless ( $tree->is_ultrametric(1e-6) ) { |
790
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
791
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" ); |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
#Get the random length to add after the last speciation event |
795
|
5
|
|
|
|
|
49
|
my $pendant_add = &$pendant_dist( %{ $options{model_options} }, |
796
|
5
|
|
|
|
|
11
|
tree_size => $options{tree_size} ); |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
#Add the final length |
799
|
5
|
|
|
|
|
58
|
foreach ( @{ $tree->get_terminals } ) { |
|
5
|
|
|
|
|
23
|
|
800
|
50
|
|
|
|
|
100
|
$_->set_branch_length( $_->get_branch_length + $pendant_add ); |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
#Add to the sample |
804
|
5
|
|
|
|
|
19
|
push( @sample, $tree ); |
805
|
5
|
|
|
|
|
11
|
push( @expected_summary, 1 ); |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
808
|
|
|
|
|
|
|
#many samples have been obtained) |
809
|
5
|
50
|
|
|
|
34
|
if ( defined $options{counter} ) { |
810
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
811
|
0
|
|
|
|
|
0
|
&$counter(1); |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
} |
814
|
1
|
|
|
|
|
7
|
return ( \@sample, \@expected_summary ); |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
=item sample_constant_rate_bd() |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
Sample from a constant rate birth and death model |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
Type : Sampling algorithm |
822
|
|
|
|
|
|
|
Title : sample_constant_rate_bd |
823
|
|
|
|
|
|
|
Usage : see sample |
824
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model |
825
|
|
|
|
|
|
|
Returns : see sample |
826
|
|
|
|
|
|
|
Args : no specific algorithm options but see below |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
NB: This algorithm only applies to constant rate birth and death |
829
|
|
|
|
|
|
|
processes. Consequently a model does not need to be specified (and |
830
|
|
|
|
|
|
|
will be ignored if it is). But birth_rate and death_rate model |
831
|
|
|
|
|
|
|
options must be given. |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=cut |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
sub sample_constant_rate_bd { |
836
|
1
|
|
|
1
|
1
|
5
|
my %options = @_; |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
#Store parameters in shorter variables (for clarity) |
839
|
|
|
|
|
|
|
my ( $br, $dr, $n ) = ( |
840
|
|
|
|
|
|
|
$options{model_options}->{birth_rate}, |
841
|
|
|
|
|
|
|
$options{model_options}->{death_rate}, |
842
|
|
|
|
|
|
|
$options{tree_size} |
843
|
1
|
|
|
|
|
5
|
); |
844
|
1
|
|
|
|
|
4
|
my @sample; |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
#Loop for sampling each tree |
847
|
1
|
|
|
|
|
6
|
while ( scalar @sample < $options{sample_size} ) { |
848
|
5
|
|
|
|
|
16
|
my @nodes; |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
#Compute the random tree age from the inverse CDF (different formulas for |
851
|
|
|
|
|
|
|
#birth rate == death rate and otherwise) |
852
|
|
|
|
|
|
|
my $tree_age; |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
#The uniform random variable |
855
|
5
|
|
|
|
|
19
|
my $r = rand; |
856
|
5
|
50
|
|
|
|
23
|
if ( $br == $dr ) { |
857
|
0
|
|
|
|
|
0
|
$tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) ); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
else { |
860
|
5
|
|
|
|
|
57
|
$tree_age = |
861
|
|
|
|
|
|
|
1 / |
862
|
|
|
|
|
|
|
( $br - $dr ) * |
863
|
|
|
|
|
|
|
log( |
864
|
|
|
|
|
|
|
( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) ); |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
#Find the random speciation times |
868
|
5
|
|
|
|
|
8
|
my @speciation; |
869
|
5
|
|
|
|
|
19
|
foreach ( 0 .. ( $n - 2 ) ) { |
870
|
45
|
50
|
|
|
|
64
|
if ( $br == $dr ) { |
871
|
0
|
|
|
|
|
0
|
my $r = rand; |
872
|
0
|
|
|
|
|
0
|
$speciation[$_] = |
873
|
|
|
|
|
|
|
$r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) ); |
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
else { |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
#Two repeated parts of the inverse CDF for clarity |
878
|
45
|
|
|
|
|
81
|
my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age ); |
879
|
45
|
|
|
|
|
64
|
my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand; |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
#The random speciation time from the inverse CDF |
882
|
45
|
|
|
|
|
97
|
$speciation[$_] = |
883
|
|
|
|
|
|
|
1 / |
884
|
|
|
|
|
|
|
( $br - $dr ) * |
885
|
|
|
|
|
|
|
log( ( $a - $dr * $b ) / ( $a - $br * $b ) ); |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
#Create the initial terminals and a vector for their ages |
890
|
5
|
|
|
|
|
11
|
my @terminals; |
891
|
|
|
|
|
|
|
my @ages; |
892
|
5
|
|
|
|
|
12
|
foreach ( 0 .. ( $n - 1 ) ) { |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
#Add a new terminal |
895
|
50
|
|
|
|
|
120
|
$terminals[$_] = Bio::Phylo::Forest::Node->new(); |
896
|
50
|
|
|
|
|
169
|
$terminals[$_]->set_name( 'ID' . $_ ); |
897
|
50
|
|
|
|
|
94
|
$ages[$_] = 0; |
898
|
|
|
|
|
|
|
} |
899
|
5
|
|
|
|
|
17
|
@nodes = @terminals; |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
#Sort the speciation times |
902
|
5
|
|
|
|
|
40
|
my @sorted_speciation = sort { $a <=> $b } @speciation; |
|
101
|
|
|
|
|
145
|
|
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
#Make a hash for easily finding the index of a given speciation event |
905
|
5
|
|
|
|
|
10
|
my %speciation_hash; |
906
|
5
|
|
|
|
|
17
|
foreach ( 0 .. ( $n - 2 ) ) { |
907
|
45
|
|
|
|
|
163
|
$speciation_hash{ $speciation[$_] } = $_; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
#Construct the tree |
911
|
5
|
|
|
|
|
17
|
foreach my $index ( 0 .. ( $n - 2 ) ) { |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
#Create the parent node |
914
|
45
|
|
|
|
|
126
|
my $parent = Bio::Phylo::Forest::Node->new(); |
915
|
45
|
|
|
|
|
174
|
$parent->set_name( 'ID' . ( $n + $index ) ); |
916
|
45
|
|
|
|
|
92
|
push( @nodes, $parent ); |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
#An index for this speciation event back into the unsorted vectors |
919
|
45
|
|
|
|
|
248
|
my $spec_index = $speciation_hash{ $sorted_speciation[$index] }; |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
#Add the children to the parent node |
922
|
45
|
|
|
|
|
149
|
$parent->set_child( $terminals[$spec_index] ); |
923
|
45
|
|
|
|
|
135
|
$terminals[$spec_index]->set_parent($parent); |
924
|
45
|
|
|
|
|
125
|
$parent->set_child( $terminals[ $spec_index + 1 ] ); |
925
|
45
|
|
|
|
|
134
|
$terminals[ $spec_index + 1 ]->set_parent($parent); |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
#Set the children's branch lengths |
928
|
45
|
|
|
|
|
164
|
$terminals[$spec_index]->set_branch_length( |
929
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[$spec_index] ); |
930
|
45
|
|
|
|
|
181
|
$terminals[ $spec_index + 1 ]->set_branch_length( |
931
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[ $spec_index + 1 ] ); |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
#Replace the two terminals with the new one |
934
|
45
|
|
|
|
|
102
|
splice( @terminals, $spec_index, 2, $parent ); |
935
|
45
|
|
|
|
|
92
|
splice( @ages, $spec_index, 2, $sorted_speciation[$index] ); |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
#Update the mapping for the sorted speciation times |
938
|
45
|
|
|
|
|
192
|
foreach ( keys %speciation_hash ) { |
939
|
405
|
100
|
|
|
|
702
|
$speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index; |
940
|
|
|
|
|
|
|
} |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
#Add the nodes to a tree |
944
|
5
|
|
|
|
|
34
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
945
|
5
|
|
|
|
|
17
|
foreach ( reverse(@nodes) ) { $tree->insert($_); } |
|
95
|
|
|
|
|
175
|
|
946
|
5
|
|
|
|
|
12
|
push( @sample, $tree ); |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
949
|
|
|
|
|
|
|
#many samples have been obtained) |
950
|
5
|
50
|
|
|
|
87
|
if ( defined $options{counter} ) { |
951
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
952
|
0
|
|
|
|
|
0
|
&$counter(1); |
953
|
|
|
|
|
|
|
} |
954
|
|
|
|
|
|
|
} |
955
|
1
|
|
|
|
|
8
|
return ( \@sample, [] ); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=back |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
=head1 EVOLUTIONARY MODELS |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
All evolutionary models take a options hash as their input argument |
963
|
|
|
|
|
|
|
and return a Bio::Phylo::Forest::Tree. This tree may contain extinct |
964
|
|
|
|
|
|
|
lineages (lineages that end prior to the end of the tree). |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
The options hash contains any model specific parameters (see the |
967
|
|
|
|
|
|
|
individual model descriptions) and one or both terminating conditions: |
968
|
|
|
|
|
|
|
tree_size => the number of extant species at which to terminate the tree |
969
|
|
|
|
|
|
|
tree_age => the age of the tree at which to terminate the process |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
Note that if the model stops due to the tree_size condition then the |
972
|
|
|
|
|
|
|
tree ends immediately after the speciation event that created the last |
973
|
|
|
|
|
|
|
species. |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=over |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
=item constant_rate_birth() |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
A constant rate birth model (Yule/ERM) |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
Type : Evolutionary model |
982
|
|
|
|
|
|
|
Title : constant_rate_birth |
983
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth(%options) |
984
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
985
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
986
|
|
|
|
|
|
|
Args : %options with fields: |
987
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1) |
988
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
989
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
=cut |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
sub constant_rate_birth { |
996
|
20
|
|
|
20
|
1
|
849
|
my %options = @_; |
997
|
20
|
|
|
|
|
40
|
$options{death_rate} = 0; |
998
|
20
|
|
|
|
|
73
|
return constant_rate_birth_death(%options); |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
=item external_model() |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
A dummy model that takes as input a set of newick_trees and randomly samples |
1004
|
|
|
|
|
|
|
these. |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
Type : Evolutionary model |
1007
|
|
|
|
|
|
|
Title : external_model |
1008
|
|
|
|
|
|
|
Usage : $tree = $external_model(%options) |
1009
|
|
|
|
|
|
|
Function: Returns a random tree that was given as input |
1010
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1011
|
|
|
|
|
|
|
Args : %options with fields: |
1012
|
|
|
|
|
|
|
trees An array of newick strings. One of these is returned at random. |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
NB: The usual parameters tree_size and tree_age will be ignored. When sampling |
1015
|
|
|
|
|
|
|
using this model the trees array must contain trees adhering to the requirements |
1016
|
|
|
|
|
|
|
of the sampling algorithm. This is NOT checked automatically. |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
=cut |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
sub external_model { |
1021
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1022
|
0
|
|
|
|
|
0
|
my $choice = int( rand( scalar @{ $options{trees} } ) ); |
|
0
|
|
|
|
|
0
|
|
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
#Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object |
1025
|
|
|
|
|
|
|
my $tree = Bio::Phylo::IO->parse( |
1026
|
|
|
|
|
|
|
-format => 'newick', |
1027
|
0
|
|
|
|
|
0
|
-string => $options{trees}->[$choice] |
1028
|
|
|
|
|
|
|
)->first; |
1029
|
0
|
|
|
|
|
0
|
return $tree; |
1030
|
|
|
|
|
|
|
} |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=item constant_rate_birth_death() |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
A constant rate birth and death model |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
Type : Evolutionary model |
1037
|
|
|
|
|
|
|
Title : constant_rate_birth_death |
1038
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options) |
1039
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1040
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1041
|
|
|
|
|
|
|
Args : %options with fields: |
1042
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1) |
1043
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction) |
1044
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1045
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
=cut |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
sub constant_rate_birth_death { |
1052
|
95
|
|
|
95
|
1
|
404
|
my %options = @_; |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
#Check that we have a termination condition |
1055
|
95
|
50
|
33
|
|
|
324
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
#Error here. |
1058
|
0
|
|
|
|
|
0
|
return undef; |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1062
|
95
|
50
|
|
|
|
272
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1063
|
95
|
50
|
|
|
|
338
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
#Set default rates |
1066
|
95
|
50
|
|
|
|
226
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1067
|
|
|
|
|
|
|
delete $options{death_rate} |
1068
|
95
|
100
|
66
|
|
|
603
|
if defined( $options{death_rate} ) && $options{death_rate} == 0; |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1071
|
95
|
|
|
|
|
156
|
my $node_id = 0; |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1074
|
95
|
|
|
|
|
419
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1075
|
95
|
|
|
|
|
390
|
my $root = Bio::Phylo::Forest::Node->new(); |
1076
|
95
|
|
|
|
|
374
|
$root->set_branch_length(0); |
1077
|
95
|
|
|
|
|
454
|
$root->set_name( 'ID' . $node_id++ ); |
1078
|
95
|
|
|
|
|
386
|
$tree->insert($root); |
1079
|
95
|
|
|
|
|
193
|
my @terminals = ($root); |
1080
|
95
|
|
|
|
|
155
|
my ( $next_extinction, $next_speciation ); |
1081
|
95
|
|
|
|
|
145
|
my $time = 0; |
1082
|
95
|
|
|
|
|
159
|
my $tree_size = 1; |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1085
|
95
|
50
|
33
|
|
|
279
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1088
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
else { |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1093
|
95
|
|
|
|
|
139
|
$next_speciation = 0; |
1094
|
|
|
|
|
|
|
} |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1097
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1098
|
95
|
100
|
|
|
|
198
|
if ( defined $options{death_rate} ) { |
1099
|
75
|
|
|
|
|
467
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size; |
1100
|
|
|
|
|
|
|
} |
1101
|
|
|
|
|
|
|
else { |
1102
|
20
|
|
|
|
|
32
|
$next_extinction = $next_speciation + 1; |
1103
|
|
|
|
|
|
|
} |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1106
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1107
|
95
|
|
66
|
|
|
581
|
while ($tree_size > 0 |
|
|
|
66
|
|
|
|
|
1108
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1109
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1110
|
|
|
|
|
|
|
{ |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1113
|
5423
|
|
|
|
|
9167
|
foreach (@terminals) { |
1114
|
|
|
|
|
|
|
$_->set_branch_length( |
1115
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1116
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1117
|
68947
|
|
|
|
|
119408
|
$options{tree_age} - $time |
1118
|
|
|
|
|
|
|
) |
1119
|
|
|
|
|
|
|
); |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
#Update the time |
1123
|
5423
|
|
|
|
|
10235
|
$time += min( $next_extinction, $next_speciation ); |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1126
|
5423
|
50
|
|
|
|
10391
|
return $tree if ( $time > $options{tree_age} ); |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1129
|
5423
|
|
|
|
|
13056
|
my $effected = |
1130
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1133
|
5423
|
100
|
66
|
|
|
14055
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1134
|
|
|
|
|
|
|
{ |
1135
|
3169
|
|
|
|
|
5107
|
foreach ( 1, 2 ) { |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
#Create a new species |
1138
|
6338
|
|
|
|
|
17104
|
my $child = Bio::Phylo::Forest::Node->new(); |
1139
|
6338
|
|
|
|
|
23134
|
$child->set_name( 'ID' . $node_id++ ); |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
#Give it a zero edge length |
1142
|
6338
|
|
|
|
|
18360
|
$child->set_branch_length(0); |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1145
|
6338
|
|
|
|
|
15632
|
$effected->set_child($child); |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
#Add it to the tree |
1148
|
6338
|
|
|
|
|
17074
|
$tree->insert($child); |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
#Add it to the terminals list |
1151
|
6338
|
|
|
|
|
13206
|
push( @terminals, $child ); |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
} |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1156
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1157
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1158
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1159
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1160
|
5423
|
|
|
|
|
9416
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1161
|
5423
|
|
|
|
|
13690
|
$r1 = rand until $r1; |
1162
|
5423
|
|
|
|
|
11493
|
$r2 = rand until $r2; |
1163
|
5423
|
|
|
|
|
8356
|
$tree_size = scalar @terminals; |
1164
|
5423
|
100
|
|
|
|
8875
|
return $tree unless $tree_size; |
1165
|
5375
|
|
|
|
|
16121
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size; |
1166
|
5375
|
100
|
|
|
|
9440
|
if ( defined $options{death_rate} ) { |
1167
|
5195
|
|
|
|
|
26494
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size; |
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
else { |
1170
|
180
|
|
|
|
|
841
|
$next_extinction = $next_speciation + 1; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
} |
1173
|
47
|
|
|
|
|
262
|
return $tree; |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=item diversity_dependent_speciation() |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
A birth and death model with speciation rate dependent on diversity as per |
1180
|
|
|
|
|
|
|
Etienne et. al. 2012 |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
Type : Evolutionary model |
1183
|
|
|
|
|
|
|
Title : diversity_dependent_speciation |
1184
|
|
|
|
|
|
|
Usage : $tree = diversity_dependent_speciation(%options) |
1185
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1186
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1187
|
|
|
|
|
|
|
Args : %options with fields: |
1188
|
|
|
|
|
|
|
maximal_birth_rate The maximal birth rate parameter (default 1) |
1189
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction) |
1190
|
|
|
|
|
|
|
K_dash The modified carrying capacity (no default) |
1191
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1192
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
Reference: |
1197
|
|
|
|
|
|
|
Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson, |
1198
|
|
|
|
|
|
|
Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular |
1199
|
|
|
|
|
|
|
phylogenies closer to agreement with the fossil record" |
1200
|
|
|
|
|
|
|
doi: 10.1098/rspb.2011.1439 |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
=cut |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
sub diversity_dependent_speciation { |
1205
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
#Check that we have a termination condition |
1208
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1209
|
|
|
|
|
|
|
#Error here. |
1210
|
0
|
|
|
|
|
0
|
return undef; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
#Check that we have a carrying capacity |
1214
|
0
|
0
|
|
|
|
0
|
unless ( defined $options{K_dash} ) { |
1215
|
|
|
|
|
|
|
#Error here. |
1216
|
0
|
|
|
|
|
0
|
return undef; |
1217
|
|
|
|
|
|
|
} |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1220
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1221
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
#Set default rates |
1224
|
0
|
0
|
|
|
|
0
|
$options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} ); |
1225
|
|
|
|
|
|
|
delete $options{death_rate} |
1226
|
0
|
0
|
0
|
|
|
0
|
if defined( $options{death_rate} ) && $options{death_rate} == 0; |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1229
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1232
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1233
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1234
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1235
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1236
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1237
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1238
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation ); |
1239
|
0
|
|
|
|
|
0
|
my $time = 0; |
1240
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
|
1243
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash})); |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1247
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1250
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1251
|
|
|
|
|
|
|
} |
1252
|
|
|
|
|
|
|
else { |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1255
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1256
|
|
|
|
|
|
|
} |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1259
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1260
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) { |
1261
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size; |
1262
|
|
|
|
|
|
|
} |
1263
|
|
|
|
|
|
|
else { |
1264
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1; |
1265
|
|
|
|
|
|
|
} |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1268
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1269
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1270
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1271
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1272
|
|
|
|
|
|
|
{ |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1275
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1276
|
|
|
|
|
|
|
$_->set_branch_length( |
1277
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1278
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1279
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1280
|
|
|
|
|
|
|
) |
1281
|
|
|
|
|
|
|
); |
1282
|
|
|
|
|
|
|
} |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
#Update the time |
1285
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation ); |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1288
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1291
|
0
|
|
|
|
|
0
|
my $effected = |
1292
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1295
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1296
|
|
|
|
|
|
|
{ |
1297
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
#Create a new species |
1300
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1301
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
#Give it a zero edge length |
1304
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1307
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
#Add it to the tree |
1310
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
#Add it to the terminals list |
1313
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1318
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1319
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1320
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1321
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1322
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1323
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1324
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1325
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1326
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1327
|
|
|
|
|
|
|
|
1328
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash})); |
1329
|
0
|
0
|
|
|
|
0
|
if ($options{birth_rate}==0) |
1330
|
|
|
|
|
|
|
{ |
1331
|
0
|
|
|
|
|
0
|
$next_speciation = $options{tree_age}; |
1332
|
|
|
|
|
|
|
} else |
1333
|
|
|
|
|
|
|
{ |
1334
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size; |
1335
|
|
|
|
|
|
|
} |
1336
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) { |
1337
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size; |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
else { |
1340
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1; |
1341
|
|
|
|
|
|
|
} |
1342
|
|
|
|
|
|
|
} |
1343
|
0
|
|
|
|
|
0
|
return $tree; |
1344
|
|
|
|
|
|
|
} |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=item constant_rate_birth_death() |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
A temporal shift birth and death model |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
Type : Evolutionary model |
1352
|
|
|
|
|
|
|
Title : temporal_shift_birth_death |
1353
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options) |
1354
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1355
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1356
|
|
|
|
|
|
|
Args : %options with fields: |
1357
|
|
|
|
|
|
|
birth_rates The birth rates |
1358
|
|
|
|
|
|
|
death_rates The death rates |
1359
|
|
|
|
|
|
|
rate_times The times after which the rates apply (first element must be 0) |
1360
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1361
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=cut |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
sub temporal_shift_birth_death { |
1368
|
0
|
|
|
0
|
0
|
0
|
my %options = @_; |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
#Check that we have a termination condition |
1371
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
#Error here. |
1374
|
0
|
|
|
|
|
0
|
return undef; |
1375
|
|
|
|
|
|
|
} |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1378
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1379
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1382
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1385
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1386
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1387
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1388
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1389
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1390
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1391
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation ); |
1392
|
0
|
|
|
|
|
0
|
my $time = 0; |
1393
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
#Load current rates |
1396
|
0
|
|
|
|
|
0
|
my $birth_rate = $options{birth_rates}[0]; |
1397
|
0
|
|
|
|
|
0
|
my $death_rate = $options{death_rates}[0]; |
1398
|
0
|
|
|
|
|
0
|
my $current_rates = 0; |
1399
|
0
|
|
|
|
|
0
|
my $next_rate_change = $options{rate_times}[$current_rates+1]; |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
#Add an additional time to the end of the rate change times to simplify checking |
1402
|
0
|
|
|
|
|
0
|
push(@{$options{rate_times}},$options{tree_size}*2); |
|
0
|
|
|
|
|
0
|
|
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1405
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1408
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rate / $tree_size; |
1409
|
|
|
|
|
|
|
} |
1410
|
|
|
|
|
|
|
else { |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1413
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1414
|
|
|
|
|
|
|
} |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n"; |
1417
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1418
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1419
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rate / $tree_size; |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1422
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1423
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1424
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1425
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1426
|
|
|
|
|
|
|
{ |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
# print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n"; |
1429
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n"; |
1430
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1431
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1432
|
|
|
|
|
|
|
$_->set_branch_length( |
1433
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1434
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1435
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1436
|
|
|
|
|
|
|
) |
1437
|
|
|
|
|
|
|
); |
1438
|
|
|
|
|
|
|
} |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
#Update the time |
1441
|
0
|
|
|
|
|
0
|
my $time_last = $time; |
1442
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last); |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1445
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
|
1448
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) ) |
1449
|
|
|
|
|
|
|
{ |
1450
|
0
|
|
|
|
|
0
|
$current_rates += 1; |
1451
|
0
|
|
|
|
|
0
|
$birth_rate = $options{birth_rates}[$current_rates]; |
1452
|
0
|
|
|
|
|
0
|
$death_rate = $options{death_rates}[$current_rates]; |
1453
|
0
|
|
|
|
|
0
|
$next_rate_change = $options{rate_times}[$current_rates+1]; |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
} else |
1456
|
|
|
|
|
|
|
{ |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1460
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1463
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1464
|
|
|
|
|
|
|
{ |
1465
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
#Create a new species |
1468
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1469
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
#Give it a zero edge length |
1472
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1475
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
#Add it to the tree |
1478
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
#Add it to the terminals list |
1481
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1482
|
|
|
|
|
|
|
} |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1487
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1488
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1489
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1490
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1491
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1492
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1493
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1494
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1495
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1496
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $birth_rate / $tree_size; |
1497
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $death_rate / $tree_size; |
1498
|
|
|
|
|
|
|
|
1499
|
0
|
0
|
|
|
|
0
|
if ((scalar @terminals)%100==0) |
1500
|
|
|
|
|
|
|
{ |
1501
|
0
|
|
|
|
|
0
|
print $time."|".@terminals."|\n"; |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
} |
1505
|
0
|
|
|
|
|
0
|
return $tree; |
1506
|
|
|
|
|
|
|
} |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
=item evolving_speciation_rate() |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. Each daughter |
1511
|
|
|
|
|
|
|
species is assigned its parent's speciation rate multiplied by a normally |
1512
|
|
|
|
|
|
|
distributed noise factor. |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
Type : Evolutionary model |
1515
|
|
|
|
|
|
|
Title : evolving_speciation_rate |
1516
|
|
|
|
|
|
|
Usage : $tree = evolving_speciation_rate(%options) |
1517
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1518
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1519
|
|
|
|
|
|
|
Args : %options with fields: |
1520
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1) |
1521
|
|
|
|
|
|
|
evolving_std The standard deviation of the normal distribution |
1522
|
|
|
|
|
|
|
from which the rate multiplier is drawn. |
1523
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1524
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=cut |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
sub evolving_speciation_rate { |
1531
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
#Check that we have a termination condition |
1534
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
#Error here. |
1537
|
0
|
|
|
|
|
0
|
return undef; |
1538
|
|
|
|
|
|
|
} |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1541
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1542
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
#Set default rates |
1545
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1546
|
0
|
0
|
|
|
|
0
|
$options{evolving_std} = 1 unless defined( $options{evolving_std} ); |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1549
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1552
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1553
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1554
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1555
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1556
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1557
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1558
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} ); |
1559
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate}; |
1560
|
0
|
|
|
|
|
0
|
my $next_speciation; |
1561
|
0
|
|
|
|
|
0
|
my $time = 0; |
1562
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1565
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1568
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1569
|
|
|
|
|
|
|
} |
1570
|
|
|
|
|
|
|
else { |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1573
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1574
|
|
|
|
|
|
|
} |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
#While we haven't reached termination |
1577
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) { |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1580
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1581
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length + |
1582
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) ); |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
#Update the time |
1586
|
0
|
|
|
|
|
0
|
$time += $next_speciation; |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1589
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
#Get the species effected by this event |
1592
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate); |
1593
|
0
|
|
|
|
|
0
|
my $selected = 0; |
1594
|
0
|
|
0
|
|
|
0
|
for ( |
1595
|
|
|
|
|
|
|
; |
1596
|
|
|
|
|
|
|
$selected < scalar @terminals |
1597
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
1598
|
|
|
|
|
|
|
$selected++ |
1599
|
|
|
|
|
|
|
) |
1600
|
|
|
|
|
|
|
{ |
1601
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
1602
|
|
|
|
|
|
|
} |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
#Remove it from the terminals list |
1605
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
1606
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 ); |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
#Update the net speciation rate |
1609
|
0
|
|
|
|
|
0
|
$net_rate -= $effected_rate; |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1612
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
#Create a new species |
1615
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1616
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
#Give it a zero edge length |
1619
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1622
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
#Add it to the tree |
1625
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
#Add it to the terminals list |
1628
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
#New speciation rate |
1631
|
|
|
|
|
|
|
my $new_speciation_rate = |
1632
|
0
|
|
|
|
|
0
|
$effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} ); |
1633
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; } |
|
0
|
|
|
|
|
0
|
|
1634
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate ); |
1635
|
0
|
|
|
|
|
0
|
$net_rate += $new_speciation_rate; |
1636
|
|
|
|
|
|
|
} |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
# $net_rate = 0; |
1639
|
|
|
|
|
|
|
# foreach (@birth_rates) { $net_rate += $_; } |
1640
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1641
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1642
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1643
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely) |
1646
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) { |
1647
|
0
|
|
|
|
|
0
|
return $tree; |
1648
|
|
|
|
|
|
|
} |
1649
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size; |
1650
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1651
|
|
|
|
|
|
|
} |
1652
|
0
|
|
|
|
|
0
|
return $tree; |
1653
|
|
|
|
|
|
|
} |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
=item clade_shifts() |
1657
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
A constant rate birth-death model with punctuated changes in the speciation |
1659
|
|
|
|
|
|
|
and extinction rates. At each change one lineage receives new pre-specified |
1660
|
|
|
|
|
|
|
speciation and extinction rates. |
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
Type : Evolutionary model |
1663
|
|
|
|
|
|
|
Title : clade_shifts |
1664
|
|
|
|
|
|
|
Usage : $tree = clade_shifts(%options) |
1665
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1666
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1667
|
|
|
|
|
|
|
Args : %options with fields: |
1668
|
|
|
|
|
|
|
birth_rates The speciation rates |
1669
|
|
|
|
|
|
|
death_rates The death rates |
1670
|
|
|
|
|
|
|
rate_times The times at which the rates are introduced to a new |
1671
|
|
|
|
|
|
|
clade. The first time should be zero. The remaining must be in |
1672
|
|
|
|
|
|
|
ascending order. |
1673
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1674
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1675
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
=cut |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
sub clade_shifts { |
1681
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
#Check that we have a termination condition |
1684
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
#Error here. |
1687
|
0
|
|
|
|
|
0
|
return undef; |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1691
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1692
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1695
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1698
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1699
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1700
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1701
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1702
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1703
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
#rates |
1705
|
0
|
|
|
|
|
0
|
my @birth_rates_in = @{$options{birth_rates}}; |
|
0
|
|
|
|
|
0
|
|
1706
|
0
|
|
|
|
|
0
|
my @death_rates_in = @{$options{death_rates}}; |
|
0
|
|
|
|
|
0
|
|
1707
|
0
|
|
|
|
|
0
|
my @rate_times_in = @{$options{rate_times}}; |
|
0
|
|
|
|
|
0
|
|
1708
|
|
|
|
|
|
|
|
1709
|
0
|
0
|
|
|
|
0
|
if ($rate_times_in[0] != 0) |
1710
|
|
|
|
|
|
|
{ |
1711
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1712
|
|
|
|
|
|
|
"The first rate time must be 0" ); |
1713
|
|
|
|
|
|
|
} |
1714
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @death_rates_in) |
1715
|
|
|
|
|
|
|
{ |
1716
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1717
|
|
|
|
|
|
|
"birth and death rates must have the same length" ); |
1718
|
|
|
|
|
|
|
} |
1719
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @rate_times_in) |
1720
|
|
|
|
|
|
|
{ |
1721
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1722
|
|
|
|
|
|
|
"birth/death rates must have the same length as rate times" ); |
1723
|
|
|
|
|
|
|
} |
1724
|
0
|
|
|
|
|
0
|
my @birth_rates = ($birth_rates_in[0]); |
1725
|
0
|
|
|
|
|
0
|
my @death_rates = ($death_rates_in[0]); |
1726
|
|
|
|
|
|
|
|
1727
|
0
|
|
|
|
|
0
|
my $net_birth_rate = $birth_rates[0]; |
1728
|
0
|
|
|
|
|
0
|
my $net_death_rate = $death_rates[0]; |
1729
|
|
|
|
|
|
|
|
1730
|
0
|
|
|
|
|
0
|
shift(@birth_rates_in); |
1731
|
0
|
|
|
|
|
0
|
shift(@death_rates_in); |
1732
|
|
|
|
|
|
|
|
1733
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1734
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation, $next_rate_change ); |
1735
|
0
|
|
|
|
|
0
|
my $time = 0; |
1736
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1737
|
0
|
|
|
|
|
0
|
my $inf = 9**9**9**9; |
1738
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1740
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1743
|
0
|
0
|
|
|
|
0
|
if ($birth_rates[0] > 0) |
1744
|
|
|
|
|
|
|
{ |
1745
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rates[0] ; |
1746
|
|
|
|
|
|
|
} else |
1747
|
|
|
|
|
|
|
{ |
1748
|
0
|
|
|
|
|
0
|
$next_speciation = $inf; |
1749
|
|
|
|
|
|
|
} |
1750
|
|
|
|
|
|
|
} |
1751
|
|
|
|
|
|
|
else { |
1752
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1753
|
0
|
|
|
|
|
0
|
$next_speciation = 0.0; |
1754
|
|
|
|
|
|
|
} |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1757
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1758
|
0
|
0
|
|
|
|
0
|
if ($death_rates[0] > 0) |
1759
|
|
|
|
|
|
|
{ |
1760
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rates[0]; |
1761
|
|
|
|
|
|
|
} else |
1762
|
|
|
|
|
|
|
{ |
1763
|
0
|
|
|
|
|
0
|
$next_extinction = $inf; |
1764
|
|
|
|
|
|
|
} |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
#Time of next rate change |
1767
|
0
|
|
|
|
|
0
|
shift(@rate_times_in); #pop the initial 0 |
1768
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in); |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1771
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1772
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1773
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1774
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1775
|
|
|
|
|
|
|
{ |
1776
|
|
|
|
|
|
|
#print $time."|".$tree_size."\n"; |
1777
|
|
|
|
|
|
|
#Update rates if a clade shift is happening |
1778
|
|
|
|
|
|
|
#TODO index rates or pop off one at a time. |
1779
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1782
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1783
|
|
|
|
|
|
|
$_->set_branch_length( |
1784
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1785
|
|
|
|
|
|
|
$next_extinction, |
1786
|
|
|
|
|
|
|
$next_speciation, |
1787
|
|
|
|
|
|
|
$next_rate_change-$time, |
1788
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1789
|
|
|
|
|
|
|
) |
1790
|
|
|
|
|
|
|
); |
1791
|
|
|
|
|
|
|
} |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
#Update the time |
1794
|
0
|
|
|
|
|
0
|
my $time_last = $time; |
1795
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last ); |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1798
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
#We have a rate change |
1801
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min($next_extinction, $next_speciation)) |
1802
|
|
|
|
|
|
|
{ |
1803
|
|
|
|
|
|
|
#Find a random species to effect |
1804
|
0
|
|
|
|
|
0
|
my $effected_species = int(rand($tree_size)); |
1805
|
|
|
|
|
|
|
#Subtract current rates |
1806
|
0
|
|
|
|
|
0
|
$net_death_rate -= $death_rates[$effected_species]; |
1807
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $birth_rates[$effected_species]; |
1808
|
|
|
|
|
|
|
#Get new rates |
1809
|
0
|
|
|
|
|
0
|
$death_rates[$effected_species] = shift(@death_rates_in); |
1810
|
0
|
|
|
|
|
0
|
$birth_rates[$effected_species] = shift(@birth_rates_in); |
1811
|
|
|
|
|
|
|
#Add new rates |
1812
|
0
|
|
|
|
|
0
|
$net_death_rate += $death_rates[$effected_species]; |
1813
|
0
|
|
|
|
|
0
|
$net_birth_rate += $birth_rates[$effected_species]; |
1814
|
|
|
|
|
|
|
#Get next rate change time |
1815
|
0
|
0
|
|
|
|
0
|
if (scalar(@rate_times_in)) |
1816
|
|
|
|
|
|
|
{ |
1817
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in); |
1818
|
|
|
|
|
|
|
} else |
1819
|
|
|
|
|
|
|
{ |
1820
|
0
|
|
|
|
|
0
|
$next_rate_change = $inf; |
1821
|
|
|
|
|
|
|
} |
1822
|
|
|
|
|
|
|
} |
1823
|
|
|
|
|
|
|
#Choosing a random species to speciate |
1824
|
|
|
|
|
|
|
else |
1825
|
|
|
|
|
|
|
{ |
1826
|
0
|
|
|
|
|
0
|
my $selected = 0; |
1827
|
0
|
0
|
|
|
|
0
|
if ( $next_speciation < $next_extinction ) |
1828
|
|
|
|
|
|
|
{ |
1829
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_birth_rate); |
1830
|
0
|
|
0
|
|
|
0
|
for ( |
1831
|
|
|
|
|
|
|
; |
1832
|
|
|
|
|
|
|
$selected < scalar @terminals |
1833
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
1834
|
|
|
|
|
|
|
$selected++ |
1835
|
|
|
|
|
|
|
) |
1836
|
|
|
|
|
|
|
{ |
1837
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
1838
|
|
|
|
|
|
|
} |
1839
|
|
|
|
|
|
|
} else |
1840
|
|
|
|
|
|
|
{ |
1841
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_death_rate); |
1842
|
0
|
|
0
|
|
|
0
|
for ( |
1843
|
|
|
|
|
|
|
; |
1844
|
|
|
|
|
|
|
$selected < scalar @terminals |
1845
|
|
|
|
|
|
|
&& $rand_select > $death_rates[$selected] ; |
1846
|
|
|
|
|
|
|
$selected++ |
1847
|
|
|
|
|
|
|
) |
1848
|
|
|
|
|
|
|
{ |
1849
|
0
|
|
|
|
|
0
|
$rand_select -= $death_rates[$selected]; |
1850
|
|
|
|
|
|
|
} |
1851
|
|
|
|
|
|
|
} |
1852
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate == 0) |
1853
|
|
|
|
|
|
|
{ |
1854
|
0
|
|
|
|
|
0
|
$selected = 0; |
1855
|
|
|
|
|
|
|
} |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
#Remove the species effected by this event and remove it from the terminals list |
1858
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
1859
|
0
|
|
|
|
|
0
|
my $effected_birth_rate = splice( @birth_rates, $selected, 1 ); |
1860
|
0
|
|
|
|
|
0
|
my $effected_death_rate = splice( @death_rates, $selected, 1 ); |
1861
|
|
|
|
|
|
|
|
1862
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $effected_birth_rate; |
1863
|
0
|
|
|
|
|
0
|
$net_death_rate -= $effected_death_rate; |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1866
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1867
|
|
|
|
|
|
|
{ |
1868
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
#Create a new species |
1871
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1872
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
#Give it a zero edge length |
1875
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1876
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1878
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
#Add it to the tree |
1881
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
#Add it to the terminals list |
1884
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1885
|
|
|
|
|
|
|
|
1886
|
0
|
|
|
|
|
0
|
push( @birth_rates, $effected_birth_rate ); |
1887
|
0
|
|
|
|
|
0
|
push( @death_rates, $effected_death_rate ); |
1888
|
|
|
|
|
|
|
|
1889
|
0
|
|
|
|
|
0
|
$net_death_rate += $effected_death_rate; |
1890
|
0
|
|
|
|
|
0
|
$net_birth_rate += $effected_birth_rate; |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
} |
1893
|
|
|
|
|
|
|
} |
1894
|
|
|
|
|
|
|
} |
1895
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1896
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1897
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1898
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1901
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1902
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1903
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
#The current tree size |
1906
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1907
|
|
|
|
|
|
|
|
1908
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1909
|
|
|
|
|
|
|
|
1910
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate > 0) |
1911
|
|
|
|
|
|
|
{ |
1912
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_birth_rate; |
1913
|
|
|
|
|
|
|
} else |
1914
|
|
|
|
|
|
|
{ |
1915
|
0
|
|
|
|
|
0
|
$next_speciation = $inf; |
1916
|
|
|
|
|
|
|
} |
1917
|
|
|
|
|
|
|
|
1918
|
0
|
0
|
|
|
|
0
|
if ($net_death_rate > 0) |
1919
|
|
|
|
|
|
|
{ |
1920
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $net_death_rate; |
1921
|
|
|
|
|
|
|
} else |
1922
|
|
|
|
|
|
|
{ |
1923
|
0
|
|
|
|
|
0
|
$next_extinction = $inf; |
1924
|
|
|
|
|
|
|
} |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
} |
1927
|
0
|
|
|
|
|
0
|
return $tree; |
1928
|
|
|
|
|
|
|
} |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
=item beta_binomial() |
1931
|
|
|
|
|
|
|
|
1932
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. From Blum2007 |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
Type : Evolutionary model |
1935
|
|
|
|
|
|
|
Title : beta_binomial |
1936
|
|
|
|
|
|
|
Usage : $tree = beta_binomial(%options) |
1937
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1938
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1939
|
|
|
|
|
|
|
Args : %options with fields: |
1940
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1) |
1941
|
|
|
|
|
|
|
model_param The parameter as defined in Blum2007 |
1942
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1943
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1944
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1946
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
=cut |
1948
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
sub beta_binomial { |
1950
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
#Check that we have a termination condition |
1953
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1954
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
#Error here. |
1956
|
0
|
|
|
|
|
0
|
return undef; |
1957
|
|
|
|
|
|
|
} |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1960
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1961
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1962
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
#Set default rates |
1964
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1965
|
0
|
0
|
|
|
|
0
|
$options{model_param} = 0 unless defined( $options{model_param} ); |
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1968
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1969
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1971
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1972
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1973
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1974
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1975
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1976
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1977
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} ); |
1978
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate}; |
1979
|
0
|
|
|
|
|
0
|
my $next_speciation; |
1980
|
0
|
|
|
|
|
0
|
my $time = 0; |
1981
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1984
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1985
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1987
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1988
|
|
|
|
|
|
|
} |
1989
|
|
|
|
|
|
|
else { |
1990
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1992
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1993
|
|
|
|
|
|
|
} |
1994
|
|
|
|
|
|
|
|
1995
|
|
|
|
|
|
|
#While we haven't reached termination |
1996
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) { |
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1999
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
2000
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length + |
2001
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) ); |
2002
|
|
|
|
|
|
|
} |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
#Update the time |
2005
|
0
|
|
|
|
|
0
|
$time += $next_speciation; |
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
2008
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
#Get the species effected by this event |
2011
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate); |
2012
|
0
|
|
|
|
|
0
|
my $selected = 0; |
2013
|
0
|
|
0
|
|
|
0
|
for ( |
2014
|
|
|
|
|
|
|
; |
2015
|
|
|
|
|
|
|
$selected < scalar @terminals |
2016
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
2017
|
|
|
|
|
|
|
$selected++ |
2018
|
|
|
|
|
|
|
) |
2019
|
|
|
|
|
|
|
{ |
2020
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
2021
|
|
|
|
|
|
|
} |
2022
|
|
|
|
|
|
|
|
2023
|
|
|
|
|
|
|
#Remove it from the terminals list |
2024
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
2025
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 ); |
2026
|
|
|
|
|
|
|
my $p = |
2027
|
0
|
|
|
|
|
0
|
qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 ); |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
2030
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
#Create a new species |
2033
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
2034
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
#Give it a zero edge length |
2037
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
2038
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
2040
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
2041
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
#Add it to the tree |
2043
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
2044
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
#Add it to the terminals list |
2046
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
2047
|
|
|
|
|
|
|
|
2048
|
|
|
|
|
|
|
#New speciation rate |
2049
|
0
|
|
|
|
|
0
|
my $new_speciation_rate = $effected_rate * $p; |
2050
|
0
|
|
|
|
|
0
|
$p = 1 - $p; |
2051
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; } |
|
0
|
|
|
|
|
0
|
|
2052
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate ); |
2053
|
|
|
|
|
|
|
} |
2054
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
2056
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
2057
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
2058
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
2059
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely) |
2061
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) { |
2062
|
0
|
|
|
|
|
0
|
return $tree; |
2063
|
|
|
|
|
|
|
} |
2064
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size; |
2065
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
2066
|
|
|
|
|
|
|
} |
2067
|
0
|
|
|
|
|
0
|
return $tree; |
2068
|
|
|
|
|
|
|
} |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=back |
2071
|
|
|
|
|
|
|
|
2072
|
|
|
|
|
|
|
=cut |
2073
|
|
|
|
|
|
|
|
2074
|
|
|
|
|
|
|
=begin comment |
2075
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
########################################################### |
2077
|
|
|
|
|
|
|
#INTERNAL METHODS |
2078
|
|
|
|
|
|
|
#These are methods that permit additional manipulations of |
2079
|
|
|
|
|
|
|
#a Bio::Phylo::Tree to be easily made. As such some of |
2080
|
|
|
|
|
|
|
#these could easily be moved into Bio::Phylo::Forest::Tree |
2081
|
|
|
|
|
|
|
########################################################### |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
=end comment |
2084
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
=cut |
2086
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
=begin comment |
2088
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
Type : Internal method |
2090
|
|
|
|
|
|
|
Title : copy_tree |
2091
|
|
|
|
|
|
|
Usage : $tree = copy_tree($tree) |
2092
|
|
|
|
|
|
|
Function: Makes a new independent copy of a tree |
2093
|
|
|
|
|
|
|
Returns : the phylogenetic $tree |
2094
|
|
|
|
|
|
|
Args : the phylogenetic $tree |
2095
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
=end comment |
2097
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
=cut |
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
sub copy_tree { |
2101
|
15
|
|
|
15
|
0
|
131
|
return Bio::Phylo::IO->parse( |
2102
|
|
|
|
|
|
|
-format => 'newick', |
2103
|
|
|
|
|
|
|
-string => shift->to_newick( '-nodelabels' => 1 ) |
2104
|
|
|
|
|
|
|
)->first; |
2105
|
|
|
|
|
|
|
} |
2106
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
=begin comment |
2108
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
Type : Internal method |
2110
|
|
|
|
|
|
|
Title : truncate_tree_time |
2111
|
|
|
|
|
|
|
Usage : truncate_tree_time($tree,$age) |
2112
|
|
|
|
|
|
|
Function: Truncates the tree at the specified age |
2113
|
|
|
|
|
|
|
Returns : N/A |
2114
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2115
|
|
|
|
|
|
|
$age: the age at which to cut the $tree |
2116
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
=end comment |
2118
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
=cut |
2120
|
|
|
|
|
|
|
|
2121
|
|
|
|
|
|
|
sub truncate_tree_time { |
2122
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
#$node and $time are used only by this function recursively |
2124
|
843
|
|
|
843
|
0
|
1612
|
my ( $tree, $age, $node, $time ) = @_; |
2125
|
|
|
|
|
|
|
|
2126
|
|
|
|
|
|
|
#If node and time weren't specified we are starting from the root |
2127
|
843
|
100
|
|
|
|
1727
|
$node = $tree->get_root unless defined $node; |
2128
|
843
|
100
|
|
|
|
1326
|
$time = 0 unless defined $time; |
2129
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
#If we are truncating this branch |
2131
|
843
|
100
|
|
|
|
1434
|
if ( $time + $node->get_branch_length >= $age ) { |
2132
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
#Collapse the node unless it is terminal |
2134
|
252
|
100
|
|
|
|
568
|
$node->collapse unless $node->is_terminal(); |
2135
|
|
|
|
|
|
|
|
2136
|
|
|
|
|
|
|
#Set the branch length appropriately |
2137
|
252
|
|
|
|
|
840
|
$node->set_branch_length( $age - $time ); |
2138
|
252
|
|
|
|
|
743
|
return; |
2139
|
|
|
|
|
|
|
} |
2140
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
#If this node has no children we are done |
2142
|
591
|
100
|
|
|
|
1273
|
return if $node->is_terminal(); |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
#Call the function recursively on the children |
2145
|
361
|
|
|
|
|
506
|
foreach ( @{ $node->get_children } ) { |
|
361
|
|
|
|
|
593
|
|
2146
|
828
|
|
|
|
|
1791
|
truncate_tree_time( $tree, $age, $_, |
2147
|
|
|
|
|
|
|
$time + $node->get_branch_length() ); |
2148
|
|
|
|
|
|
|
} |
2149
|
|
|
|
|
|
|
} |
2150
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=begin comment |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
Type : Internal method |
2154
|
|
|
|
|
|
|
Title : truncate_tree_size |
2155
|
|
|
|
|
|
|
Usage : truncate_tree_size($tree,$size) |
2156
|
|
|
|
|
|
|
Function: Truncates the tree to the specified number of species |
2157
|
|
|
|
|
|
|
Returns : N/A |
2158
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2159
|
|
|
|
|
|
|
$size: random species are delete so that the tree has this many terminals |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
=end comment |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
=cut |
2164
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
sub truncate_tree_size { |
2166
|
5
|
|
|
5
|
0
|
19
|
my ( $tree, $size ) = @_; |
2167
|
5
|
|
|
|
|
11
|
my @terminals = @{ $tree->get_terminals }; |
|
5
|
|
|
|
|
26
|
|
2168
|
5
|
|
|
|
|
25
|
my @names; |
2169
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root |
2171
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly |
2172
|
|
|
|
|
|
|
#calling the analogous functions on the tree |
2173
|
5
|
|
|
|
|
29
|
_calc_node_properties($tree); |
2174
|
|
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
#Only push species that are extant and store the number of those |
2176
|
|
|
|
|
|
|
# my $tree_height = $tree->get_tallest_tip->calc_path_to_root; |
2177
|
5
|
|
|
|
|
21
|
my $tree_height = $tree->get_root->get_generic('tree_height'); |
2178
|
5
|
|
|
|
|
20
|
foreach (@terminals) { |
2179
|
567
|
100
|
|
|
|
1021
|
if ( |
2180
|
|
|
|
|
|
|
abs( |
2181
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) / |
2182
|
|
|
|
|
|
|
$tree_height |
2183
|
|
|
|
|
|
|
) < 1e-6 |
2184
|
|
|
|
|
|
|
) |
2185
|
|
|
|
|
|
|
{ |
2186
|
145
|
|
|
|
|
302
|
push( @names, $_->get_name ); |
2187
|
|
|
|
|
|
|
} |
2188
|
|
|
|
|
|
|
} |
2189
|
5
|
50
|
|
|
|
23
|
if ( @names < $size ) { print "Internal error\n"; } |
|
0
|
|
|
|
|
0
|
|
2190
|
5
|
|
|
|
|
13
|
my %deletions; |
2191
|
5
|
|
|
|
|
27
|
while ( scalar( keys %deletions ) < @names - $size ) { |
2192
|
155
|
|
|
|
|
371
|
$deletions{ $names[ int( rand(@names) ) ] } = 1; |
2193
|
|
|
|
|
|
|
} |
2194
|
5
|
|
|
|
|
50
|
$tree = prune_tips( $tree, [ keys %deletions ] ); |
2195
|
5
|
|
|
|
|
143
|
return $tree; |
2196
|
|
|
|
|
|
|
} |
2197
|
|
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
sub _get_ultrametric_size { |
2199
|
0
|
|
|
0
|
|
0
|
my ( $tree, $size ) = @_; |
2200
|
0
|
|
|
|
|
0
|
my @terminals = @{ $tree->get_terminals }; |
|
0
|
|
|
|
|
0
|
|
2201
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree); |
2202
|
0
|
|
|
|
|
0
|
my @names; |
2203
|
0
|
|
|
|
|
0
|
my $tree_height = $tree->get_root->get_generic('tree_height'); |
2204
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
2205
|
0
|
0
|
|
|
|
0
|
if ( |
2206
|
|
|
|
|
|
|
abs( |
2207
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) / |
2208
|
|
|
|
|
|
|
$tree_height |
2209
|
|
|
|
|
|
|
) < 1e-6 |
2210
|
|
|
|
|
|
|
) |
2211
|
|
|
|
|
|
|
{ |
2212
|
0
|
|
|
|
|
0
|
push( @names, $_->get_name ); |
2213
|
|
|
|
|
|
|
} |
2214
|
|
|
|
|
|
|
} |
2215
|
0
|
|
|
|
|
0
|
return scalar @names; |
2216
|
|
|
|
|
|
|
} |
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
=begin comment |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
Type : Internal method |
2221
|
|
|
|
|
|
|
Title : remove_extinct_species |
2222
|
|
|
|
|
|
|
Usage : remove_extinct_species($tree) |
2223
|
|
|
|
|
|
|
Function: Removes extinct species from the tree. An extinct species |
2224
|
|
|
|
|
|
|
is a terminal that does not extend as far as the furthest |
2225
|
|
|
|
|
|
|
terminal(s). |
2226
|
|
|
|
|
|
|
Returns : N/A |
2227
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2228
|
|
|
|
|
|
|
$age: the age at which to cut the $tree |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
=end comment |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
=cut |
2233
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
sub remove_extinct_species { |
2235
|
0
|
|
|
0
|
0
|
0
|
my $tree = shift; |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root |
2238
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly |
2239
|
|
|
|
|
|
|
#calling the analogous functions on the tree |
2240
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree); |
2241
|
0
|
|
|
|
|
0
|
my $height = $tree->get_root->get_generic('tree_height'); |
2242
|
0
|
0
|
|
|
|
0
|
return unless $height > 0; |
2243
|
0
|
|
|
|
|
0
|
my $leaves = $tree->get_terminals; |
2244
|
0
|
0
|
|
|
|
0
|
return unless $leaves; |
2245
|
0
|
|
|
|
|
0
|
my @remove; |
2246
|
0
|
|
|
|
|
0
|
foreach ( @{$leaves} ) { |
|
0
|
|
|
|
|
0
|
|
2247
|
|
|
|
|
|
|
|
2248
|
0
|
0
|
|
|
|
0
|
unless ( |
2249
|
|
|
|
|
|
|
abs( ( $_->get_generic('root_distance') - $height ) / $height ) < |
2250
|
|
|
|
|
|
|
1e-6 ) |
2251
|
|
|
|
|
|
|
{ |
2252
|
0
|
|
|
|
|
0
|
push( @remove, $_->get_name ); |
2253
|
|
|
|
|
|
|
} |
2254
|
|
|
|
|
|
|
} |
2255
|
0
|
|
|
|
|
0
|
$tree = prune_tips( $tree, \@remove ); |
2256
|
0
|
|
|
|
|
0
|
return $tree; |
2257
|
|
|
|
|
|
|
} |
2258
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
=begin comment |
2260
|
|
|
|
|
|
|
|
2261
|
|
|
|
|
|
|
Type : Internal method |
2262
|
|
|
|
|
|
|
Title : prune_tips |
2263
|
|
|
|
|
|
|
Usage : prune_tips($tree,$tips) |
2264
|
|
|
|
|
|
|
Function: Removes named terminals from the tree |
2265
|
|
|
|
|
|
|
Returns : N/A |
2266
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2267
|
|
|
|
|
|
|
$tips: array ref of terminals to remove from the tree |
2268
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
NB: Available as $tree->prune_tips($tips), but had some problems with |
2270
|
|
|
|
|
|
|
this. |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
=end comment |
2273
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
=cut |
2275
|
|
|
|
|
|
|
|
2276
|
|
|
|
|
|
|
sub prune_tips { |
2277
|
5
|
|
|
5
|
0
|
17
|
my ( $self, $tips ) = @_; |
2278
|
5
|
|
|
|
|
13
|
my %names_to_delete = map { $_ => 1 } @{$tips}; |
|
95
|
|
|
|
|
142
|
|
|
5
|
|
|
|
|
14
|
|
2279
|
472
|
|
|
|
|
726
|
my %keep = map { $_->get_name => 1 } |
2280
|
567
|
|
|
|
|
1015
|
grep { not exists $names_to_delete{ $_->get_name } } |
2281
|
5
|
|
|
|
|
16
|
@{ $self->get_terminals }; |
|
5
|
|
|
|
|
45
|
|
2282
|
|
|
|
|
|
|
$self->visit_depth_first( |
2283
|
|
|
|
|
|
|
-post => sub { |
2284
|
1077
|
|
|
1077
|
|
1616
|
my $node = shift; |
2285
|
1077
|
100
|
|
|
|
2163
|
if ( $node->is_terminal ) { |
2286
|
567
|
100
|
|
|
|
1252
|
if ( not $keep{ $node->get_name } ) { |
2287
|
95
|
|
|
|
|
293
|
$node->set_parent(); |
2288
|
95
|
|
|
|
|
304
|
$self->delete($node); |
2289
|
|
|
|
|
|
|
} |
2290
|
|
|
|
|
|
|
} |
2291
|
|
|
|
|
|
|
else { |
2292
|
510
|
|
|
|
|
801
|
my $seen_tip_to_keep = 0; |
2293
|
510
|
|
|
|
|
656
|
for my $tip ( @{ $node->get_terminals } ) { |
|
510
|
|
|
|
|
1232
|
|
2294
|
9248
|
100
|
|
|
|
16534
|
$seen_tip_to_keep++ if $keep{ $tip->get_name }; |
2295
|
|
|
|
|
|
|
} |
2296
|
510
|
100
|
|
|
|
2472
|
if ( not $seen_tip_to_keep ) { |
2297
|
26
|
|
|
|
|
84
|
$node->set_parent(); |
2298
|
26
|
|
|
|
|
78
|
$self->delete($node); |
2299
|
|
|
|
|
|
|
} |
2300
|
|
|
|
|
|
|
} |
2301
|
|
|
|
|
|
|
} |
2302
|
5
|
|
|
|
|
116
|
); |
2303
|
5
|
|
|
|
|
120
|
$self->remove_unbranched_internals; |
2304
|
5
|
|
|
|
|
202
|
return $self; |
2305
|
|
|
|
|
|
|
} |
2306
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
=begin comment |
2308
|
|
|
|
|
|
|
|
2309
|
|
|
|
|
|
|
Type : Internal method |
2310
|
|
|
|
|
|
|
Title : lineage_through_time |
2311
|
|
|
|
|
|
|
Usage : ($time,$count) = lineage_through_time($tree) |
2312
|
|
|
|
|
|
|
Function: Alternative to $tree->ltt that permits extinctions |
2313
|
|
|
|
|
|
|
Returns : $time: array ref of times |
2314
|
|
|
|
|
|
|
$count: array ref of species counts corresponding to the times |
2315
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data |
2316
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
=end comment |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
=cut |
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
sub lineage_through_time { |
2322
|
89
|
|
|
89
|
0
|
180
|
my $tree = shift; |
2323
|
89
|
|
|
|
|
253
|
my ( $speciation, $extinction ) = _recursive_ltt_helper($tree); |
2324
|
89
|
|
|
|
|
189
|
my @speciation = sort { $a <=> $b } @{$speciation}; |
|
13358
|
|
|
|
|
14943
|
|
|
89
|
|
|
|
|
555
|
|
2325
|
89
|
|
|
|
|
187
|
my @extinction = sort { $a <=> $b } @{$extinction}; |
|
14123
|
|
|
|
|
15581
|
|
|
89
|
|
|
|
|
230
|
|
2326
|
89
|
|
|
|
|
209
|
my @time = (0); |
2327
|
89
|
|
|
|
|
194
|
my @count = (1); |
2328
|
89
|
|
|
|
|
165
|
my $n_species = 1; |
2329
|
89
|
|
|
|
|
383
|
my $end_time = max( @speciation, @extinction ); |
2330
|
89
|
50
|
|
|
|
255
|
return ( [], [] ) if ( $end_time == 0 ); |
2331
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
#We remove any extinction events occurring at the very end of the tree (as they are not real extinctions) |
2333
|
89
|
|
100
|
|
|
543
|
while ( scalar @extinction |
2334
|
|
|
|
|
|
|
&& ( $end_time - $extinction[-1] ) / $end_time < 1e-6 ) |
2335
|
|
|
|
|
|
|
{ |
2336
|
998
|
|
|
|
|
2235
|
pop @extinction; |
2337
|
|
|
|
|
|
|
} |
2338
|
89
|
|
100
|
|
|
313
|
while ( scalar @speciation || scalar @extinction ) { |
2339
|
5321
|
100
|
100
|
|
|
14949
|
if ( scalar @extinction == 0 |
|
|
|
100
|
|
|
|
|
2340
|
|
|
|
|
|
|
|| ( scalar @speciation && $speciation[0] < $extinction[0] ) ) |
2341
|
|
|
|
|
|
|
{ |
2342
|
3115
|
|
|
|
|
4185
|
push( @count, ++$n_species ); |
2343
|
3115
|
|
|
|
|
6025
|
push( @time, shift(@speciation) ); |
2344
|
|
|
|
|
|
|
} |
2345
|
|
|
|
|
|
|
else { |
2346
|
2206
|
|
|
|
|
2895
|
push( @count, --$n_species ); |
2347
|
2206
|
|
|
|
|
4136
|
push( @time, shift(@extinction) ); |
2348
|
|
|
|
|
|
|
} |
2349
|
|
|
|
|
|
|
} |
2350
|
89
|
|
|
|
|
494
|
return ( \@time, \@count ); |
2351
|
|
|
|
|
|
|
} |
2352
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
=begin comment |
2354
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
Type : Internal method |
2356
|
|
|
|
|
|
|
Title : _recursive_ltt_helper |
2357
|
|
|
|
|
|
|
Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree) |
2358
|
|
|
|
|
|
|
Function: Helper for lineage_through_time |
2359
|
|
|
|
|
|
|
Returns : $speciation: array ref of speciation times |
2360
|
|
|
|
|
|
|
$extinction: array ref of extinction times |
2361
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data |
2362
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
=end comment |
2364
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
=cut |
2366
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
sub _recursive_ltt_helper { |
2368
|
6319
|
|
|
6319
|
|
9351
|
my ( $tree, $node, $time ) = @_; |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
#If we are being invoked at the root level |
2371
|
6319
|
100
|
|
|
|
10236
|
$node = $tree->get_root unless defined $node; |
2372
|
6319
|
100
|
|
|
|
9820
|
$time = 0 unless defined $time; |
2373
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
#The new time |
2375
|
6319
|
|
|
|
|
12013
|
$time += $node->get_branch_length; |
2376
|
6319
|
100
|
|
|
|
12204
|
return ( [], [$time] ) if ( $node->is_terminal ); |
2377
|
3115
|
|
|
|
|
5052
|
my @speciation; |
2378
|
|
|
|
|
|
|
my @extinction; |
2379
|
3115
|
|
|
|
|
3759
|
foreach ( @{ $node->get_children } ) { |
|
3115
|
|
|
|
|
5967
|
|
2380
|
6230
|
|
|
|
|
11014
|
my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time ); |
2381
|
6230
|
|
|
|
|
8713
|
@speciation = ( @speciation, @{$spec} ); |
|
6230
|
|
|
|
|
12488
|
|
2382
|
6230
|
|
|
|
|
7744
|
@extinction = ( @extinction, @{$ext} ); |
|
6230
|
|
|
|
|
19913
|
|
2383
|
|
|
|
|
|
|
} |
2384
|
3115
|
|
|
|
|
5457
|
push( @speciation, $time ); |
2385
|
3115
|
|
|
|
|
5906
|
return ( \@speciation, \@extinction ); |
2386
|
|
|
|
|
|
|
} |
2387
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
=begin comment |
2389
|
|
|
|
|
|
|
|
2390
|
|
|
|
|
|
|
Type : Internal method. |
2391
|
|
|
|
|
|
|
Title : _calc_node_properties |
2392
|
|
|
|
|
|
|
Usage : _calc_node_properties($tree); |
2393
|
|
|
|
|
|
|
Function: Calculates the distance of nodes from the root |
2394
|
|
|
|
|
|
|
Returns : The maximum distance from the root |
2395
|
|
|
|
|
|
|
Args : |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
=end comment |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
=cut |
2400
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
sub _calc_node_properties { |
2402
|
1077
|
|
|
1077
|
|
1836
|
my ( $node, $root_distance ); |
2403
|
1077
|
|
|
|
|
1543
|
my $tree = shift; |
2404
|
1077
|
|
|
|
|
2689
|
my $root = $tree->get_root; |
2405
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
#Check whether we were given a node and distance |
2407
|
1077
|
100
|
|
|
|
2390
|
if ( scalar @_ ) { |
2408
|
1072
|
|
|
|
|
1603
|
$node = shift; |
2409
|
1072
|
|
|
|
|
1455
|
$root_distance = shift; |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
#Otherwise the root is the default |
2412
|
|
|
|
|
|
|
} |
2413
|
|
|
|
|
|
|
else { |
2414
|
5
|
|
|
|
|
10
|
$node = $root; |
2415
|
5
|
|
|
|
|
46
|
$root->set_generic( tree_height => 0 ); |
2416
|
5
|
|
|
|
|
10
|
$root_distance = 0; |
2417
|
|
|
|
|
|
|
} |
2418
|
1077
|
|
|
|
|
3531
|
$node->set_generic( root_distance => $root_distance ); |
2419
|
1077
|
100
|
|
|
|
2760
|
if ( $root_distance > $root->get_generic('tree_height') ) { |
2420
|
105
|
|
|
|
|
285
|
$root->set_generic( tree_height => $root_distance ); |
2421
|
|
|
|
|
|
|
} |
2422
|
1077
|
|
|
|
|
1627
|
my $terminal_count = 0; |
2423
|
1077
|
|
|
|
|
2970
|
my $children = $node->get_children; |
2424
|
1077
|
50
|
|
|
|
2419
|
if ( defined $children ) { |
2425
|
1077
|
|
|
|
|
1289
|
foreach ( @{$children} ) { |
|
1077
|
|
|
|
|
3419
|
|
2426
|
1072
|
|
|
|
|
2578
|
_calc_node_properties( $tree, $_, |
2427
|
|
|
|
|
|
|
$root_distance + $_->get_branch_length() ); |
2428
|
|
|
|
|
|
|
} |
2429
|
|
|
|
|
|
|
} |
2430
|
|
|
|
|
|
|
} |
2431
|
|
|
|
|
|
|
|
2432
|
|
|
|
|
|
|
=begin comment |
2433
|
|
|
|
|
|
|
|
2434
|
|
|
|
|
|
|
Type : Internal method |
2435
|
|
|
|
|
|
|
Title : nchoosek |
2436
|
|
|
|
|
|
|
Usage : $out = nchoosek($n,$k) |
2437
|
|
|
|
|
|
|
Function: Returns the binomial coefficient for $n and $k |
2438
|
|
|
|
|
|
|
Returns : the binomial coefficient |
2439
|
|
|
|
|
|
|
Args : $n, $k |
2440
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
=end comment |
2442
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
=cut |
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
sub nchoosek { |
2446
|
0
|
|
|
0
|
0
|
|
my ( $n, $k ) = @_; |
2447
|
0
|
|
|
|
|
|
my $r = 1; |
2448
|
0
|
0
|
0
|
|
|
|
return 0 if ( $k > $n || $k < 0 ); |
2449
|
0
|
|
|
|
|
|
for ( my $d = 1 ; $d <= $k ; $d++ ) { |
2450
|
0
|
|
|
|
|
|
$r *= $n--; |
2451
|
0
|
|
|
|
|
|
$r /= $d; |
2452
|
|
|
|
|
|
|
} |
2453
|
0
|
|
|
|
|
|
return $r; |
2454
|
|
|
|
|
|
|
} |
2455
|
|
|
|
|
|
|
1; |