line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::EvolutionaryModels; |
2
|
1
|
|
|
1
|
|
753
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
3
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
40
|
|
4
|
1
|
|
|
1
|
|
6
|
use base 'Exporter'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
82
|
|
5
|
1
|
|
|
1
|
|
331
|
use Bio::Phylo::Forest::Tree; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
8
|
|
6
|
1
|
|
|
1
|
|
390
|
use Bio::Phylo::Forest; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
7
|
1
|
|
|
1
|
|
7
|
use Math::CDF qw'qnorm qbeta'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
64
|
|
8
|
1
|
|
|
1
|
|
6
|
use List::Util qw'min max'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
51
|
|
9
|
1
|
|
|
1
|
|
5
|
use POSIX qw'ceil floor'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
10
|
1
|
|
|
1
|
|
1347
|
use Config; #Use to check whether multi-threading is available |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
BEGIN { |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
5
|
use Bio::Phylo; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4
|
|
15
|
1
|
|
|
1
|
|
1056
|
our @EXPORT_OK = |
16
|
|
|
|
|
|
|
qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts); |
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 NAME |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these |
22
|
|
|
|
|
|
|
Klaas Hartmann, September 2007 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#For convenience we import the sample routine (so we can write sample(...) instead of |
27
|
|
|
|
|
|
|
#Bio::Phylo::EvolutionaryModels::sample(...). |
28
|
|
|
|
|
|
|
use Bio::Phylo::EvolutionaryModels qw (sample); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#Example#A###################################################################### |
31
|
|
|
|
|
|
|
#Simulate a single tree with ten species from the constant rate birth model with parameter 0.5 |
32
|
|
|
|
|
|
|
my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#Example#B###################################################################### |
35
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth model using the b algorithm |
36
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
37
|
|
|
|
|
|
|
tree_size => 10, |
38
|
|
|
|
|
|
|
algorithm => 'b', |
39
|
|
|
|
|
|
|
algorithm_options => {rate => 1}, |
40
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth, |
41
|
|
|
|
|
|
|
model_options => {birth_rate=>.5}); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
#Print a newick string for the 4th sampled tree |
45
|
|
|
|
|
|
|
print $sample->[3]->to_newick."\n"; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#Example#C###################################################################### |
48
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using |
49
|
|
|
|
|
|
|
#the bd algorithm and two threads (useful for dual core processors) |
50
|
|
|
|
|
|
|
#NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate |
51
|
|
|
|
|
|
|
# and death_rate we are giving the model |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
54
|
|
|
|
|
|
|
tree_size => 10, |
55
|
|
|
|
|
|
|
threads => 2, |
56
|
|
|
|
|
|
|
algorithm => 'bd', |
57
|
|
|
|
|
|
|
algorithm_options => {rate => 1, nstar => 30}, |
58
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death, |
59
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
#Example#D###################################################################### |
62
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using |
63
|
|
|
|
|
|
|
#incomplete taxon sampling |
64
|
|
|
|
|
|
|
# |
65
|
|
|
|
|
|
|
#sampling_probability is set so that the true tree has 10 species with 50% probability, |
66
|
|
|
|
|
|
|
#11 species with 30% probability and 12 species with 20% probability |
67
|
|
|
|
|
|
|
# |
68
|
|
|
|
|
|
|
#NB: we must specify an mstar here this will depend on the model parameters and the |
69
|
|
|
|
|
|
|
# incomplete taxon sampling parameters |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
my $algorithm_options = {rate => 1, |
72
|
|
|
|
|
|
|
nstar => 30, |
73
|
|
|
|
|
|
|
mstar => 12, |
74
|
|
|
|
|
|
|
sampling_probability => [.5, .3, .2]}; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
77
|
|
|
|
|
|
|
tree_size => 10, |
78
|
|
|
|
|
|
|
algorithm => 'incomplete_sampling_bd', |
79
|
|
|
|
|
|
|
algorithm_options => $algorithm_options, |
80
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death, |
81
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
#Example#E###################################################################### |
84
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
#First we define the random function for the shortest pendant edge for a Yule model |
87
|
|
|
|
|
|
|
my $random_pendant_function = sub { |
88
|
|
|
|
|
|
|
%options = @_; |
89
|
|
|
|
|
|
|
return -log(rand)/$options{birth_rate}/$options{tree_size}; |
90
|
|
|
|
|
|
|
}; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
#Then we produce our sample |
93
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5, |
94
|
|
|
|
|
|
|
tree_size => 10, |
95
|
|
|
|
|
|
|
algorithm => 'memoryless_b', |
96
|
|
|
|
|
|
|
algorithm_options => {pendant_dist => $random_pendant_function}, |
97
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth, |
98
|
|
|
|
|
|
|
model_options => {birth_rate=>1}); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#Example#F####################################################################### |
101
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a constant birth death rate model using the |
102
|
|
|
|
|
|
|
#constant_rate_bd algorithm |
103
|
|
|
|
|
|
|
my ($sample) = sample(sample_size => 5, |
104
|
|
|
|
|
|
|
tree_size => 10, |
105
|
|
|
|
|
|
|
algorithm => 'constant_rate_bd', |
106
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8}); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 DESCRIPTION |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
This package contains evolutionary models for phylogenetic trees and |
111
|
|
|
|
|
|
|
algorithms for sampling from these models. It is a non-OO module that |
112
|
|
|
|
|
|
|
optionally exports the 'sample', 'constant_rate_birth' and |
113
|
|
|
|
|
|
|
'constant_rate_birth_death' subroutines into the caller's namespace, |
114
|
|
|
|
|
|
|
using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >> |
115
|
|
|
|
|
|
|
directive. Alternatively, you can call the subroutines as class methods, |
116
|
|
|
|
|
|
|
as in the synopsis. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The initial set of algorithms available in this package corresponds to those in: |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Sampling trees from evolutionary models |
121
|
|
|
|
|
|
|
Klaas Hartmann, Dennis Wong, Tanja Gernhard |
122
|
|
|
|
|
|
|
Systematic Biology, in press |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Some comments and code refers back to this paper. |
125
|
|
|
|
|
|
|
Further algorithms and evolutionary are encouraged |
126
|
|
|
|
|
|
|
and welcome. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
To make this code as straightforward as possible to read some of the |
129
|
|
|
|
|
|
|
algorithms have been implemented in a less than optimal manner. The code |
130
|
|
|
|
|
|
|
also follows the structure of an earlier version of the manuscript so |
131
|
|
|
|
|
|
|
there is some redundancy (eg. the birth algorithm is just a specific |
132
|
|
|
|
|
|
|
instance of the birth_death algorithm) |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 SAMPLING |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
All sampling algorithms should be accessed through the generic sample |
137
|
|
|
|
|
|
|
interface. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 Generic sampling interface: sample() |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Type : Interface |
142
|
|
|
|
|
|
|
Title : sample |
143
|
|
|
|
|
|
|
Usage : see SYNOPSIS |
144
|
|
|
|
|
|
|
Function: Samples phylogenetic trees from an evolutionary model |
145
|
|
|
|
|
|
|
Returns : A sample of phylogenetic trees and statistics from the |
146
|
|
|
|
|
|
|
sampling algorithm |
147
|
|
|
|
|
|
|
Args : Sampling parameters in a hash |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
This method acts as a gateway to the various sampling algorithms. The |
150
|
|
|
|
|
|
|
argument is a single hash containing the options for the sampling run. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Sampling parameters (* denotes optional parameters): |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sample_size The number of trees to return (more trees may be returned) |
156
|
|
|
|
|
|
|
tree_size The size that returned trees should be |
157
|
|
|
|
|
|
|
model The evolutionary model (should be a function reference) |
158
|
|
|
|
|
|
|
model_options A hash pointer for model options (see individual models) |
159
|
|
|
|
|
|
|
algorithm The algorithm to use (omit the preceding sample_) |
160
|
|
|
|
|
|
|
algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details) |
161
|
|
|
|
|
|
|
threads* The number of threads to use (default is 1) |
162
|
|
|
|
|
|
|
output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree) |
163
|
|
|
|
|
|
|
remove_extinct Set to true to remove extinct species |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Available algorithms (algorithm names in the paper are given in brackets): |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
b For all pure birth models (simplified GSA) |
170
|
|
|
|
|
|
|
bd For all birth and death models (GSA) |
171
|
|
|
|
|
|
|
incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA) |
172
|
|
|
|
|
|
|
memoryless_b For memoryless pure birth models (PBMSA) |
173
|
|
|
|
|
|
|
constant_rate_bd For birth and death models with constant rates (BDSA) |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Model |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
If you create your own model it must accept an options hash as its input. |
179
|
|
|
|
|
|
|
This options hash can contain any parameters you desire. Your model should |
180
|
|
|
|
|
|
|
simulate a tree until it becomes extinct or the size/age limit as specified |
181
|
|
|
|
|
|
|
in the options has been reached. Respectively these options are tree_size |
182
|
|
|
|
|
|
|
and tree_age. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Multi-threading |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Multi-thread support is very simplistic. The number of threads you specify |
188
|
|
|
|
|
|
|
are created and each is assigned the task of finding sample_size/threads |
189
|
|
|
|
|
|
|
samples. I had problems with using Bio::Phylo::Forest::Tree in a multi- |
190
|
|
|
|
|
|
|
threaded setting. Hence the sampled trees are returned as newick strings to |
191
|
|
|
|
|
|
|
the main routine where (if required) Tree objects are recreated from the |
192
|
|
|
|
|
|
|
strings. For most applications this overhead seems negligible in contrast |
193
|
|
|
|
|
|
|
to the sampling times. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
From a code perspective this function (sample): |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Checks input arguments |
199
|
|
|
|
|
|
|
Handles multi-threading |
200
|
|
|
|
|
|
|
Calls the individual algorithms to perform sampling |
201
|
|
|
|
|
|
|
Reformats data |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
my @threads; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub sample { |
208
|
5
|
|
|
5
|
1
|
110
|
my %options = @_; |
209
|
5
|
|
|
|
|
51
|
my %methods_require = ( |
210
|
|
|
|
|
|
|
b => ['rate'], |
211
|
|
|
|
|
|
|
bd => [ 'rate', 'nstar' ], |
212
|
|
|
|
|
|
|
incomplete_sampling_bd => |
213
|
|
|
|
|
|
|
[ 'rate', 'nstar', 'mstar', 'sampling_probability' ], |
214
|
|
|
|
|
|
|
memoryless_b => ['pendant_dist'], |
215
|
|
|
|
|
|
|
constant_rate_bd => [], |
216
|
|
|
|
|
|
|
); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
#Default is to sample a single tree |
219
|
5
|
50
|
|
|
|
22
|
$options{sample_size} = 1 unless defined $options{sample_size}; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
#Default is to use a single thread |
222
|
5
|
100
|
|
|
|
20
|
$options{threads} = 1 unless defined $options{threads}; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#Check that multiple threads are actually supported |
225
|
5
|
50
|
33
|
|
|
25
|
if ( $options{threads} > 1 && !$Config{useithreads} ) { |
226
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
227
|
|
|
|
|
|
|
"your perl installation does not support multiple threads" ); |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
#Check an algorithm was specified |
231
|
5
|
50
|
|
|
|
20
|
unless ( defined $options{algorithm} ) { |
232
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( |
233
|
|
|
|
|
|
|
'error' => "an algorithm type must be specified" ); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
#Check the algorithm type is valid |
237
|
5
|
50
|
|
|
|
22
|
unless ( defined $methods_require{ $options{algorithm} } ) { |
238
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
239
|
|
|
|
|
|
|
'error' => "'$options{algorithm}' is not a valid algorithm" ); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
#Check the algorithm options |
243
|
5
|
|
|
|
|
13
|
foreach ( @{ $methods_require{ $options{algorithm} } } ) { |
|
5
|
|
|
|
|
16
|
|
244
|
8
|
50
|
|
|
|
23
|
unless ( defined $options{algorithm_options}->{$_} ) { |
245
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
246
|
|
|
|
|
|
|
"'$_' must be specified for the '$options{algorithm}' algorithm" |
247
|
|
|
|
|
|
|
); |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
#If we are doing incomplete taxon sampling the sampling probability must be specified |
252
|
5
|
0
|
33
|
|
|
17
|
if ( defined $options{incomplete_sampling} |
|
|
|
0
|
|
|
|
|
253
|
|
|
|
|
|
|
&& $options{incomplete_sampling} |
254
|
|
|
|
|
|
|
&& !( defined $options{algorithm_options}->{sampling_probability} ) ) |
255
|
|
|
|
|
|
|
{ |
256
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
257
|
|
|
|
|
|
|
"'sampling_probability' must be specified for post hoc incomplete sampling to be applied" |
258
|
|
|
|
|
|
|
); |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
#Check that a model has been specified |
262
|
5
|
50
|
66
|
|
|
25
|
unless ( defined $options{model} |
263
|
|
|
|
|
|
|
|| $options{algorithm} eq 'constant_rate_bd' ) |
264
|
|
|
|
|
|
|
{ |
265
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( |
266
|
|
|
|
|
|
|
'error' => "a model must be specified" ); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
#Get a function pointer for the algorithm |
270
|
5
|
|
|
|
|
16
|
my $algorithm = 'sample_' . $options{algorithm}; |
271
|
5
|
|
|
|
|
23
|
$algorithm = \&$algorithm; |
272
|
5
|
|
|
|
|
12
|
my @output; |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
#Run the algorithm, different method for multiple threads |
275
|
5
|
50
|
|
|
|
17
|
if ( $options{threads} > 1 ) { |
276
|
0
|
|
|
|
|
0
|
require threads; |
277
|
0
|
|
|
|
|
0
|
require threads::shared; |
278
|
0
|
|
|
|
|
0
|
@output = ( [], [] ); |
279
|
|
|
|
|
|
|
$SIG{'KILL'} = sub { |
280
|
0
|
|
|
0
|
|
0
|
foreach (@threads) { $_->kill('KILL')->detach(); } |
|
0
|
|
|
|
|
0
|
|
281
|
0
|
|
|
|
|
0
|
threads->exit(); |
282
|
0
|
|
|
|
|
0
|
}; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#Start the threads |
285
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) { |
286
|
0
|
|
|
|
|
0
|
@_ = (); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
#Note the list context of the return argument here determines |
289
|
|
|
|
|
|
|
#the data type returned from the thread. |
290
|
|
|
|
|
|
|
( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options, |
291
|
|
|
|
|
|
|
sample_size => ceil( $options{sample_size} / $options{threads} ) |
292
|
0
|
|
|
|
|
0
|
); |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
#Wait for them to finish and combine the data |
296
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) { |
297
|
0
|
|
|
|
|
0
|
until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); } |
|
0
|
|
|
|
|
0
|
|
298
|
0
|
|
|
|
|
0
|
my @thread_data = $threads[ $_ - 1 ]->join; |
299
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) { |
300
|
0
|
0
|
|
|
|
0
|
$output[$index] = [] if scalar @output < $index; |
301
|
|
|
|
|
|
|
$output[$index] = |
302
|
0
|
|
|
|
|
0
|
[ @{ $output[$index] }, @{ $thread_data[$index] } ]; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
#Turn newick strings back into tree objects |
307
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{output_format} |
308
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' ) |
309
|
|
|
|
|
|
|
{ |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
#Convert to newick trees |
312
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
313
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = Bio::Phylo::IO->parse( |
314
|
|
|
|
|
|
|
-format => 'newick', |
315
|
|
|
|
|
|
|
-string => $output[0]->[$index] |
316
|
|
|
|
|
|
|
)->first; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
else { |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
#Get the samples |
323
|
5
|
|
|
|
|
30
|
@output = &$algorithm(%options); |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
#Turn into newick trees if requested |
326
|
5
|
50
|
66
|
|
|
261
|
if ( defined $options{output_format} |
|
|
100
|
66
|
|
|
|
|
327
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' ) |
328
|
|
|
|
|
|
|
{ |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
#Convert to newick trees |
331
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
332
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = |
333
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 ); |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
elsif ( defined $options{output_format} |
337
|
|
|
|
|
|
|
&& $options{output_format} eq 'forest' ) |
338
|
|
|
|
|
|
|
{ |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# save as a forest |
341
|
4
|
|
|
|
|
25
|
my $forest = Bio::Phylo::Forest->new; |
342
|
4
|
|
|
|
|
16
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
24
|
|
|
|
|
55
|
|
343
|
20
|
|
|
|
|
53
|
$forest->insert( $output[0]->[$index] ); |
344
|
|
|
|
|
|
|
} |
345
|
4
|
|
|
|
|
15
|
$output[0] = $forest; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
} |
348
|
5
|
|
|
|
|
71
|
return @output; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=begin comment |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Type : Internal method |
354
|
|
|
|
|
|
|
Title : _sample_newick |
355
|
|
|
|
|
|
|
Usage : ($thread) = threads->new(\&_sample_newick, %options); |
356
|
|
|
|
|
|
|
@thread_output = $thread->join; |
357
|
|
|
|
|
|
|
Function: Wrapper for sampling routines used for multi-threading |
358
|
|
|
|
|
|
|
Returns : Output from sampling algorithms with trees replaced by newick strings |
359
|
|
|
|
|
|
|
Args : %options to pass to sampling algorithm |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=end comment |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=cut |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub _sample_newick { |
366
|
0
|
|
|
0
|
|
0
|
my %options = @_; |
367
|
0
|
|
|
|
|
0
|
my $algorithm = 'sample_' . $options{algorithm}; |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Thread 'cancellation' signal handler |
370
|
0
|
|
|
0
|
|
0
|
$SIG{'KILL'} = sub { threads->exit(); }; |
|
0
|
|
|
|
|
0
|
|
371
|
0
|
|
|
|
|
0
|
$algorithm = \&$algorithm; |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
#Perform the sampling |
374
|
0
|
|
|
|
|
0
|
my @output = ( &$algorithm(%options) ); |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
#Convert to newick trees |
377
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) { |
|
0
|
|
|
|
|
0
|
|
378
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = |
379
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 ); |
380
|
|
|
|
|
|
|
} |
381
|
0
|
|
|
|
|
0
|
return @output; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=head2 Sampling algorithms |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
These algorithms should be accessed through the sampling interface (sample()). |
387
|
|
|
|
|
|
|
Additional parameters need to be passed to these algorithms as described for |
388
|
|
|
|
|
|
|
each algorithm. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item sample_b() |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Sample from any birth model |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Type : Sampling algorithm |
397
|
|
|
|
|
|
|
Title : sample_b |
398
|
|
|
|
|
|
|
Usage : see sample |
399
|
|
|
|
|
|
|
Function: Samples trees from a pure birth model |
400
|
|
|
|
|
|
|
Returns : see sample |
401
|
|
|
|
|
|
|
Args : %algorithm_options requires the field: |
402
|
|
|
|
|
|
|
rate => sampling rate |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub sample_b { |
407
|
1
|
|
|
1
|
1
|
4
|
my %options = @_; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
#The sample of trees |
410
|
1
|
|
|
|
|
1
|
my @sample; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
#A list of the expected number of samples |
413
|
|
|
|
|
|
|
my @expected_summary; |
414
|
1
|
|
|
|
|
2
|
my $model = $options{model}; |
415
|
1
|
|
|
|
|
2
|
my $rate = $options{algorithm_options}->{rate}; |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
#While we have insufficient samples |
418
|
1
|
|
|
|
|
3
|
while ( scalar @sample < $options{sample_size} ) { |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
#Generate a candidate model run |
421
|
12
|
|
|
|
|
90
|
my $candidate = &$model( %{ $options{model_options} }, |
422
|
12
|
|
|
|
|
23
|
tree_size => $options{tree_size} ); |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
#Check that the tree has no extinctions |
425
|
12
|
50
|
|
|
|
54
|
unless ( $candidate->is_ultrametric(1e-6) ) { |
426
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
427
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" ); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
#Get the lineage through time data |
431
|
12
|
|
|
|
|
40
|
my ( $time, $count ) = lineage_through_time($candidate); |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
#The expected number of samples we want |
434
|
12
|
|
|
|
|
34
|
my $expected_samples = $rate * ( $time->[-1] - $time->[-2] ); |
435
|
12
|
|
|
|
|
24
|
push( @expected_summary, $expected_samples ); |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
438
|
12
|
|
|
|
|
28
|
while ( $expected_samples > 0 ) { |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
441
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
442
|
|
|
|
|
|
|
#according to the remaining number of samples |
443
|
12
|
100
|
66
|
|
|
58
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
444
|
5
|
|
|
|
|
17
|
my $tree = copy_tree($candidate); |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
#Truncate the tree at a random point during which it had tree_size species |
447
|
5
|
|
|
|
|
21
|
truncate_tree_time( $tree, |
448
|
|
|
|
|
|
|
( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] ); |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
#Add the tree to our sample |
451
|
5
|
|
|
|
|
14
|
push( @sample, $tree ); |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
454
|
|
|
|
|
|
|
#many samples have been obtained) |
455
|
5
|
50
|
|
|
|
17
|
if ( defined $options{counter} ) { |
456
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
457
|
0
|
|
|
|
|
0
|
&$counter(1); |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
} |
460
|
12
|
|
|
|
|
78
|
$expected_samples--; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
} |
463
|
1
|
|
|
|
|
7
|
return ( \@sample, \@expected_summary ); |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=item sample_bd() |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Sample from any birth and death model for which nstar exists |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Type : Sampling algorithm |
471
|
|
|
|
|
|
|
Title : sample_bd |
472
|
|
|
|
|
|
|
Usage : see sample |
473
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model |
474
|
|
|
|
|
|
|
Returns : see sample |
475
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields: |
476
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be |
477
|
|
|
|
|
|
|
a negligible chance of returning to tree_size species |
478
|
|
|
|
|
|
|
rate => sampling rate |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=cut |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub sample_bd { |
483
|
1
|
|
|
1
|
1
|
6
|
my %options = @_; |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
#The sample of trees |
486
|
1
|
|
|
|
|
2
|
my @sample; |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
#A list of the expected number of samples |
489
|
|
|
|
|
|
|
my @expected_summary; |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
#Convenience variables |
492
|
1
|
|
|
|
|
3
|
my $model = $options{model}; |
493
|
1
|
|
|
|
|
3
|
my $nstar = $options{algorithm_options}->{nstar}; |
494
|
1
|
|
|
|
|
3
|
my $rate = $options{algorithm_options}->{rate}; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
#While we have insufficient samples |
497
|
1
|
|
|
|
|
5
|
while ( scalar @sample < $options{sample_size} ) { |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
#Generate a candidate model run |
500
|
|
|
|
|
|
|
my $candidate = |
501
|
31
|
|
|
|
|
65
|
&$model( %{ $options{model_options} }, tree_size => $nstar ); |
|
31
|
|
|
|
|
198
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
#Get the lineage through time data |
504
|
31
|
|
|
|
|
191
|
my ( $time, $count ) = lineage_through_time($candidate); |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
#Reorganise the lineage through time data |
507
|
|
|
|
|
|
|
#@duration contains the length of the intervals with the right number of species |
508
|
|
|
|
|
|
|
#@start contains the starting times of these intervals |
509
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected |
510
|
|
|
|
|
|
|
#$total_duration contains the sum of the interval lengths |
511
|
31
|
|
|
|
|
69
|
my ( @duration, @start, @prob, $total_duration ); |
512
|
31
|
|
|
|
|
86
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
2046
|
|
|
|
|
3024
|
|
513
|
2015
|
100
|
|
|
|
3311
|
if ( $count->[$index] == $options{tree_size} ) { |
514
|
79
|
|
|
|
|
133
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] ); |
515
|
79
|
|
|
|
|
102
|
push( @start, $time->[$index] ); |
516
|
79
|
|
|
|
|
96
|
$total_duration += $duration[-1]; |
517
|
79
|
|
|
|
|
119
|
push( @prob, $total_duration ); |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
1
|
|
|
1
|
|
10
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
47
|
|
521
|
31
|
100
|
|
|
|
261
|
next if $total_duration == 0; |
522
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
350
|
|
523
|
10
|
|
|
|
|
34
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) { |
524
|
79
|
|
|
|
|
133
|
$prob[$index] /= $total_duration; |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
#The expected number of samples we want |
528
|
10
|
|
|
|
|
21
|
my $expected_samples = $rate * $total_duration; |
529
|
10
|
|
|
|
|
31
|
push( @expected_summary, $expected_samples ); |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
532
|
10
|
|
|
|
|
31
|
while ( $expected_samples > 0 ) { |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
535
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
536
|
|
|
|
|
|
|
#according to the remaining number of samples |
537
|
12
|
100
|
100
|
|
|
69
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
538
|
5
|
|
|
|
|
26
|
my $tree = copy_tree($candidate); |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
#Get a random interval |
541
|
5
|
|
|
|
|
29
|
my $interval_choice = rand(1); |
542
|
5
|
|
|
|
|
12
|
my $interval; |
543
|
5
|
|
|
|
|
52
|
for ( |
544
|
|
|
|
|
|
|
$interval = 0 ; |
545
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ; |
546
|
|
|
|
|
|
|
$interval++ |
547
|
|
|
|
|
|
|
) |
548
|
|
|
|
|
|
|
{ |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval |
552
|
5
|
|
|
|
|
32
|
truncate_tree_time( $tree, |
553
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] ); |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
#Add the tree to our sample |
556
|
5
|
|
|
|
|
22
|
push( @sample, $tree ); |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
559
|
|
|
|
|
|
|
#many samples have been obtained) |
560
|
5
|
50
|
|
|
|
26
|
if ( defined $options{counter} ) { |
561
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
562
|
0
|
|
|
|
|
0
|
&$counter(1); |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
} |
565
|
12
|
|
|
|
|
213
|
$expected_samples--; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
} |
568
|
1
|
|
|
|
|
9
|
return ( \@sample, \@expected_summary ); |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=item sample_incomplete_sampling_bd() |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Sample from any birth and death model with incomplete taxon sampling |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Type : Sampling algorithm |
576
|
|
|
|
|
|
|
Title : sample_incomplete_sampling_bd |
577
|
|
|
|
|
|
|
Usage : see sample |
578
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model with incomplete taxon sampling |
579
|
|
|
|
|
|
|
Returns : see sample |
580
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields: |
581
|
|
|
|
|
|
|
rate => sampling rate |
582
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be |
583
|
|
|
|
|
|
|
a negligible chance of returning to mstar species |
584
|
|
|
|
|
|
|
mstar => trees with more than mstar species form a negligible |
585
|
|
|
|
|
|
|
contribution to the final sample. |
586
|
|
|
|
|
|
|
sampling_probability => see below. |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
sampling_probability |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
vector: must have length (mstar-tree_size+1) The ith element gives the probability |
591
|
|
|
|
|
|
|
of not sampling i species. |
592
|
|
|
|
|
|
|
scalar: the probability of sampling any individual species. Is used to calculate |
593
|
|
|
|
|
|
|
a vector as discussed in the paper. |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=cut |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub sample_incomplete_sampling_bd { |
598
|
1
|
|
|
1
|
1
|
6
|
my %options = @_; |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
# %options = (%options, %{$options{algorithm_options}}); |
601
|
|
|
|
|
|
|
#The sample of trees |
602
|
1
|
|
|
|
|
3
|
my @sample; |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
#A list of the expected number of samples |
605
|
|
|
|
|
|
|
my @expected_summary; |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
# #Convenience variables |
608
|
1
|
|
|
|
|
3
|
my $model = $options{model}; |
609
|
1
|
|
|
|
|
2
|
my $nstar = $options{algorithm_options}->{nstar}; |
610
|
1
|
|
|
|
|
2
|
my $mstar = $options{algorithm_options}->{mstar}; |
611
|
1
|
|
|
|
|
3
|
my $rate = $options{algorithm_options}->{rate}; |
612
|
|
|
|
|
|
|
my $sampling_probability = |
613
|
1
|
|
|
|
|
2
|
$options{algorithm_options}->{sampling_probability}; |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
#If sampling_probability is a list check it's length |
616
|
1
|
50
|
33
|
|
|
8
|
if ( ref $sampling_probability |
617
|
1
|
|
|
|
|
7
|
&& scalar @{$sampling_probability} != |
618
|
|
|
|
|
|
|
( $mstar - $options{tree_size} + 1 ) ) |
619
|
|
|
|
|
|
|
{ |
620
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
621
|
|
|
|
|
|
|
"'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items" |
622
|
|
|
|
|
|
|
); |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
#If a single sampling probability was given we calculate the |
626
|
|
|
|
|
|
|
#probability of sub-sampling given numbers of species. |
627
|
1
|
50
|
|
|
|
5
|
unless ( ref $sampling_probability ) { |
628
|
0
|
|
|
|
|
0
|
my $p = $sampling_probability; |
629
|
0
|
|
|
|
|
0
|
my @vec; |
630
|
0
|
|
|
|
|
0
|
my $total = 0; |
631
|
0
|
|
|
|
|
0
|
foreach ( ( $options{tree_size} .. $mstar ) ) { |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#The probability of sampling tree_size species from a tree containing $_ species |
634
|
|
|
|
|
|
|
push( @vec, |
635
|
|
|
|
|
|
|
nchoosek( $_, $options{tree_size} ) * |
636
|
|
|
|
|
|
|
( $p**$options{tree_size} ) * |
637
|
0
|
|
|
|
|
0
|
( ( 1 - $p )**( $_ - $options{tree_size} ) ) ); |
638
|
0
|
|
|
|
|
0
|
$total += $vec[-1]; |
639
|
|
|
|
|
|
|
} |
640
|
0
|
|
|
|
|
0
|
for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; } |
|
0
|
|
|
|
|
0
|
|
641
|
0
|
|
|
|
|
0
|
$sampling_probability = \@vec; |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
#We now normalise the sampling_probability list so that it sums to unity |
645
|
|
|
|
|
|
|
#this allows comparable sampling rates to be used here and in sample_bd |
646
|
1
|
|
|
|
|
2
|
my $total_sp = 0; |
647
|
1
|
|
|
|
|
2
|
foreach ( @{$sampling_probability} ) { $total_sp += $_; } |
|
1
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
9
|
|
648
|
1
|
|
|
1
|
|
6
|
no warnings; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
59
|
|
649
|
1
|
|
|
|
|
9
|
foreach ( my $index = 1 .. scalar @{$sampling_probability} ) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
650
|
1
|
|
|
1
|
|
5
|
no warnings; # FIXME |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
651
|
1
|
|
|
|
|
7
|
$sampling_probability->[ $index - 1 ] /= $total_sp; |
652
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
653
|
|
|
|
|
|
|
} |
654
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
91
|
|
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
#While we have insufficient samples |
657
|
1
|
|
|
|
|
6
|
while ( scalar @sample < $options{sample_size} ) { |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
#Generate a candidate model run |
660
|
|
|
|
|
|
|
my $candidate = |
661
|
17
|
|
|
|
|
40
|
&$model( %{ $options{model_options} }, tree_size => $nstar ); |
|
17
|
|
|
|
|
118
|
|
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
#Get the lineage through time data |
664
|
17
|
|
|
|
|
101
|
my ( $time, $count ) = lineage_through_time($candidate); |
665
|
17
|
|
|
|
|
51
|
my @size_stats; |
666
|
17
|
|
|
|
|
55
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
1748
|
|
|
|
|
2704
|
|
667
|
1
|
|
|
1
|
|
9
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
127
|
|
668
|
1731
|
100
|
|
|
|
2687
|
if ( $count->[$index] >= $options{tree_size} ) { |
669
|
|
|
|
|
|
|
$size_stats[ $count->[$index] - $options{tree_size} ] += |
670
|
|
|
|
|
|
|
$time->[$index] * |
671
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] - |
672
|
1115
|
|
|
|
|
1835
|
$options{tree_size} ]; |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
#Reorganise the lineage through time data |
677
|
|
|
|
|
|
|
#@duration contains the length of intervals with more than tree_size species |
678
|
|
|
|
|
|
|
#@start contains the starting times of these intervals |
679
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected |
680
|
|
|
|
|
|
|
#$total_prob contains the sum of @prob before normalisation |
681
|
17
|
|
|
|
|
44
|
my ( @duration, @start, @prob, $total_prob ); |
682
|
17
|
|
|
|
|
36
|
$total_prob = 0; |
683
|
17
|
|
|
|
|
45
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) { |
|
1748
|
|
|
|
|
2590
|
|
684
|
1731
|
100
|
|
|
|
2596
|
if ( $count->[$index] >= $options{tree_size} ) { |
685
|
1115
|
|
|
|
|
1612
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] ); |
686
|
1115
|
|
|
|
|
1405
|
push( @start, $time->[$index] ); |
687
|
1
|
|
|
1
|
|
6
|
no warnings 'uninitialized'; # FIXME |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4372
|
|
688
|
|
|
|
|
|
|
$total_prob += |
689
|
|
|
|
|
|
|
$duration[-1] * |
690
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] - |
691
|
1115
|
|
|
|
|
1484
|
$options{tree_size} ]; |
692
|
1115
|
|
|
|
|
1635
|
push( @prob, $total_prob ); |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
} |
695
|
17
|
100
|
|
|
|
111
|
next if $total_prob == 0; |
696
|
9
|
|
|
|
|
44
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) { |
697
|
1115
|
|
|
|
|
1846
|
$prob[$index] /= $total_prob; |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
#The expected number of samples we want |
701
|
9
|
|
|
|
|
24
|
my $expected_samples = $rate * $total_prob; |
702
|
9
|
|
|
|
|
34
|
push( @expected_summary, $expected_samples ); |
703
|
|
|
|
|
|
|
$expected_samples = $options{sample_size} - scalar @sample |
704
|
9
|
100
|
|
|
|
49
|
if $expected_samples > $options{sample_size} - scalar @sample; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree |
707
|
9
|
|
|
|
|
39
|
while ( $expected_samples > 0 ) { |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one |
710
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability |
711
|
|
|
|
|
|
|
#according to the remaining number of samples |
712
|
9
|
100
|
66
|
|
|
85
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) { |
713
|
5
|
|
|
|
|
29
|
my $tree = copy_tree($candidate); |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
#Get a random interval |
716
|
5
|
|
|
|
|
25
|
my $interval_choice = rand(1); |
717
|
5
|
|
|
|
|
12
|
my $interval; |
718
|
5
|
|
|
|
|
78
|
for ( |
719
|
|
|
|
|
|
|
$interval = 0 ; |
720
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ; |
721
|
|
|
|
|
|
|
$interval++ |
722
|
|
|
|
|
|
|
) |
723
|
|
|
|
|
|
|
{ |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval |
727
|
5
|
|
|
|
|
38
|
truncate_tree_time( $tree, |
728
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] ); |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
#Remove random species so it has the right size |
731
|
5
|
|
|
|
|
31
|
truncate_tree_size( $tree, $options{tree_size} ); |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
#Add the tree to our sample |
734
|
5
|
|
|
|
|
18
|
push( @sample, $tree ); |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
737
|
|
|
|
|
|
|
#many samples have been obtained) |
738
|
5
|
50
|
|
|
|
26
|
if ( defined $options{counter} ) { |
739
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
740
|
0
|
|
|
|
|
0
|
&$counter(1); |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
} |
743
|
9
|
|
|
|
|
564
|
$expected_samples--; |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
} |
746
|
1
|
|
|
|
|
10
|
return ( \@sample, \@expected_summary ); |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=item sample_memoryless_b() |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Sample from a memoryless birth model |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Type : Sampling algorithm |
754
|
|
|
|
|
|
|
Title : sample_memoryless_b |
755
|
|
|
|
|
|
|
Usage : see sample |
756
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model |
757
|
|
|
|
|
|
|
Returns : see sample |
758
|
|
|
|
|
|
|
Args : %algorithm_options with fields: |
759
|
|
|
|
|
|
|
pendant_dist => function reference for generating random |
760
|
|
|
|
|
|
|
shortest pendant edges |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
NB: The function pointed to by pendant_dist is given model_options |
763
|
|
|
|
|
|
|
as it's input argument with an added field tree_size. It must return |
764
|
|
|
|
|
|
|
a random value from the probability density for the shortest pendant |
765
|
|
|
|
|
|
|
edges. |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=cut |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
sub sample_memoryless_b { |
770
|
1
|
|
|
1
|
1
|
6
|
my %options = @_; |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
#The sample of trees |
773
|
1
|
|
|
|
|
2
|
my @sample; |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
#A list of the expected number of samples |
776
|
|
|
|
|
|
|
my @expected_summary; |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
#The user specified functions |
779
|
1
|
|
|
|
|
3
|
my $model = $options{model}; |
780
|
1
|
|
|
|
|
3
|
my $pendant_dist = $options{algorithm_options}->{pendant_dist}; |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
#While we have insufficient samples |
783
|
1
|
|
|
|
|
5
|
while ( scalar @sample < $options{sample_size} ) { |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
#Generate a tree ending just after the last speciation event |
786
|
5
|
|
|
|
|
30
|
my $tree = &$model( %{ $options{model_options} }, |
787
|
5
|
|
|
|
|
11
|
tree_size => $options{tree_size} ); |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
#Check that the tree has no extinctions |
790
|
5
|
50
|
|
|
|
31
|
unless ( $tree->is_ultrametric(1e-6) ) { |
791
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw( |
792
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" ); |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
#Get the random length to add after the last speciation event |
796
|
5
|
|
|
|
|
48
|
my $pendant_add = &$pendant_dist( %{ $options{model_options} }, |
797
|
5
|
|
|
|
|
12
|
tree_size => $options{tree_size} ); |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
#Add the final length |
800
|
5
|
|
|
|
|
72
|
foreach ( @{ $tree->get_terminals } ) { |
|
5
|
|
|
|
|
27
|
|
801
|
50
|
|
|
|
|
109
|
$_->set_branch_length( $_->get_branch_length + $pendant_add ); |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
#Add to the sample |
805
|
5
|
|
|
|
|
18
|
push( @sample, $tree ); |
806
|
5
|
|
|
|
|
12
|
push( @expected_summary, 1 ); |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
809
|
|
|
|
|
|
|
#many samples have been obtained) |
810
|
5
|
50
|
|
|
|
33
|
if ( defined $options{counter} ) { |
811
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
812
|
0
|
|
|
|
|
0
|
&$counter(1); |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
} |
815
|
1
|
|
|
|
|
7
|
return ( \@sample, \@expected_summary ); |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=item sample_constant_rate_bd() |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
Sample from a constant rate birth and death model |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Type : Sampling algorithm |
823
|
|
|
|
|
|
|
Title : sample_constant_rate_bd |
824
|
|
|
|
|
|
|
Usage : see sample |
825
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model |
826
|
|
|
|
|
|
|
Returns : see sample |
827
|
|
|
|
|
|
|
Args : no specific algorithm options but see below |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
NB: This algorithm only applies to constant rate birth and death |
830
|
|
|
|
|
|
|
processes. Consequently a model does not need to be specified (and |
831
|
|
|
|
|
|
|
will be ignored if it is). But birth_rate and death_rate model |
832
|
|
|
|
|
|
|
options must be given. |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=cut |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
sub sample_constant_rate_bd { |
837
|
1
|
|
|
1
|
1
|
5
|
my %options = @_; |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
#Store parameters in shorter variables (for clarity) |
840
|
|
|
|
|
|
|
my ( $br, $dr, $n ) = ( |
841
|
|
|
|
|
|
|
$options{model_options}->{birth_rate}, |
842
|
|
|
|
|
|
|
$options{model_options}->{death_rate}, |
843
|
|
|
|
|
|
|
$options{tree_size} |
844
|
1
|
|
|
|
|
4
|
); |
845
|
1
|
|
|
|
|
3
|
my @sample; |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
#Loop for sampling each tree |
848
|
1
|
|
|
|
|
5
|
while ( scalar @sample < $options{sample_size} ) { |
849
|
5
|
|
|
|
|
12
|
my @nodes; |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
#Compute the random tree age from the inverse CDF (different formulas for |
852
|
|
|
|
|
|
|
#birth rate == death rate and otherwise) |
853
|
|
|
|
|
|
|
my $tree_age; |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
#The uniform random variable |
856
|
5
|
|
|
|
|
21
|
my $r = rand; |
857
|
5
|
50
|
|
|
|
20
|
if ( $br == $dr ) { |
858
|
0
|
|
|
|
|
0
|
$tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) ); |
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
else { |
861
|
5
|
|
|
|
|
55
|
$tree_age = |
862
|
|
|
|
|
|
|
1 / |
863
|
|
|
|
|
|
|
( $br - $dr ) * |
864
|
|
|
|
|
|
|
log( |
865
|
|
|
|
|
|
|
( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) ); |
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
#Find the random speciation times |
869
|
5
|
|
|
|
|
12
|
my @speciation; |
870
|
5
|
|
|
|
|
29
|
foreach ( 0 .. ( $n - 2 ) ) { |
871
|
45
|
50
|
|
|
|
65
|
if ( $br == $dr ) { |
872
|
0
|
|
|
|
|
0
|
my $r = rand; |
873
|
0
|
|
|
|
|
0
|
$speciation[$_] = |
874
|
|
|
|
|
|
|
$r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) ); |
875
|
|
|
|
|
|
|
} |
876
|
|
|
|
|
|
|
else { |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
#Two repeated parts of the inverse CDF for clarity |
879
|
45
|
|
|
|
|
84
|
my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age ); |
880
|
45
|
|
|
|
|
66
|
my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand; |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
#The random speciation time from the inverse CDF |
883
|
45
|
|
|
|
|
99
|
$speciation[$_] = |
884
|
|
|
|
|
|
|
1 / |
885
|
|
|
|
|
|
|
( $br - $dr ) * |
886
|
|
|
|
|
|
|
log( ( $a - $dr * $b ) / ( $a - $br * $b ) ); |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
} |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
#Create the initial terminals and a vector for their ages |
891
|
5
|
|
|
|
|
12
|
my @terminals; |
892
|
|
|
|
|
|
|
my @ages; |
893
|
5
|
|
|
|
|
13
|
foreach ( 0 .. ( $n - 1 ) ) { |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
#Add a new terminal |
896
|
50
|
|
|
|
|
138
|
$terminals[$_] = Bio::Phylo::Forest::Node->new(); |
897
|
50
|
|
|
|
|
198
|
$terminals[$_]->set_name( 'ID' . $_ ); |
898
|
50
|
|
|
|
|
115
|
$ages[$_] = 0; |
899
|
|
|
|
|
|
|
} |
900
|
5
|
|
|
|
|
20
|
@nodes = @terminals; |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
#Sort the speciation times |
903
|
5
|
|
|
|
|
32
|
my @sorted_speciation = sort { $a <=> $b } @speciation; |
|
94
|
|
|
|
|
129
|
|
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
#Make a hash for easily finding the index of a given speciation event |
906
|
5
|
|
|
|
|
9
|
my %speciation_hash; |
907
|
5
|
|
|
|
|
18
|
foreach ( 0 .. ( $n - 2 ) ) { |
908
|
45
|
|
|
|
|
170
|
$speciation_hash{ $speciation[$_] } = $_; |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
#Construct the tree |
912
|
5
|
|
|
|
|
16
|
foreach my $index ( 0 .. ( $n - 2 ) ) { |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
#Create the parent node |
915
|
45
|
|
|
|
|
125
|
my $parent = Bio::Phylo::Forest::Node->new(); |
916
|
45
|
|
|
|
|
183
|
$parent->set_name( 'ID' . ( $n + $index ) ); |
917
|
45
|
|
|
|
|
99
|
push( @nodes, $parent ); |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
#An index for this speciation event back into the unsorted vectors |
920
|
45
|
|
|
|
|
215
|
my $spec_index = $speciation_hash{ $sorted_speciation[$index] }; |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
#Add the children to the parent node |
923
|
45
|
|
|
|
|
149
|
$parent->set_child( $terminals[$spec_index] ); |
924
|
45
|
|
|
|
|
132
|
$terminals[$spec_index]->set_parent($parent); |
925
|
45
|
|
|
|
|
127
|
$parent->set_child( $terminals[ $spec_index + 1 ] ); |
926
|
45
|
|
|
|
|
130
|
$terminals[ $spec_index + 1 ]->set_parent($parent); |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
#Set the children's branch lengths |
929
|
45
|
|
|
|
|
169
|
$terminals[$spec_index]->set_branch_length( |
930
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[$spec_index] ); |
931
|
45
|
|
|
|
|
154
|
$terminals[ $spec_index + 1 ]->set_branch_length( |
932
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[ $spec_index + 1 ] ); |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
#Replace the two terminals with the new one |
935
|
45
|
|
|
|
|
98
|
splice( @terminals, $spec_index, 2, $parent ); |
936
|
45
|
|
|
|
|
94
|
splice( @ages, $spec_index, 2, $sorted_speciation[$index] ); |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
#Update the mapping for the sorted speciation times |
939
|
45
|
|
|
|
|
166
|
foreach ( keys %speciation_hash ) { |
940
|
405
|
100
|
|
|
|
699
|
$speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index; |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
#Add the nodes to a tree |
945
|
5
|
|
|
|
|
36
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
946
|
5
|
|
|
|
|
19
|
foreach ( reverse(@nodes) ) { $tree->insert($_); } |
|
95
|
|
|
|
|
203
|
|
947
|
5
|
|
|
|
|
12
|
push( @sample, $tree ); |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how |
950
|
|
|
|
|
|
|
#many samples have been obtained) |
951
|
5
|
50
|
|
|
|
90
|
if ( defined $options{counter} ) { |
952
|
0
|
|
|
|
|
0
|
my $counter = $options{counter}; |
953
|
0
|
|
|
|
|
0
|
&$counter(1); |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
} |
956
|
1
|
|
|
|
|
8
|
return ( \@sample, [] ); |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=back |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=head1 EVOLUTIONARY MODELS |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
All evolutionary models take a options hash as their input argument |
964
|
|
|
|
|
|
|
and return a Bio::Phylo::Forest::Tree. This tree may contain extinct |
965
|
|
|
|
|
|
|
lineages (lineages that end prior to the end of the tree). |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
The options hash contains any model specific parameters (see the |
968
|
|
|
|
|
|
|
individual model descriptions) and one or both terminating conditions: |
969
|
|
|
|
|
|
|
tree_size => the number of extant species at which to terminate the tree |
970
|
|
|
|
|
|
|
tree_age => the age of the tree at which to terminate the process |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
Note that if the model stops due to the tree_size condition then the |
973
|
|
|
|
|
|
|
tree ends immediately after the speciation event that created the last |
974
|
|
|
|
|
|
|
species. |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=over |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=item constant_rate_birth() |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
A constant rate birth model (Yule/ERM) |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
Type : Evolutionary model |
983
|
|
|
|
|
|
|
Title : constant_rate_birth |
984
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth(%options) |
985
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
986
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
987
|
|
|
|
|
|
|
Args : %options with fields: |
988
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1) |
989
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
990
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
=cut |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
sub constant_rate_birth { |
997
|
18
|
|
|
18
|
1
|
546
|
my %options = @_; |
998
|
18
|
|
|
|
|
48
|
$options{death_rate} = 0; |
999
|
18
|
|
|
|
|
71
|
return constant_rate_birth_death(%options); |
1000
|
|
|
|
|
|
|
} |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
=item external_model() |
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
A dummy model that takes as input a set of newick_trees and randomly samples |
1005
|
|
|
|
|
|
|
these. |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
Type : Evolutionary model |
1008
|
|
|
|
|
|
|
Title : external_model |
1009
|
|
|
|
|
|
|
Usage : $tree = $external_model(%options) |
1010
|
|
|
|
|
|
|
Function: Returns a random tree that was given as input |
1011
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1012
|
|
|
|
|
|
|
Args : %options with fields: |
1013
|
|
|
|
|
|
|
trees An array of newick strings. One of these is returned at random. |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
NB: The usual parameters tree_size and tree_age will be ignored. When sampling |
1016
|
|
|
|
|
|
|
using this model the trees array must contain trees adhering to the requirements |
1017
|
|
|
|
|
|
|
of the sampling algorithm. This is NOT checked automatically. |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
=cut |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
sub external_model { |
1022
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1023
|
0
|
|
|
|
|
0
|
my $choice = int( rand( scalar @{ $options{trees} } ) ); |
|
0
|
|
|
|
|
0
|
|
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
#Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object |
1026
|
|
|
|
|
|
|
my $tree = Bio::Phylo::IO->parse( |
1027
|
|
|
|
|
|
|
-format => 'newick', |
1028
|
0
|
|
|
|
|
0
|
-string => $options{trees}->[$choice] |
1029
|
|
|
|
|
|
|
)->first; |
1030
|
0
|
|
|
|
|
0
|
return $tree; |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
=item constant_rate_birth_death() |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
A constant rate birth and death model |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Type : Evolutionary model |
1038
|
|
|
|
|
|
|
Title : constant_rate_birth_death |
1039
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options) |
1040
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1041
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1042
|
|
|
|
|
|
|
Args : %options with fields: |
1043
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1) |
1044
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction) |
1045
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1046
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
=cut |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
sub constant_rate_birth_death { |
1053
|
66
|
|
|
66
|
1
|
303
|
my %options = @_; |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
#Check that we have a termination condition |
1056
|
66
|
50
|
33
|
|
|
259
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
#Error here. |
1059
|
0
|
|
|
|
|
0
|
return undef; |
1060
|
|
|
|
|
|
|
} |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1063
|
66
|
50
|
|
|
|
190
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1064
|
66
|
50
|
|
|
|
222
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
#Set default rates |
1067
|
66
|
50
|
|
|
|
162
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1068
|
|
|
|
|
|
|
delete $options{death_rate} |
1069
|
66
|
100
|
66
|
|
|
410
|
if defined( $options{death_rate} ) && $options{death_rate} == 0; |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1072
|
66
|
|
|
|
|
127
|
my $node_id = 0; |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1075
|
66
|
|
|
|
|
331
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1076
|
66
|
|
|
|
|
274
|
my $root = Bio::Phylo::Forest::Node->new(); |
1077
|
66
|
|
|
|
|
292
|
$root->set_branch_length(0); |
1078
|
66
|
|
|
|
|
683
|
$root->set_name( 'ID' . $node_id++ ); |
1079
|
66
|
|
|
|
|
299
|
$tree->insert($root); |
1080
|
66
|
|
|
|
|
444
|
my @terminals = ($root); |
1081
|
66
|
|
|
|
|
133
|
my ( $next_extinction, $next_speciation ); |
1082
|
66
|
|
|
|
|
108
|
my $time = 0; |
1083
|
66
|
|
|
|
|
92
|
my $tree_size = 1; |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1086
|
66
|
50
|
33
|
|
|
263
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1089
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
else { |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1094
|
66
|
|
|
|
|
132
|
$next_speciation = 0; |
1095
|
|
|
|
|
|
|
} |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1098
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1099
|
66
|
100
|
|
|
|
163
|
if ( defined $options{death_rate} ) { |
1100
|
48
|
|
|
|
|
330
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
else { |
1103
|
18
|
|
|
|
|
37
|
$next_extinction = $next_speciation + 1; |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1107
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1108
|
66
|
|
66
|
|
|
460
|
while ($tree_size > 0 |
|
|
|
66
|
|
|
|
|
1109
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1110
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1111
|
|
|
|
|
|
|
{ |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1114
|
3938
|
|
|
|
|
6886
|
foreach (@terminals) { |
1115
|
|
|
|
|
|
|
$_->set_branch_length( |
1116
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1117
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1118
|
50484
|
|
|
|
|
93620
|
$options{tree_age} - $time |
1119
|
|
|
|
|
|
|
) |
1120
|
|
|
|
|
|
|
); |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
#Update the time |
1124
|
3938
|
|
|
|
|
7854
|
$time += min( $next_extinction, $next_speciation ); |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1127
|
3938
|
50
|
|
|
|
8204
|
return $tree if ( $time > $options{tree_age} ); |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1130
|
3938
|
|
|
|
|
10134
|
my $effected = |
1131
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1134
|
3938
|
100
|
66
|
|
|
11735
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1135
|
|
|
|
|
|
|
{ |
1136
|
2296
|
|
|
|
|
4752
|
foreach ( 1, 2 ) { |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
#Create a new species |
1139
|
4592
|
|
|
|
|
13528
|
my $child = Bio::Phylo::Forest::Node->new(); |
1140
|
4592
|
|
|
|
|
19646
|
$child->set_name( 'ID' . $node_id++ ); |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
#Give it a zero edge length |
1143
|
4592
|
|
|
|
|
14974
|
$child->set_branch_length(0); |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1146
|
4592
|
|
|
|
|
12351
|
$effected->set_child($child); |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
#Add it to the tree |
1149
|
4592
|
|
|
|
|
13419
|
$tree->insert($child); |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
#Add it to the terminals list |
1152
|
4592
|
|
|
|
|
10206
|
push( @terminals, $child ); |
1153
|
|
|
|
|
|
|
} |
1154
|
|
|
|
|
|
|
} |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1157
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1158
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1159
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1160
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1161
|
3938
|
|
|
|
|
7672
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1162
|
3938
|
|
|
|
|
10836
|
$r1 = rand until $r1; |
1163
|
3938
|
|
|
|
|
8013
|
$r2 = rand until $r2; |
1164
|
3938
|
|
|
|
|
5366
|
$tree_size = scalar @terminals; |
1165
|
3938
|
100
|
|
|
|
7036
|
return $tree unless $tree_size; |
1166
|
3908
|
|
|
|
|
12883
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size; |
1167
|
3908
|
100
|
|
|
|
7597
|
if ( defined $options{death_rate} ) { |
1168
|
3746
|
|
|
|
|
20923
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size; |
1169
|
|
|
|
|
|
|
} |
1170
|
|
|
|
|
|
|
else { |
1171
|
162
|
|
|
|
|
758
|
$next_extinction = $next_speciation + 1; |
1172
|
|
|
|
|
|
|
} |
1173
|
|
|
|
|
|
|
} |
1174
|
36
|
|
|
|
|
241
|
return $tree; |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
=item diversity_dependent_speciation() |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
A birth and death model with speciation rate dependent on diversity as per |
1181
|
|
|
|
|
|
|
Etienne et. al. 2012 |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
Type : Evolutionary model |
1184
|
|
|
|
|
|
|
Title : diversity_dependent_speciation |
1185
|
|
|
|
|
|
|
Usage : $tree = diversity_dependent_speciation(%options) |
1186
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1187
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1188
|
|
|
|
|
|
|
Args : %options with fields: |
1189
|
|
|
|
|
|
|
maximal_birth_rate The maximal birth rate parameter (default 1) |
1190
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction) |
1191
|
|
|
|
|
|
|
K_dash The modified carrying capacity (no default) |
1192
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1193
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
Reference: |
1198
|
|
|
|
|
|
|
Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson, |
1199
|
|
|
|
|
|
|
Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular |
1200
|
|
|
|
|
|
|
phylogenies closer to agreement with the fossil record" |
1201
|
|
|
|
|
|
|
doi: 10.1098/rspb.2011.1439 |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=cut |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
sub diversity_dependent_speciation { |
1206
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
#Check that we have a termination condition |
1209
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1210
|
|
|
|
|
|
|
#Error here. |
1211
|
0
|
|
|
|
|
0
|
return undef; |
1212
|
|
|
|
|
|
|
} |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
#Check that we have a carrying capacity |
1215
|
0
|
0
|
|
|
|
0
|
unless ( defined $options{K_dash} ) { |
1216
|
|
|
|
|
|
|
#Error here. |
1217
|
0
|
|
|
|
|
0
|
return undef; |
1218
|
|
|
|
|
|
|
} |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1221
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1222
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
#Set default rates |
1225
|
0
|
0
|
|
|
|
0
|
$options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} ); |
1226
|
|
|
|
|
|
|
delete $options{death_rate} |
1227
|
0
|
0
|
0
|
|
|
0
|
if defined( $options{death_rate} ) && $options{death_rate} == 0; |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1230
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1233
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1234
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1235
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1236
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1237
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1238
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1239
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation ); |
1240
|
0
|
|
|
|
|
0
|
my $time = 0; |
1241
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
|
1244
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash})); |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1248
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1251
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1252
|
|
|
|
|
|
|
} |
1253
|
|
|
|
|
|
|
else { |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1256
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1257
|
|
|
|
|
|
|
} |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1260
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1261
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) { |
1262
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size; |
1263
|
|
|
|
|
|
|
} |
1264
|
|
|
|
|
|
|
else { |
1265
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1269
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1270
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1271
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1272
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1273
|
|
|
|
|
|
|
{ |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1276
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1277
|
|
|
|
|
|
|
$_->set_branch_length( |
1278
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1279
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1280
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1281
|
|
|
|
|
|
|
) |
1282
|
|
|
|
|
|
|
); |
1283
|
|
|
|
|
|
|
} |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
#Update the time |
1286
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation ); |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1289
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1292
|
0
|
|
|
|
|
0
|
my $effected = |
1293
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1296
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1297
|
|
|
|
|
|
|
{ |
1298
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
#Create a new species |
1301
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1302
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
#Give it a zero edge length |
1305
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1308
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
#Add it to the tree |
1311
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
#Add it to the terminals list |
1314
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
} |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1319
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1320
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1321
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1322
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1323
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1324
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1325
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1326
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1327
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1328
|
|
|
|
|
|
|
|
1329
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash})); |
1330
|
0
|
0
|
|
|
|
0
|
if ($options{birth_rate}==0) |
1331
|
|
|
|
|
|
|
{ |
1332
|
0
|
|
|
|
|
0
|
$next_speciation = $options{tree_age}; |
1333
|
|
|
|
|
|
|
} else |
1334
|
|
|
|
|
|
|
{ |
1335
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size; |
1336
|
|
|
|
|
|
|
} |
1337
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) { |
1338
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size; |
1339
|
|
|
|
|
|
|
} |
1340
|
|
|
|
|
|
|
else { |
1341
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1; |
1342
|
|
|
|
|
|
|
} |
1343
|
|
|
|
|
|
|
} |
1344
|
0
|
|
|
|
|
0
|
return $tree; |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=item constant_rate_birth_death() |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
A temporal shift birth and death model |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
Type : Evolutionary model |
1353
|
|
|
|
|
|
|
Title : temporal_shift_birth_death |
1354
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options) |
1355
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1356
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1357
|
|
|
|
|
|
|
Args : %options with fields: |
1358
|
|
|
|
|
|
|
birth_rates The birth rates |
1359
|
|
|
|
|
|
|
death_rates The death rates |
1360
|
|
|
|
|
|
|
rate_times The times after which the rates apply (first element must be 0) |
1361
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1362
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
=cut |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
sub temporal_shift_birth_death { |
1369
|
0
|
|
|
0
|
0
|
0
|
my %options = @_; |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
#Check that we have a termination condition |
1372
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
#Error here. |
1375
|
0
|
|
|
|
|
0
|
return undef; |
1376
|
|
|
|
|
|
|
} |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1379
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1380
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1383
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1386
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1387
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1388
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1389
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1390
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1391
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1392
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation ); |
1393
|
0
|
|
|
|
|
0
|
my $time = 0; |
1394
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
#Load current rates |
1397
|
0
|
|
|
|
|
0
|
my $birth_rate = $options{birth_rates}[0]; |
1398
|
0
|
|
|
|
|
0
|
my $death_rate = $options{death_rates}[0]; |
1399
|
0
|
|
|
|
|
0
|
my $current_rates = 0; |
1400
|
0
|
|
|
|
|
0
|
my $next_rate_change = $options{rate_times}[$current_rates+1]; |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
#Add an additional time to the end of the rate change times to simplify checking |
1403
|
0
|
|
|
|
|
0
|
push(@{$options{rate_times}},$options{tree_size}*2); |
|
0
|
|
|
|
|
0
|
|
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1406
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1409
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rate / $tree_size; |
1410
|
|
|
|
|
|
|
} |
1411
|
|
|
|
|
|
|
else { |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1414
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1415
|
|
|
|
|
|
|
} |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n"; |
1418
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1419
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1420
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rate / $tree_size; |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1423
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1424
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1425
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1426
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1427
|
|
|
|
|
|
|
{ |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
# print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n"; |
1430
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n"; |
1431
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1432
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1433
|
|
|
|
|
|
|
$_->set_branch_length( |
1434
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1435
|
|
|
|
|
|
|
$next_extinction, $next_speciation, |
1436
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1437
|
|
|
|
|
|
|
) |
1438
|
|
|
|
|
|
|
); |
1439
|
|
|
|
|
|
|
} |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
#Update the time |
1442
|
0
|
|
|
|
|
0
|
my $time_last = $time; |
1443
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last); |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1446
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
|
1449
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) ) |
1450
|
|
|
|
|
|
|
{ |
1451
|
0
|
|
|
|
|
0
|
$current_rates += 1; |
1452
|
0
|
|
|
|
|
0
|
$birth_rate = $options{birth_rates}[$current_rates]; |
1453
|
0
|
|
|
|
|
0
|
$death_rate = $options{death_rates}[$current_rates]; |
1454
|
0
|
|
|
|
|
0
|
$next_rate_change = $options{rate_times}[$current_rates+1]; |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
} else |
1457
|
|
|
|
|
|
|
{ |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list |
1461
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 ); |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1464
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1465
|
|
|
|
|
|
|
{ |
1466
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
#Create a new species |
1469
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1470
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
#Give it a zero edge length |
1473
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1476
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
#Add it to the tree |
1479
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
#Add it to the terminals list |
1482
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1488
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1489
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1490
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1491
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1492
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1493
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1494
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1495
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1496
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1497
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $birth_rate / $tree_size; |
1498
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $death_rate / $tree_size; |
1499
|
|
|
|
|
|
|
|
1500
|
0
|
0
|
|
|
|
0
|
if ((scalar @terminals)%100==0) |
1501
|
|
|
|
|
|
|
{ |
1502
|
0
|
|
|
|
|
0
|
print $time."|".@terminals."|\n"; |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
} |
1506
|
0
|
|
|
|
|
0
|
return $tree; |
1507
|
|
|
|
|
|
|
} |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
=item evolving_speciation_rate() |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. Each daughter |
1512
|
|
|
|
|
|
|
species is assigned its parent's speciation rate multiplied by a normally |
1513
|
|
|
|
|
|
|
distributed noise factor. |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
Type : Evolutionary model |
1516
|
|
|
|
|
|
|
Title : evolving_speciation_rate |
1517
|
|
|
|
|
|
|
Usage : $tree = evolving_speciation_rate(%options) |
1518
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1519
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1520
|
|
|
|
|
|
|
Args : %options with fields: |
1521
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1) |
1522
|
|
|
|
|
|
|
evolving_std The standard deviation of the normal distribution |
1523
|
|
|
|
|
|
|
from which the rate multiplier is drawn. |
1524
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1525
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
=cut |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
sub evolving_speciation_rate { |
1532
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
#Check that we have a termination condition |
1535
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
#Error here. |
1538
|
0
|
|
|
|
|
0
|
return undef; |
1539
|
|
|
|
|
|
|
} |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1542
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1543
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
#Set default rates |
1546
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1547
|
0
|
0
|
|
|
|
0
|
$options{evolving_std} = 1 unless defined( $options{evolving_std} ); |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1550
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1553
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1554
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1555
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1556
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1557
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1558
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1559
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} ); |
1560
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate}; |
1561
|
0
|
|
|
|
|
0
|
my $next_speciation; |
1562
|
0
|
|
|
|
|
0
|
my $time = 0; |
1563
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1566
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1569
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
else { |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1574
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1575
|
|
|
|
|
|
|
} |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
#While we haven't reached termination |
1578
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) { |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1581
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1582
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length + |
1583
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) ); |
1584
|
|
|
|
|
|
|
} |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
#Update the time |
1587
|
0
|
|
|
|
|
0
|
$time += $next_speciation; |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1590
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
#Get the species effected by this event |
1593
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate); |
1594
|
0
|
|
|
|
|
0
|
my $selected = 0; |
1595
|
0
|
|
0
|
|
|
0
|
for ( |
1596
|
|
|
|
|
|
|
; |
1597
|
|
|
|
|
|
|
$selected < scalar @terminals |
1598
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
1599
|
|
|
|
|
|
|
$selected++ |
1600
|
|
|
|
|
|
|
) |
1601
|
|
|
|
|
|
|
{ |
1602
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
1603
|
|
|
|
|
|
|
} |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
#Remove it from the terminals list |
1606
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
1607
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 ); |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
#Update the net speciation rate |
1610
|
0
|
|
|
|
|
0
|
$net_rate -= $effected_rate; |
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1613
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
#Create a new species |
1616
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1617
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
#Give it a zero edge length |
1620
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1623
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
#Add it to the tree |
1626
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
#Add it to the terminals list |
1629
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
#New speciation rate |
1632
|
|
|
|
|
|
|
my $new_speciation_rate = |
1633
|
0
|
|
|
|
|
0
|
$effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} ); |
1634
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; } |
|
0
|
|
|
|
|
0
|
|
1635
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate ); |
1636
|
0
|
|
|
|
|
0
|
$net_rate += $new_speciation_rate; |
1637
|
|
|
|
|
|
|
} |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
# $net_rate = 0; |
1640
|
|
|
|
|
|
|
# foreach (@birth_rates) { $net_rate += $_; } |
1641
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1642
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1643
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1644
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely) |
1647
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) { |
1648
|
0
|
|
|
|
|
0
|
return $tree; |
1649
|
|
|
|
|
|
|
} |
1650
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size; |
1651
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1652
|
|
|
|
|
|
|
} |
1653
|
0
|
|
|
|
|
0
|
return $tree; |
1654
|
|
|
|
|
|
|
} |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=item clade_shifts() |
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
A constant rate birth-death model with punctuated changes in the speciation |
1660
|
|
|
|
|
|
|
and extinction rates. At each change one lineage receives new pre-specified |
1661
|
|
|
|
|
|
|
speciation and extinction rates. |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
Type : Evolutionary model |
1664
|
|
|
|
|
|
|
Title : clade_shifts |
1665
|
|
|
|
|
|
|
Usage : $tree = clade_shifts(%options) |
1666
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1667
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1668
|
|
|
|
|
|
|
Args : %options with fields: |
1669
|
|
|
|
|
|
|
birth_rates The speciation rates |
1670
|
|
|
|
|
|
|
death_rates The death rates |
1671
|
|
|
|
|
|
|
rate_times The times at which the rates are introduced to a new |
1672
|
|
|
|
|
|
|
clade. The first time should be zero. The remaining must be in |
1673
|
|
|
|
|
|
|
ascending order. |
1674
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1675
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=cut |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
sub clade_shifts { |
1682
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
#Check that we have a termination condition |
1685
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
#Error here. |
1688
|
0
|
|
|
|
|
0
|
return undef; |
1689
|
|
|
|
|
|
|
} |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1692
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1693
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1696
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1699
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1700
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1701
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1702
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1703
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
#rates |
1706
|
0
|
|
|
|
|
0
|
my @birth_rates_in = @{$options{birth_rates}}; |
|
0
|
|
|
|
|
0
|
|
1707
|
0
|
|
|
|
|
0
|
my @death_rates_in = @{$options{death_rates}}; |
|
0
|
|
|
|
|
0
|
|
1708
|
0
|
|
|
|
|
0
|
my @rate_times_in = @{$options{rate_times}}; |
|
0
|
|
|
|
|
0
|
|
1709
|
|
|
|
|
|
|
|
1710
|
0
|
0
|
|
|
|
0
|
if ($rate_times_in[0] != 0) |
1711
|
|
|
|
|
|
|
{ |
1712
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1713
|
|
|
|
|
|
|
"The first rate time must be 0" ); |
1714
|
|
|
|
|
|
|
} |
1715
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @death_rates_in) |
1716
|
|
|
|
|
|
|
{ |
1717
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1718
|
|
|
|
|
|
|
"birth and death rates must have the same length" ); |
1719
|
|
|
|
|
|
|
} |
1720
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @rate_times_in) |
1721
|
|
|
|
|
|
|
{ |
1722
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => |
1723
|
|
|
|
|
|
|
"birth/death rates must have the same length as rate times" ); |
1724
|
|
|
|
|
|
|
} |
1725
|
0
|
|
|
|
|
0
|
my @birth_rates = ($birth_rates_in[0]); |
1726
|
0
|
|
|
|
|
0
|
my @death_rates = ($death_rates_in[0]); |
1727
|
|
|
|
|
|
|
|
1728
|
0
|
|
|
|
|
0
|
my $net_birth_rate = $birth_rates[0]; |
1729
|
0
|
|
|
|
|
0
|
my $net_death_rate = $death_rates[0]; |
1730
|
|
|
|
|
|
|
|
1731
|
0
|
|
|
|
|
0
|
shift(@birth_rates_in); |
1732
|
0
|
|
|
|
|
0
|
shift(@death_rates_in); |
1733
|
|
|
|
|
|
|
|
1734
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1735
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation, $next_rate_change ); |
1736
|
0
|
|
|
|
|
0
|
my $time = 0; |
1737
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1738
|
0
|
|
|
|
|
0
|
my $inf = 9**9**9**9; |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1741
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1742
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1744
|
0
|
0
|
|
|
|
0
|
if ($birth_rates[0] > 0) |
1745
|
|
|
|
|
|
|
{ |
1746
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rates[0] ; |
1747
|
|
|
|
|
|
|
} else |
1748
|
|
|
|
|
|
|
{ |
1749
|
0
|
|
|
|
|
0
|
$next_speciation = $inf; |
1750
|
|
|
|
|
|
|
} |
1751
|
|
|
|
|
|
|
} |
1752
|
|
|
|
|
|
|
else { |
1753
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1754
|
0
|
|
|
|
|
0
|
$next_speciation = 0.0; |
1755
|
|
|
|
|
|
|
} |
1756
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always |
1758
|
|
|
|
|
|
|
#set the extinction event after the current speciation event |
1759
|
0
|
0
|
|
|
|
0
|
if ($death_rates[0] > 0) |
1760
|
|
|
|
|
|
|
{ |
1761
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rates[0]; |
1762
|
|
|
|
|
|
|
} else |
1763
|
|
|
|
|
|
|
{ |
1764
|
0
|
|
|
|
|
0
|
$next_extinction = $inf; |
1765
|
|
|
|
|
|
|
} |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
#Time of next rate change |
1768
|
0
|
|
|
|
|
0
|
shift(@rate_times_in); #pop the initial 0 |
1769
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in); |
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion |
1772
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events |
1773
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0 |
|
|
|
0
|
|
|
|
|
1774
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size} |
1775
|
|
|
|
|
|
|
&& $time < $options{tree_age} ) |
1776
|
|
|
|
|
|
|
{ |
1777
|
|
|
|
|
|
|
#print $time."|".$tree_size."\n"; |
1778
|
|
|
|
|
|
|
#Update rates if a clade shift is happening |
1779
|
|
|
|
|
|
|
#TODO index rates or pop off one at a time. |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
1783
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
1784
|
|
|
|
|
|
|
$_->set_branch_length( |
1785
|
|
|
|
|
|
|
$_->get_branch_length + min( |
1786
|
|
|
|
|
|
|
$next_extinction, |
1787
|
|
|
|
|
|
|
$next_speciation, |
1788
|
|
|
|
|
|
|
$next_rate_change-$time, |
1789
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time |
1790
|
|
|
|
|
|
|
) |
1791
|
|
|
|
|
|
|
); |
1792
|
|
|
|
|
|
|
} |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
#Update the time |
1795
|
0
|
|
|
|
|
0
|
my $time_last = $time; |
1796
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last ); |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
1799
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
#We have a rate change |
1802
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min($next_extinction, $next_speciation)) |
1803
|
|
|
|
|
|
|
{ |
1804
|
|
|
|
|
|
|
#Find a random species to effect |
1805
|
0
|
|
|
|
|
0
|
my $effected_species = int(rand($tree_size)); |
1806
|
|
|
|
|
|
|
#Subtract current rates |
1807
|
0
|
|
|
|
|
0
|
$net_death_rate -= $death_rates[$effected_species]; |
1808
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $birth_rates[$effected_species]; |
1809
|
|
|
|
|
|
|
#Get new rates |
1810
|
0
|
|
|
|
|
0
|
$death_rates[$effected_species] = shift(@death_rates_in); |
1811
|
0
|
|
|
|
|
0
|
$birth_rates[$effected_species] = shift(@birth_rates_in); |
1812
|
|
|
|
|
|
|
#Add new rates |
1813
|
0
|
|
|
|
|
0
|
$net_death_rate += $death_rates[$effected_species]; |
1814
|
0
|
|
|
|
|
0
|
$net_birth_rate += $birth_rates[$effected_species]; |
1815
|
|
|
|
|
|
|
#Get next rate change time |
1816
|
0
|
0
|
|
|
|
0
|
if (scalar(@rate_times_in)) |
1817
|
|
|
|
|
|
|
{ |
1818
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in); |
1819
|
|
|
|
|
|
|
} else |
1820
|
|
|
|
|
|
|
{ |
1821
|
0
|
|
|
|
|
0
|
$next_rate_change = $inf; |
1822
|
|
|
|
|
|
|
} |
1823
|
|
|
|
|
|
|
} |
1824
|
|
|
|
|
|
|
#Choosing a random species to speciate |
1825
|
|
|
|
|
|
|
else |
1826
|
|
|
|
|
|
|
{ |
1827
|
0
|
|
|
|
|
0
|
my $selected = 0; |
1828
|
0
|
0
|
|
|
|
0
|
if ( $next_speciation < $next_extinction ) |
1829
|
|
|
|
|
|
|
{ |
1830
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_birth_rate); |
1831
|
0
|
|
0
|
|
|
0
|
for ( |
1832
|
|
|
|
|
|
|
; |
1833
|
|
|
|
|
|
|
$selected < scalar @terminals |
1834
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
1835
|
|
|
|
|
|
|
$selected++ |
1836
|
|
|
|
|
|
|
) |
1837
|
|
|
|
|
|
|
{ |
1838
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
1839
|
|
|
|
|
|
|
} |
1840
|
|
|
|
|
|
|
} else |
1841
|
|
|
|
|
|
|
{ |
1842
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_death_rate); |
1843
|
0
|
|
0
|
|
|
0
|
for ( |
1844
|
|
|
|
|
|
|
; |
1845
|
|
|
|
|
|
|
$selected < scalar @terminals |
1846
|
|
|
|
|
|
|
&& $rand_select > $death_rates[$selected] ; |
1847
|
|
|
|
|
|
|
$selected++ |
1848
|
|
|
|
|
|
|
) |
1849
|
|
|
|
|
|
|
{ |
1850
|
0
|
|
|
|
|
0
|
$rand_select -= $death_rates[$selected]; |
1851
|
|
|
|
|
|
|
} |
1852
|
|
|
|
|
|
|
} |
1853
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate == 0) |
1854
|
|
|
|
|
|
|
{ |
1855
|
0
|
|
|
|
|
0
|
$selected = 0; |
1856
|
|
|
|
|
|
|
} |
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
#Remove the species effected by this event and remove it from the terminals list |
1859
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
1860
|
0
|
|
|
|
|
0
|
my $effected_birth_rate = splice( @birth_rates, $selected, 1 ); |
1861
|
0
|
|
|
|
|
0
|
my $effected_death_rate = splice( @death_rates, $selected, 1 ); |
1862
|
|
|
|
|
|
|
|
1863
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $effected_birth_rate; |
1864
|
0
|
|
|
|
|
0
|
$net_death_rate -= $effected_death_rate; |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
1867
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction ) |
1868
|
|
|
|
|
|
|
{ |
1869
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
#Create a new species |
1872
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
1873
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
#Give it a zero edge length |
1876
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
1879
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
#Add it to the tree |
1882
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
1883
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
#Add it to the terminals list |
1885
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
1886
|
|
|
|
|
|
|
|
1887
|
0
|
|
|
|
|
0
|
push( @birth_rates, $effected_birth_rate ); |
1888
|
0
|
|
|
|
|
0
|
push( @death_rates, $effected_death_rate ); |
1889
|
|
|
|
|
|
|
|
1890
|
0
|
|
|
|
|
0
|
$net_death_rate += $effected_death_rate; |
1891
|
0
|
|
|
|
|
0
|
$net_birth_rate += $effected_birth_rate; |
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
} |
1894
|
|
|
|
|
|
|
} |
1895
|
|
|
|
|
|
|
} |
1896
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation |
1897
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually |
1898
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where |
1899
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed. |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
1902
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
1903
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
1904
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2; |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
#The current tree size |
1907
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
1908
|
|
|
|
|
|
|
|
1909
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
1910
|
|
|
|
|
|
|
|
1911
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate > 0) |
1912
|
|
|
|
|
|
|
{ |
1913
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_birth_rate; |
1914
|
|
|
|
|
|
|
} else |
1915
|
|
|
|
|
|
|
{ |
1916
|
0
|
|
|
|
|
0
|
$next_speciation = $inf; |
1917
|
|
|
|
|
|
|
} |
1918
|
|
|
|
|
|
|
|
1919
|
0
|
0
|
|
|
|
0
|
if ($net_death_rate > 0) |
1920
|
|
|
|
|
|
|
{ |
1921
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $net_death_rate; |
1922
|
|
|
|
|
|
|
} else |
1923
|
|
|
|
|
|
|
{ |
1924
|
0
|
|
|
|
|
0
|
$next_extinction = $inf; |
1925
|
|
|
|
|
|
|
} |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
} |
1928
|
0
|
|
|
|
|
0
|
return $tree; |
1929
|
|
|
|
|
|
|
} |
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=item beta_binomial() |
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. From Blum2007 |
1934
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
Type : Evolutionary model |
1936
|
|
|
|
|
|
|
Title : beta_binomial |
1937
|
|
|
|
|
|
|
Usage : $tree = beta_binomial(%options) |
1938
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time |
1939
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree |
1940
|
|
|
|
|
|
|
Args : %options with fields: |
1941
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1) |
1942
|
|
|
|
|
|
|
model_param The parameter as defined in Blum2007 |
1943
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate |
1944
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
=cut |
1949
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
sub beta_binomial { |
1951
|
0
|
|
|
0
|
1
|
0
|
my %options = @_; |
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
#Check that we have a termination condition |
1954
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) { |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
#Error here. |
1957
|
0
|
|
|
|
|
0
|
return undef; |
1958
|
|
|
|
|
|
|
} |
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
#Set the undefined condition to infinity |
1961
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size}; |
1962
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age}; |
1963
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
#Set default rates |
1965
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} ); |
1966
|
0
|
0
|
|
|
|
0
|
$options{model_param} = 0 unless defined( $options{model_param} ); |
1967
|
|
|
|
|
|
|
|
1968
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these |
1969
|
0
|
|
|
|
|
0
|
my $node_id = 0; |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species |
1972
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new(); |
1973
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new(); |
1974
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0); |
1975
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ ); |
1976
|
0
|
|
|
|
|
0
|
$tree->insert($root); |
1977
|
0
|
|
|
|
|
0
|
my @terminals = ($root); |
1978
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} ); |
1979
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate}; |
1980
|
0
|
|
|
|
|
0
|
my $next_speciation; |
1981
|
0
|
|
|
|
|
0
|
my $time = 0; |
1982
|
0
|
|
|
|
|
0
|
my $tree_size = 1; |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge |
1985
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) { |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event |
1988
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size; |
1989
|
|
|
|
|
|
|
} |
1990
|
|
|
|
|
|
|
else { |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away |
1993
|
0
|
|
|
|
|
0
|
$next_speciation = 0; |
1994
|
|
|
|
|
|
|
} |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
#While we haven't reached termination |
1997
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) { |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species |
2000
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
2001
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length + |
2002
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) ); |
2003
|
|
|
|
|
|
|
} |
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
#Update the time |
2006
|
0
|
|
|
|
|
0
|
$time += $next_speciation; |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done |
2009
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} ); |
2010
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
#Get the species effected by this event |
2012
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate); |
2013
|
0
|
|
|
|
|
0
|
my $selected = 0; |
2014
|
0
|
|
0
|
|
|
0
|
for ( |
2015
|
|
|
|
|
|
|
; |
2016
|
|
|
|
|
|
|
$selected < scalar @terminals |
2017
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ; |
2018
|
|
|
|
|
|
|
$selected++ |
2019
|
|
|
|
|
|
|
) |
2020
|
|
|
|
|
|
|
{ |
2021
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected]; |
2022
|
|
|
|
|
|
|
} |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
#Remove it from the terminals list |
2025
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 ); |
2026
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 ); |
2027
|
|
|
|
|
|
|
my $p = |
2028
|
0
|
|
|
|
|
0
|
qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 ); |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
#If we have a speciation event we add two new species |
2031
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) { |
2032
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
#Create a new species |
2034
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new(); |
2035
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ ); |
2036
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
#Give it a zero edge length |
2038
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0); |
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
#Add it as a child to the speciating species |
2041
|
0
|
|
|
|
|
0
|
$effected->set_child($child); |
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
#Add it to the tree |
2044
|
0
|
|
|
|
|
0
|
$tree->insert($child); |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
#Add it to the terminals list |
2047
|
0
|
|
|
|
|
0
|
push( @terminals, $child ); |
2048
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
#New speciation rate |
2050
|
0
|
|
|
|
|
0
|
my $new_speciation_rate = $effected_rate * $p; |
2051
|
0
|
|
|
|
|
0
|
$p = 1 - $p; |
2052
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; } |
|
0
|
|
|
|
|
0
|
|
2053
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate ); |
2054
|
|
|
|
|
|
|
} |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand... |
2057
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 ); |
2058
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1; |
2059
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals; |
2060
|
|
|
|
|
|
|
|
2061
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely) |
2062
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) { |
2063
|
0
|
|
|
|
|
0
|
return $tree; |
2064
|
|
|
|
|
|
|
} |
2065
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size; |
2066
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size; |
2067
|
|
|
|
|
|
|
} |
2068
|
0
|
|
|
|
|
0
|
return $tree; |
2069
|
|
|
|
|
|
|
} |
2070
|
|
|
|
|
|
|
|
2071
|
|
|
|
|
|
|
=back |
2072
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
=cut |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=begin comment |
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
########################################################### |
2078
|
|
|
|
|
|
|
#INTERNAL METHODS |
2079
|
|
|
|
|
|
|
#These are methods that permit additional manipulations of |
2080
|
|
|
|
|
|
|
#a Bio::Phylo::Tree to be easily made. As such some of |
2081
|
|
|
|
|
|
|
#these could easily be moved into Bio::Phylo::Forest::Tree |
2082
|
|
|
|
|
|
|
########################################################### |
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=end comment |
2085
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=cut |
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
=begin comment |
2089
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
Type : Internal method |
2091
|
|
|
|
|
|
|
Title : copy_tree |
2092
|
|
|
|
|
|
|
Usage : $tree = copy_tree($tree) |
2093
|
|
|
|
|
|
|
Function: Makes a new independent copy of a tree |
2094
|
|
|
|
|
|
|
Returns : the phylogenetic $tree |
2095
|
|
|
|
|
|
|
Args : the phylogenetic $tree |
2096
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
=end comment |
2098
|
|
|
|
|
|
|
|
2099
|
|
|
|
|
|
|
=cut |
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
sub copy_tree { |
2102
|
15
|
|
|
15
|
0
|
112
|
return Bio::Phylo::IO->parse( |
2103
|
|
|
|
|
|
|
-format => 'newick', |
2104
|
|
|
|
|
|
|
-string => shift->to_newick( '-nodelabels' => 1 ) |
2105
|
|
|
|
|
|
|
)->first; |
2106
|
|
|
|
|
|
|
} |
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
=begin comment |
2109
|
|
|
|
|
|
|
|
2110
|
|
|
|
|
|
|
Type : Internal method |
2111
|
|
|
|
|
|
|
Title : truncate_tree_time |
2112
|
|
|
|
|
|
|
Usage : truncate_tree_time($tree,$age) |
2113
|
|
|
|
|
|
|
Function: Truncates the tree at the specified age |
2114
|
|
|
|
|
|
|
Returns : N/A |
2115
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2116
|
|
|
|
|
|
|
$age: the age at which to cut the $tree |
2117
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
=end comment |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=cut |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
sub truncate_tree_time { |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
#$node and $time are used only by this function recursively |
2125
|
1127
|
|
|
1127
|
0
|
2237
|
my ( $tree, $age, $node, $time ) = @_; |
2126
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
#If node and time weren't specified we are starting from the root |
2128
|
1127
|
100
|
|
|
|
2104
|
$node = $tree->get_root unless defined $node; |
2129
|
1127
|
100
|
|
|
|
1743
|
$time = 0 unless defined $time; |
2130
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
#If we are truncating this branch |
2132
|
1127
|
100
|
|
|
|
1967
|
if ( $time + $node->get_branch_length >= $age ) { |
2133
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
#Collapse the node unless it is terminal |
2135
|
257
|
100
|
|
|
|
605
|
$node->collapse unless $node->is_terminal(); |
2136
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
#Set the branch length appropriately |
2138
|
257
|
|
|
|
|
941
|
$node->set_branch_length( $age - $time ); |
2139
|
257
|
|
|
|
|
888
|
return; |
2140
|
|
|
|
|
|
|
} |
2141
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
#If this node has no children we are done |
2143
|
870
|
100
|
|
|
|
1994
|
return if $node->is_terminal(); |
2144
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
#Call the function recursively on the children |
2146
|
504
|
|
|
|
|
776
|
foreach ( @{ $node->get_children } ) { |
|
504
|
|
|
|
|
941
|
|
2147
|
1112
|
|
|
|
|
2480
|
truncate_tree_time( $tree, $age, $_, |
2148
|
|
|
|
|
|
|
$time + $node->get_branch_length() ); |
2149
|
|
|
|
|
|
|
} |
2150
|
|
|
|
|
|
|
} |
2151
|
|
|
|
|
|
|
|
2152
|
|
|
|
|
|
|
=begin comment |
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
Type : Internal method |
2155
|
|
|
|
|
|
|
Title : truncate_tree_size |
2156
|
|
|
|
|
|
|
Usage : truncate_tree_size($tree,$size) |
2157
|
|
|
|
|
|
|
Function: Truncates the tree to the specified number of species |
2158
|
|
|
|
|
|
|
Returns : N/A |
2159
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2160
|
|
|
|
|
|
|
$size: random species are delete so that the tree has this many terminals |
2161
|
|
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
=end comment |
2163
|
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
=cut |
2165
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
sub truncate_tree_size { |
2167
|
5
|
|
|
5
|
0
|
14
|
my ( $tree, $size ) = @_; |
2168
|
5
|
|
|
|
|
11
|
my @terminals = @{ $tree->get_terminals }; |
|
5
|
|
|
|
|
28
|
|
2169
|
5
|
|
|
|
|
32
|
my @names; |
2170
|
|
|
|
|
|
|
|
2171
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root |
2172
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly |
2173
|
|
|
|
|
|
|
#calling the analogous functions on the tree |
2174
|
5
|
|
|
|
|
31
|
_calc_node_properties($tree); |
2175
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
#Only push species that are extant and store the number of those |
2177
|
|
|
|
|
|
|
# my $tree_height = $tree->get_tallest_tip->calc_path_to_root; |
2178
|
5
|
|
|
|
|
19
|
my $tree_height = $tree->get_root->get_generic('tree_height'); |
2179
|
5
|
|
|
|
|
23
|
foreach (@terminals) { |
2180
|
631
|
100
|
|
|
|
1333
|
if ( |
2181
|
|
|
|
|
|
|
abs( |
2182
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) / |
2183
|
|
|
|
|
|
|
$tree_height |
2184
|
|
|
|
|
|
|
) < 1e-6 |
2185
|
|
|
|
|
|
|
) |
2186
|
|
|
|
|
|
|
{ |
2187
|
147
|
|
|
|
|
335
|
push( @names, $_->get_name ); |
2188
|
|
|
|
|
|
|
} |
2189
|
|
|
|
|
|
|
} |
2190
|
5
|
50
|
|
|
|
20
|
if ( @names < $size ) { print "Internal error\n"; } |
|
0
|
|
|
|
|
0
|
|
2191
|
5
|
|
|
|
|
14
|
my %deletions; |
2192
|
5
|
|
|
|
|
25
|
while ( scalar( keys %deletions ) < @names - $size ) { |
2193
|
172
|
|
|
|
|
422
|
$deletions{ $names[ int( rand(@names) ) ] } = 1; |
2194
|
|
|
|
|
|
|
} |
2195
|
5
|
|
|
|
|
51
|
$tree = prune_tips( $tree, [ keys %deletions ] ); |
2196
|
5
|
|
|
|
|
161
|
return $tree; |
2197
|
|
|
|
|
|
|
} |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
sub _get_ultrametric_size { |
2200
|
0
|
|
|
0
|
|
0
|
my ( $tree, $size ) = @_; |
2201
|
0
|
|
|
|
|
0
|
my @terminals = @{ $tree->get_terminals }; |
|
0
|
|
|
|
|
0
|
|
2202
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree); |
2203
|
0
|
|
|
|
|
0
|
my @names; |
2204
|
0
|
|
|
|
|
0
|
my $tree_height = $tree->get_root->get_generic('tree_height'); |
2205
|
0
|
|
|
|
|
0
|
foreach (@terminals) { |
2206
|
0
|
0
|
|
|
|
0
|
if ( |
2207
|
|
|
|
|
|
|
abs( |
2208
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) / |
2209
|
|
|
|
|
|
|
$tree_height |
2210
|
|
|
|
|
|
|
) < 1e-6 |
2211
|
|
|
|
|
|
|
) |
2212
|
|
|
|
|
|
|
{ |
2213
|
0
|
|
|
|
|
0
|
push( @names, $_->get_name ); |
2214
|
|
|
|
|
|
|
} |
2215
|
|
|
|
|
|
|
} |
2216
|
0
|
|
|
|
|
0
|
return scalar @names; |
2217
|
|
|
|
|
|
|
} |
2218
|
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
=begin comment |
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
Type : Internal method |
2222
|
|
|
|
|
|
|
Title : remove_extinct_species |
2223
|
|
|
|
|
|
|
Usage : remove_extinct_species($tree) |
2224
|
|
|
|
|
|
|
Function: Removes extinct species from the tree. An extinct species |
2225
|
|
|
|
|
|
|
is a terminal that does not extend as far as the furthest |
2226
|
|
|
|
|
|
|
terminal(s). |
2227
|
|
|
|
|
|
|
Returns : N/A |
2228
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2229
|
|
|
|
|
|
|
$age: the age at which to cut the $tree |
2230
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
=end comment |
2232
|
|
|
|
|
|
|
|
2233
|
|
|
|
|
|
|
=cut |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
sub remove_extinct_species { |
2236
|
0
|
|
|
0
|
0
|
0
|
my $tree = shift; |
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root |
2239
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly |
2240
|
|
|
|
|
|
|
#calling the analogous functions on the tree |
2241
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree); |
2242
|
0
|
|
|
|
|
0
|
my $height = $tree->get_root->get_generic('tree_height'); |
2243
|
0
|
0
|
|
|
|
0
|
return unless $height > 0; |
2244
|
0
|
|
|
|
|
0
|
my $leaves = $tree->get_terminals; |
2245
|
0
|
0
|
|
|
|
0
|
return unless $leaves; |
2246
|
0
|
|
|
|
|
0
|
my @remove; |
2247
|
0
|
|
|
|
|
0
|
foreach ( @{$leaves} ) { |
|
0
|
|
|
|
|
0
|
|
2248
|
|
|
|
|
|
|
|
2249
|
0
|
0
|
|
|
|
0
|
unless ( |
2250
|
|
|
|
|
|
|
abs( ( $_->get_generic('root_distance') - $height ) / $height ) < |
2251
|
|
|
|
|
|
|
1e-6 ) |
2252
|
|
|
|
|
|
|
{ |
2253
|
0
|
|
|
|
|
0
|
push( @remove, $_->get_name ); |
2254
|
|
|
|
|
|
|
} |
2255
|
|
|
|
|
|
|
} |
2256
|
0
|
|
|
|
|
0
|
$tree = prune_tips( $tree, \@remove ); |
2257
|
0
|
|
|
|
|
0
|
return $tree; |
2258
|
|
|
|
|
|
|
} |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=begin comment |
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
Type : Internal method |
2263
|
|
|
|
|
|
|
Title : prune_tips |
2264
|
|
|
|
|
|
|
Usage : prune_tips($tree,$tips) |
2265
|
|
|
|
|
|
|
Function: Removes named terminals from the tree |
2266
|
|
|
|
|
|
|
Returns : N/A |
2267
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified) |
2268
|
|
|
|
|
|
|
$tips: array ref of terminals to remove from the tree |
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
NB: Available as $tree->prune_tips($tips), but had some problems with |
2271
|
|
|
|
|
|
|
this. |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
=end comment |
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=cut |
2276
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
sub prune_tips { |
2278
|
5
|
|
|
5
|
0
|
18
|
my ( $self, $tips ) = @_; |
2279
|
5
|
|
|
|
|
12
|
my %names_to_delete = map { $_ => 1 } @{$tips}; |
|
97
|
|
|
|
|
175
|
|
|
5
|
|
|
|
|
17
|
|
2280
|
534
|
|
|
|
|
901
|
my %keep = map { $_->get_name => 1 } |
2281
|
631
|
|
|
|
|
1228
|
grep { not exists $names_to_delete{ $_->get_name } } |
2282
|
5
|
|
|
|
|
13
|
@{ $self->get_terminals }; |
|
5
|
|
|
|
|
80
|
|
2283
|
|
|
|
|
|
|
$self->visit_depth_first( |
2284
|
|
|
|
|
|
|
-post => sub { |
2285
|
1196
|
|
|
1196
|
|
1774
|
my $node = shift; |
2286
|
1196
|
100
|
|
|
|
2562
|
if ( $node->is_terminal ) { |
2287
|
631
|
100
|
|
|
|
1617
|
if ( not $keep{ $node->get_name } ) { |
2288
|
97
|
|
|
|
|
368
|
$node->set_parent(); |
2289
|
97
|
|
|
|
|
339
|
$self->delete($node); |
2290
|
|
|
|
|
|
|
} |
2291
|
|
|
|
|
|
|
} |
2292
|
|
|
|
|
|
|
else { |
2293
|
565
|
|
|
|
|
1008
|
my $seen_tip_to_keep = 0; |
2294
|
565
|
|
|
|
|
843
|
for my $tip ( @{ $node->get_terminals } ) { |
|
565
|
|
|
|
|
1538
|
|
2295
|
11941
|
100
|
|
|
|
22647
|
$seen_tip_to_keep++ if $keep{ $tip->get_name }; |
2296
|
|
|
|
|
|
|
} |
2297
|
565
|
100
|
|
|
|
3371
|
if ( not $seen_tip_to_keep ) { |
2298
|
29
|
|
|
|
|
118
|
$node->set_parent(); |
2299
|
29
|
|
|
|
|
118
|
$self->delete($node); |
2300
|
|
|
|
|
|
|
} |
2301
|
|
|
|
|
|
|
} |
2302
|
|
|
|
|
|
|
} |
2303
|
5
|
|
|
|
|
130
|
); |
2304
|
5
|
|
|
|
|
107
|
$self->remove_unbranched_internals; |
2305
|
5
|
|
|
|
|
248
|
return $self; |
2306
|
|
|
|
|
|
|
} |
2307
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
=begin comment |
2309
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
Type : Internal method |
2311
|
|
|
|
|
|
|
Title : lineage_through_time |
2312
|
|
|
|
|
|
|
Usage : ($time,$count) = lineage_through_time($tree) |
2313
|
|
|
|
|
|
|
Function: Alternative to $tree->ltt that permits extinctions |
2314
|
|
|
|
|
|
|
Returns : $time: array ref of times |
2315
|
|
|
|
|
|
|
$count: array ref of species counts corresponding to the times |
2316
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data |
2317
|
|
|
|
|
|
|
|
2318
|
|
|
|
|
|
|
=end comment |
2319
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
=cut |
2321
|
|
|
|
|
|
|
|
2322
|
|
|
|
|
|
|
sub lineage_through_time { |
2323
|
60
|
|
|
60
|
0
|
132
|
my $tree = shift; |
2324
|
60
|
|
|
|
|
231
|
my ( $speciation, $extinction ) = _recursive_ltt_helper($tree); |
2325
|
60
|
|
|
|
|
145
|
my @speciation = sort { $a <=> $b } @{$speciation}; |
|
9823
|
|
|
|
|
10713
|
|
|
60
|
|
|
|
|
414
|
|
2326
|
60
|
|
|
|
|
127
|
my @extinction = sort { $a <=> $b } @{$extinction}; |
|
10362
|
|
|
|
|
11127
|
|
|
60
|
|
|
|
|
152
|
|
2327
|
60
|
|
|
|
|
169
|
my @time = (0); |
2328
|
60
|
|
|
|
|
125
|
my @count = (1); |
2329
|
60
|
|
|
|
|
94
|
my $n_species = 1; |
2330
|
60
|
|
|
|
|
257
|
my $end_time = max( @speciation, @extinction ); |
2331
|
60
|
50
|
|
|
|
194
|
return ( [], [] ) if ( $end_time == 0 ); |
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
#We remove any extinction events occurring at the very end of the tree (as they are not real extinctions) |
2334
|
60
|
|
100
|
|
|
348
|
while ( scalar @extinction |
2335
|
|
|
|
|
|
|
&& ( $end_time - $extinction[-1] ) / $end_time < 1e-6 ) |
2336
|
|
|
|
|
|
|
{ |
2337
|
690
|
|
|
|
|
1756
|
pop @extinction; |
2338
|
|
|
|
|
|
|
} |
2339
|
60
|
|
100
|
|
|
213
|
while ( scalar @speciation || scalar @extinction ) { |
2340
|
3854
|
100
|
100
|
|
|
10851
|
if ( scalar @extinction == 0 |
|
|
|
100
|
|
|
|
|
2341
|
|
|
|
|
|
|
|| ( scalar @speciation && $speciation[0] < $extinction[0] ) ) |
2342
|
|
|
|
|
|
|
{ |
2343
|
2242
|
|
|
|
|
2881
|
push( @count, ++$n_species ); |
2344
|
2242
|
|
|
|
|
4379
|
push( @time, shift(@speciation) ); |
2345
|
|
|
|
|
|
|
} |
2346
|
|
|
|
|
|
|
else { |
2347
|
1612
|
|
|
|
|
2173
|
push( @count, --$n_species ); |
2348
|
1612
|
|
|
|
|
3108
|
push( @time, shift(@extinction) ); |
2349
|
|
|
|
|
|
|
} |
2350
|
|
|
|
|
|
|
} |
2351
|
60
|
|
|
|
|
374
|
return ( \@time, \@count ); |
2352
|
|
|
|
|
|
|
} |
2353
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
=begin comment |
2355
|
|
|
|
|
|
|
|
2356
|
|
|
|
|
|
|
Type : Internal method |
2357
|
|
|
|
|
|
|
Title : _recursive_ltt_helper |
2358
|
|
|
|
|
|
|
Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree) |
2359
|
|
|
|
|
|
|
Function: Helper for lineage_through_time |
2360
|
|
|
|
|
|
|
Returns : $speciation: array ref of speciation times |
2361
|
|
|
|
|
|
|
$extinction: array ref of extinction times |
2362
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data |
2363
|
|
|
|
|
|
|
|
2364
|
|
|
|
|
|
|
=end comment |
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
=cut |
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
sub _recursive_ltt_helper { |
2369
|
4544
|
|
|
4544
|
|
7039
|
my ( $tree, $node, $time ) = @_; |
2370
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
#If we are being invoked at the root level |
2372
|
4544
|
100
|
|
|
|
7820
|
$node = $tree->get_root unless defined $node; |
2373
|
4544
|
100
|
|
|
|
6945
|
$time = 0 unless defined $time; |
2374
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
#The new time |
2376
|
4544
|
|
|
|
|
8861
|
$time += $node->get_branch_length; |
2377
|
4544
|
100
|
|
|
|
9381
|
return ( [], [$time] ) if ( $node->is_terminal ); |
2378
|
2242
|
|
|
|
|
3879
|
my @speciation; |
2379
|
|
|
|
|
|
|
my @extinction; |
2380
|
2242
|
|
|
|
|
2703
|
foreach ( @{ $node->get_children } ) { |
|
2242
|
|
|
|
|
3962
|
|
2381
|
4484
|
|
|
|
|
8686
|
my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time ); |
2382
|
4484
|
|
|
|
|
6998
|
@speciation = ( @speciation, @{$spec} ); |
|
4484
|
|
|
|
|
10506
|
|
2383
|
4484
|
|
|
|
|
6078
|
@extinction = ( @extinction, @{$ext} ); |
|
4484
|
|
|
|
|
16186
|
|
2384
|
|
|
|
|
|
|
} |
2385
|
2242
|
|
|
|
|
4176
|
push( @speciation, $time ); |
2386
|
2242
|
|
|
|
|
4304
|
return ( \@speciation, \@extinction ); |
2387
|
|
|
|
|
|
|
} |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
=begin comment |
2390
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
Type : Internal method. |
2392
|
|
|
|
|
|
|
Title : _calc_node_properties |
2393
|
|
|
|
|
|
|
Usage : _calc_node_properties($tree); |
2394
|
|
|
|
|
|
|
Function: Calculates the distance of nodes from the root |
2395
|
|
|
|
|
|
|
Returns : The maximum distance from the root |
2396
|
|
|
|
|
|
|
Args : |
2397
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
=end comment |
2399
|
|
|
|
|
|
|
|
2400
|
|
|
|
|
|
|
=cut |
2401
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
sub _calc_node_properties { |
2403
|
1196
|
|
|
1196
|
|
2197
|
my ( $node, $root_distance ); |
2404
|
1196
|
|
|
|
|
1671
|
my $tree = shift; |
2405
|
1196
|
|
|
|
|
3309
|
my $root = $tree->get_root; |
2406
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
#Check whether we were given a node and distance |
2408
|
1196
|
100
|
|
|
|
3084
|
if ( scalar @_ ) { |
2409
|
1191
|
|
|
|
|
2141
|
$node = shift; |
2410
|
1191
|
|
|
|
|
1651
|
$root_distance = shift; |
2411
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
#Otherwise the root is the default |
2413
|
|
|
|
|
|
|
} |
2414
|
|
|
|
|
|
|
else { |
2415
|
5
|
|
|
|
|
14
|
$node = $root; |
2416
|
5
|
|
|
|
|
58
|
$root->set_generic( tree_height => 0 ); |
2417
|
5
|
|
|
|
|
13
|
$root_distance = 0; |
2418
|
|
|
|
|
|
|
} |
2419
|
1196
|
|
|
|
|
4080
|
$node->set_generic( root_distance => $root_distance ); |
2420
|
1196
|
100
|
|
|
|
3525
|
if ( $root_distance > $root->get_generic('tree_height') ) { |
2421
|
126
|
|
|
|
|
325
|
$root->set_generic( tree_height => $root_distance ); |
2422
|
|
|
|
|
|
|
} |
2423
|
1196
|
|
|
|
|
2206
|
my $terminal_count = 0; |
2424
|
1196
|
|
|
|
|
3100
|
my $children = $node->get_children; |
2425
|
1196
|
50
|
|
|
|
3054
|
if ( defined $children ) { |
2426
|
1196
|
|
|
|
|
1672
|
foreach ( @{$children} ) { |
|
1196
|
|
|
|
|
4085
|
|
2427
|
1191
|
|
|
|
|
3235
|
_calc_node_properties( $tree, $_, |
2428
|
|
|
|
|
|
|
$root_distance + $_->get_branch_length() ); |
2429
|
|
|
|
|
|
|
} |
2430
|
|
|
|
|
|
|
} |
2431
|
|
|
|
|
|
|
} |
2432
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
=begin comment |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
Type : Internal method |
2436
|
|
|
|
|
|
|
Title : nchoosek |
2437
|
|
|
|
|
|
|
Usage : $out = nchoosek($n,$k) |
2438
|
|
|
|
|
|
|
Function: Returns the binomial coefficient for $n and $k |
2439
|
|
|
|
|
|
|
Returns : the binomial coefficient |
2440
|
|
|
|
|
|
|
Args : $n, $k |
2441
|
|
|
|
|
|
|
|
2442
|
|
|
|
|
|
|
=end comment |
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
=cut |
2445
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
sub nchoosek { |
2447
|
0
|
|
|
0
|
0
|
|
my ( $n, $k ) = @_; |
2448
|
0
|
|
|
|
|
|
my $r = 1; |
2449
|
0
|
0
|
0
|
|
|
|
return 0 if ( $k > $n || $k < 0 ); |
2450
|
0
|
|
|
|
|
|
for ( my $d = 1 ; $d <= $k ; $d++ ) { |
2451
|
0
|
|
|
|
|
|
$r *= $n--; |
2452
|
0
|
|
|
|
|
|
$r /= $d; |
2453
|
|
|
|
|
|
|
} |
2454
|
0
|
|
|
|
|
|
return $r; |
2455
|
|
|
|
|
|
|
} |
2456
|
|
|
|
|
|
|
1; |