line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::EvolutionaryModels;
|
2
|
1
|
|
|
1
|
|
763
|
use strict;
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
3
|
1
|
|
|
1
|
|
4
|
use base 'Exporter';
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
88
|
|
4
|
1
|
|
|
1
|
|
282
|
use Bio::Phylo::Forest::Tree;
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
8
|
|
5
|
1
|
|
|
1
|
|
386
|
use Bio::Phylo::Forest;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
6
|
1
|
|
|
1
|
|
5
|
use Math::CDF qw'qnorm qbeta';
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
7
|
1
|
|
|
1
|
|
6
|
use List::Util qw'min max';
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
8
|
1
|
|
|
1
|
|
316
|
use POSIX qw'ceil floor';
|
|
1
|
|
|
|
|
4339
|
|
|
1
|
|
|
|
|
4
|
|
9
|
1
|
|
|
1
|
|
1235
|
use Config; #Use to check whether multi-threading is available
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
37
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
BEGIN {
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# set the version for version checking
|
14
|
1
|
|
|
1
|
|
5
|
use Bio::Phylo; our $VERSION = $Bio::Phylo::VERSION;
|
|
1
|
|
|
1
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
4
|
|
15
|
1
|
|
|
|
|
981
|
our @EXPORT_OK =
|
16
|
|
|
|
|
|
|
qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts);
|
17
|
|
|
|
|
|
|
}
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 NAME
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these
|
22
|
|
|
|
|
|
|
Klaas Hartmann, September 2007
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#For convenience we import the sample routine (so we can write sample(...) instead of
|
27
|
|
|
|
|
|
|
#Bio::Phylo::EvolutionaryModels::sample(...).
|
28
|
|
|
|
|
|
|
use Bio::Phylo::EvolutionaryModels qw (sample);
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#Example#A######################################################################
|
31
|
|
|
|
|
|
|
#Simulate a single tree with ten species from the constant rate birth model with parameter 0.5
|
32
|
|
|
|
|
|
|
my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10);
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#Example#B######################################################################
|
35
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth model using the b algorithm
|
36
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5,
|
37
|
|
|
|
|
|
|
tree_size => 10,
|
38
|
|
|
|
|
|
|
algorithm => 'b',
|
39
|
|
|
|
|
|
|
algorithm_options => {rate => 1},
|
40
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
|
41
|
|
|
|
|
|
|
model_options => {birth_rate=>.5});
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
#Print a newick string for the 4th sampled tree
|
45
|
|
|
|
|
|
|
print $sample->[3]->to_newick."\n";
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#Example#C######################################################################
|
48
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using
|
49
|
|
|
|
|
|
|
#the bd algorithm and two threads (useful for dual core processors)
|
50
|
|
|
|
|
|
|
#NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate
|
51
|
|
|
|
|
|
|
# and death_rate we are giving the model
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5,
|
54
|
|
|
|
|
|
|
tree_size => 10,
|
55
|
|
|
|
|
|
|
threads => 2,
|
56
|
|
|
|
|
|
|
algorithm => 'bd',
|
57
|
|
|
|
|
|
|
algorithm_options => {rate => 1, nstar => 30},
|
58
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
|
59
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8});
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
#Example#D######################################################################
|
62
|
|
|
|
|
|
|
#Sample 5 trees with ten species from the constant rate birth and death model using
|
63
|
|
|
|
|
|
|
#incomplete taxon sampling
|
64
|
|
|
|
|
|
|
#
|
65
|
|
|
|
|
|
|
#sampling_probability is set so that the true tree has 10 species with 50% probability,
|
66
|
|
|
|
|
|
|
#11 species with 30% probability and 12 species with 20% probability
|
67
|
|
|
|
|
|
|
#
|
68
|
|
|
|
|
|
|
#NB: we must specify an mstar here this will depend on the model parameters and the
|
69
|
|
|
|
|
|
|
# incomplete taxon sampling parameters
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
my $algorithm_options = {rate => 1,
|
72
|
|
|
|
|
|
|
nstar => 30,
|
73
|
|
|
|
|
|
|
mstar => 12,
|
74
|
|
|
|
|
|
|
sampling_probability => [.5, .3, .2]};
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5,
|
77
|
|
|
|
|
|
|
tree_size => 10,
|
78
|
|
|
|
|
|
|
algorithm => 'incomplete_sampling_bd',
|
79
|
|
|
|
|
|
|
algorithm_options => $algorithm_options,
|
80
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
|
81
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8});
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
#Example#E######################################################################
|
84
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
#First we define the random function for the shortest pendant edge for a Yule model
|
87
|
|
|
|
|
|
|
my $random_pendant_function = sub {
|
88
|
|
|
|
|
|
|
%options = @_;
|
89
|
|
|
|
|
|
|
return -log(rand)/$options{birth_rate}/$options{tree_size};
|
90
|
|
|
|
|
|
|
};
|
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
#Then we produce our sample
|
93
|
|
|
|
|
|
|
my ($sample,$stats) = sample(sample_size =>5,
|
94
|
|
|
|
|
|
|
tree_size => 10,
|
95
|
|
|
|
|
|
|
algorithm => 'memoryless_b',
|
96
|
|
|
|
|
|
|
algorithm_options => {pendant_dist => $random_pendant_function},
|
97
|
|
|
|
|
|
|
model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
|
98
|
|
|
|
|
|
|
model_options => {birth_rate=>1});
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#Example#F#######################################################################
|
101
|
|
|
|
|
|
|
#Sample 5 trees with ten species from a constant birth death rate model using the
|
102
|
|
|
|
|
|
|
#constant_rate_bd algorithm
|
103
|
|
|
|
|
|
|
my ($sample) = sample(sample_size => 5,
|
104
|
|
|
|
|
|
|
tree_size => 10,
|
105
|
|
|
|
|
|
|
algorithm => 'constant_rate_bd',
|
106
|
|
|
|
|
|
|
model_options => {birth_rate=>1,death_rate=>.8});
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
This package contains evolutionary models for phylogenetic trees and
|
111
|
|
|
|
|
|
|
algorithms for sampling from these models. It is a non-OO module that
|
112
|
|
|
|
|
|
|
optionally exports the 'sample', 'constant_rate_birth' and
|
113
|
|
|
|
|
|
|
'constant_rate_birth_death' subroutines into the caller's namespace,
|
114
|
|
|
|
|
|
|
using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >>
|
115
|
|
|
|
|
|
|
directive. Alternatively, you can call the subroutines as class methods,
|
116
|
|
|
|
|
|
|
as in the synopsis.
|
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The initial set of algorithms available in this package corresponds to those in:
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Sampling trees from evolutionary models
|
121
|
|
|
|
|
|
|
Klaas Hartmann, Dennis Wong, Tanja Gernhard
|
122
|
|
|
|
|
|
|
Systematic Biology, in press
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Some comments and code refers back to this paper.
|
125
|
|
|
|
|
|
|
Further algorithms and evolutionary are encouraged
|
126
|
|
|
|
|
|
|
and welcome.
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
To make this code as straightforward as possible to read some of the
|
129
|
|
|
|
|
|
|
algorithms have been implemented in a less than optimal manner. The code
|
130
|
|
|
|
|
|
|
also follows the structure of an earlier version of the manuscript so
|
131
|
|
|
|
|
|
|
there is some redundancy (eg. the birth algorithm is just a specific
|
132
|
|
|
|
|
|
|
instance of the birth_death algorithm)
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 SAMPLING
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
All sampling algorithms should be accessed through the generic sample
|
137
|
|
|
|
|
|
|
interface.
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 Generic sampling interface: sample()
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Type : Interface
|
142
|
|
|
|
|
|
|
Title : sample
|
143
|
|
|
|
|
|
|
Usage : see SYNOPSIS
|
144
|
|
|
|
|
|
|
Function: Samples phylogenetic trees from an evolutionary model
|
145
|
|
|
|
|
|
|
Returns : A sample of phylogenetic trees and statistics from the
|
146
|
|
|
|
|
|
|
sampling algorithm
|
147
|
|
|
|
|
|
|
Args : Sampling parameters in a hash
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
This method acts as a gateway to the various sampling algorithms. The
|
150
|
|
|
|
|
|
|
argument is a single hash containing the options for the sampling run.
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Sampling parameters (* denotes optional parameters):
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sample_size The number of trees to return (more trees may be returned)
|
156
|
|
|
|
|
|
|
tree_size The size that returned trees should be
|
157
|
|
|
|
|
|
|
model The evolutionary model (should be a function reference)
|
158
|
|
|
|
|
|
|
model_options A hash pointer for model options (see individual models)
|
159
|
|
|
|
|
|
|
algorithm The algorithm to use (omit the preceding sample_)
|
160
|
|
|
|
|
|
|
algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details)
|
161
|
|
|
|
|
|
|
threads* The number of threads to use (default is 1)
|
162
|
|
|
|
|
|
|
output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree)
|
163
|
|
|
|
|
|
|
remove_extinct Set to true to remove extinct species
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Available algorithms (algorithm names in the paper are given in brackets):
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
b For all pure birth models (simplified GSA)
|
170
|
|
|
|
|
|
|
bd For all birth and death models (GSA)
|
171
|
|
|
|
|
|
|
incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA)
|
172
|
|
|
|
|
|
|
memoryless_b For memoryless pure birth models (PBMSA)
|
173
|
|
|
|
|
|
|
constant_rate_bd For birth and death models with constant rates (BDSA)
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Model
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
If you create your own model it must accept an options hash as its input.
|
179
|
|
|
|
|
|
|
This options hash can contain any parameters you desire. Your model should
|
180
|
|
|
|
|
|
|
simulate a tree until it becomes extinct or the size/age limit as specified
|
181
|
|
|
|
|
|
|
in the options has been reached. Respectively these options are tree_size
|
182
|
|
|
|
|
|
|
and tree_age.
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Multi-threading
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Multi-thread support is very simplistic. The number of threads you specify
|
188
|
|
|
|
|
|
|
are created and each is assigned the task of finding sample_size/threads
|
189
|
|
|
|
|
|
|
samples. I had problems with using Bio::Phylo::Forest::Tree in a multi-
|
190
|
|
|
|
|
|
|
threaded setting. Hence the sampled trees are returned as newick strings to
|
191
|
|
|
|
|
|
|
the main routine where (if required) Tree objects are recreated from the
|
192
|
|
|
|
|
|
|
strings. For most applications this overhead seems negligible in contrast
|
193
|
|
|
|
|
|
|
to the sampling times.
|
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
From a code perspective this function (sample):
|
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Checks input arguments
|
199
|
|
|
|
|
|
|
Handles multi-threading
|
200
|
|
|
|
|
|
|
Calls the individual algorithms to perform sampling
|
201
|
|
|
|
|
|
|
Reformats data
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
my @threads;
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub sample {
|
208
|
5
|
|
|
5
|
1
|
107
|
my %options = @_;
|
209
|
5
|
|
|
|
|
51
|
my %methods_require = (
|
210
|
|
|
|
|
|
|
b => ['rate'],
|
211
|
|
|
|
|
|
|
bd => [ 'rate', 'nstar' ],
|
212
|
|
|
|
|
|
|
incomplete_sampling_bd =>
|
213
|
|
|
|
|
|
|
[ 'rate', 'nstar', 'mstar', 'sampling_probability' ],
|
214
|
|
|
|
|
|
|
memoryless_b => ['pendant_dist'],
|
215
|
|
|
|
|
|
|
constant_rate_bd => [],
|
216
|
|
|
|
|
|
|
);
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
#Default is to sample a single tree
|
219
|
5
|
50
|
|
|
|
27
|
$options{sample_size} = 1 unless defined $options{sample_size};
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
#Default is to use a single thread
|
222
|
5
|
100
|
|
|
|
19
|
$options{threads} = 1 unless defined $options{threads};
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#Check that multiple threads are actually supported
|
225
|
5
|
50
|
33
|
|
|
19
|
if ( $options{threads} > 1 && !$Config{useithreads} ) {
|
226
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
227
|
|
|
|
|
|
|
"your perl installation does not support multiple threads" );
|
228
|
|
|
|
|
|
|
}
|
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
#Check an algorithm was specified
|
231
|
5
|
50
|
|
|
|
16
|
unless ( defined $options{algorithm} ) {
|
232
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw(
|
233
|
|
|
|
|
|
|
'error' => "an algorithm type must be specified" );
|
234
|
|
|
|
|
|
|
}
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
#Check the algorithm type is valid
|
237
|
5
|
50
|
|
|
|
15
|
unless ( defined $methods_require{ $options{algorithm} } ) {
|
238
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw(
|
239
|
|
|
|
|
|
|
'error' => "'$options{algorithm}' is not a valid algorithm" );
|
240
|
|
|
|
|
|
|
}
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
#Check the algorithm options
|
243
|
5
|
|
|
|
|
10
|
foreach ( @{ $methods_require{ $options{algorithm} } } ) {
|
|
5
|
|
|
|
|
14
|
|
244
|
8
|
50
|
|
|
|
19
|
unless ( defined $options{algorithm_options}->{$_} ) {
|
245
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
246
|
|
|
|
|
|
|
"'$_' must be specified for the '$options{algorithm}' algorithm"
|
247
|
|
|
|
|
|
|
);
|
248
|
|
|
|
|
|
|
}
|
249
|
|
|
|
|
|
|
}
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
#If we are doing incomplete taxon sampling the sampling probability must be specified
|
252
|
5
|
0
|
33
|
|
|
15
|
if ( defined $options{incomplete_sampling}
|
|
|
|
0
|
|
|
|
|
253
|
|
|
|
|
|
|
&& $options{incomplete_sampling}
|
254
|
|
|
|
|
|
|
&& !( defined $options{algorithm_options}->{sampling_probability} ) )
|
255
|
|
|
|
|
|
|
{
|
256
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
257
|
|
|
|
|
|
|
"'sampling_probability' must be specified for post hoc incomplete sampling to be applied"
|
258
|
|
|
|
|
|
|
);
|
259
|
|
|
|
|
|
|
}
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
#Check that a model has been specified
|
262
|
5
|
50
|
66
|
|
|
26
|
unless ( defined $options{model}
|
263
|
|
|
|
|
|
|
|| $options{algorithm} eq 'constant_rate_bd' )
|
264
|
|
|
|
|
|
|
{
|
265
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw(
|
266
|
|
|
|
|
|
|
'error' => "a model must be specified" );
|
267
|
|
|
|
|
|
|
}
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
#Get a function pointer for the algorithm
|
270
|
5
|
|
|
|
|
15
|
my $algorithm = 'sample_' . $options{algorithm};
|
271
|
5
|
|
|
|
|
24
|
$algorithm = \&$algorithm;
|
272
|
5
|
|
|
|
|
7
|
my @output;
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
#Run the algorithm, different method for multiple threads
|
275
|
5
|
50
|
|
|
|
16
|
if ( $options{threads} > 1 ) {
|
276
|
0
|
|
|
|
|
0
|
require threads;
|
277
|
0
|
|
|
|
|
0
|
require threads::shared;
|
278
|
0
|
|
|
|
|
0
|
@output = ( [], [] );
|
279
|
|
|
|
|
|
|
$SIG{'KILL'} = sub {
|
280
|
0
|
|
|
0
|
|
0
|
foreach (@threads) { $_->kill('KILL')->detach(); }
|
|
0
|
|
|
|
|
0
|
|
281
|
0
|
|
|
|
|
0
|
threads->exit();
|
282
|
0
|
|
|
|
|
0
|
};
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#Start the threads
|
285
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) {
|
286
|
0
|
|
|
|
|
0
|
@_ = ();
|
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
#Note the list context of the return argument here determines
|
289
|
|
|
|
|
|
|
#the data type returned from the thread.
|
290
|
|
|
|
|
|
|
( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options,
|
291
|
|
|
|
|
|
|
sample_size => ceil( $options{sample_size} / $options{threads} )
|
292
|
0
|
|
|
|
|
0
|
);
|
293
|
|
|
|
|
|
|
}
|
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
#Wait for them to finish and combine the data
|
296
|
0
|
|
|
|
|
0
|
for ( ( 1 .. $options{threads} ) ) {
|
297
|
0
|
|
|
|
|
0
|
until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); }
|
|
0
|
|
|
|
|
0
|
|
298
|
0
|
|
|
|
|
0
|
my @thread_data = $threads[ $_ - 1 ]->join;
|
299
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) {
|
300
|
0
|
0
|
|
|
|
0
|
$output[$index] = [] if scalar @output < $index;
|
301
|
|
|
|
|
|
|
$output[$index] =
|
302
|
0
|
|
|
|
|
0
|
[ @{ $output[$index] }, @{ $thread_data[$index] } ];
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
303
|
|
|
|
|
|
|
}
|
304
|
|
|
|
|
|
|
}
|
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
#Turn newick strings back into tree objects
|
307
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{output_format}
|
308
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' )
|
309
|
|
|
|
|
|
|
{
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
#Convert to newick trees
|
312
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
|
|
0
|
|
|
|
|
0
|
|
313
|
0
|
|
|
|
|
0
|
$output[0]->[$index] = Bio::Phylo::IO->parse(
|
314
|
|
|
|
|
|
|
-format => 'newick',
|
315
|
|
|
|
|
|
|
-string => $output[0]->[$index]
|
316
|
|
|
|
|
|
|
)->first;
|
317
|
|
|
|
|
|
|
}
|
318
|
|
|
|
|
|
|
}
|
319
|
|
|
|
|
|
|
}
|
320
|
|
|
|
|
|
|
else {
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
#Get the samples
|
323
|
5
|
|
|
|
|
32
|
@output = &$algorithm(%options);
|
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
#Turn into newick trees if requested
|
326
|
5
|
50
|
66
|
|
|
110
|
if ( defined $options{output_format}
|
|
|
100
|
66
|
|
|
|
|
327
|
|
|
|
|
|
|
&& $options{output_format} eq 'newick' )
|
328
|
|
|
|
|
|
|
{
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
#Convert to newick trees
|
331
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
|
|
0
|
|
|
|
|
0
|
|
332
|
0
|
|
|
|
|
0
|
$output[0]->[$index] =
|
333
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 );
|
334
|
|
|
|
|
|
|
}
|
335
|
|
|
|
|
|
|
}
|
336
|
|
|
|
|
|
|
elsif ( defined $options{output_format}
|
337
|
|
|
|
|
|
|
&& $options{output_format} eq 'forest' )
|
338
|
|
|
|
|
|
|
{
|
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# save as a forest
|
341
|
4
|
|
|
|
|
22
|
my $forest = Bio::Phylo::Forest->new;
|
342
|
4
|
|
|
|
|
12
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
|
|
24
|
|
|
|
|
63
|
|
343
|
20
|
|
|
|
|
56
|
$forest->insert( $output[0]->[$index] );
|
344
|
|
|
|
|
|
|
}
|
345
|
4
|
|
|
|
|
10
|
$output[0] = $forest;
|
346
|
|
|
|
|
|
|
}
|
347
|
|
|
|
|
|
|
}
|
348
|
5
|
|
|
|
|
64
|
return @output;
|
349
|
|
|
|
|
|
|
}
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=begin comment
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Type : Internal method
|
354
|
|
|
|
|
|
|
Title : _sample_newick
|
355
|
|
|
|
|
|
|
Usage : ($thread) = threads->new(\&_sample_newick, %options);
|
356
|
|
|
|
|
|
|
@thread_output = $thread->join;
|
357
|
|
|
|
|
|
|
Function: Wrapper for sampling routines used for multi-threading
|
358
|
|
|
|
|
|
|
Returns : Output from sampling algorithms with trees replaced by newick strings
|
359
|
|
|
|
|
|
|
Args : %options to pass to sampling algorithm
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=end comment
|
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=cut
|
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub _sample_newick {
|
366
|
0
|
|
|
0
|
|
0
|
my %options = @_;
|
367
|
0
|
|
|
|
|
0
|
my $algorithm = 'sample_' . $options{algorithm};
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Thread 'cancellation' signal handler
|
370
|
0
|
|
|
0
|
|
0
|
$SIG{'KILL'} = sub { threads->exit(); };
|
|
0
|
|
|
|
|
0
|
|
371
|
0
|
|
|
|
|
0
|
$algorithm = \&$algorithm;
|
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
#Perform the sampling
|
374
|
0
|
|
|
|
|
0
|
my @output = ( &$algorithm(%options) );
|
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
#Convert to newick trees
|
377
|
0
|
|
|
|
|
0
|
for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
|
|
0
|
|
|
|
|
0
|
|
378
|
0
|
|
|
|
|
0
|
$output[0]->[$index] =
|
379
|
|
|
|
|
|
|
$output[0]->[$index]->to_newick( '-nodelabels' => 1 );
|
380
|
|
|
|
|
|
|
}
|
381
|
0
|
|
|
|
|
0
|
return @output;
|
382
|
|
|
|
|
|
|
}
|
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=head2 Sampling algorithms
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
These algorithms should be accessed through the sampling interface (sample()).
|
387
|
|
|
|
|
|
|
Additional parameters need to be passed to these algorithms as described for
|
388
|
|
|
|
|
|
|
each algorithm.
|
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item sample_b()
|
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Sample from any birth model
|
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Type : Sampling algorithm
|
397
|
|
|
|
|
|
|
Title : sample_b
|
398
|
|
|
|
|
|
|
Usage : see sample
|
399
|
|
|
|
|
|
|
Function: Samples trees from a pure birth model
|
400
|
|
|
|
|
|
|
Returns : see sample
|
401
|
|
|
|
|
|
|
Args : %algorithm_options requires the field:
|
402
|
|
|
|
|
|
|
rate => sampling rate
|
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut
|
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub sample_b {
|
407
|
1
|
|
|
1
|
1
|
3
|
my %options = @_;
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
#The sample of trees
|
410
|
1
|
|
|
|
|
2
|
my @sample;
|
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
#A list of the expected number of samples
|
413
|
|
|
|
|
|
|
my @expected_summary;
|
414
|
1
|
|
|
|
|
2
|
my $model = $options{model};
|
415
|
1
|
|
|
|
|
2
|
my $rate = $options{algorithm_options}->{rate};
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
#While we have insufficient samples
|
418
|
1
|
|
|
|
|
3
|
while ( scalar @sample < $options{sample_size} ) {
|
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
#Generate a candidate model run
|
421
|
16
|
|
|
|
|
82
|
my $candidate = &$model( %{ $options{model_options} },
|
422
|
16
|
|
|
|
|
30
|
tree_size => $options{tree_size} );
|
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
#Check that the tree has no extinctions
|
425
|
16
|
50
|
|
|
|
73
|
unless ( $candidate->is_ultrametric(1e-6) ) {
|
426
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw(
|
427
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" );
|
428
|
|
|
|
|
|
|
}
|
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
#Get the lineage through time data
|
431
|
16
|
|
|
|
|
54
|
my ( $time, $count ) = lineage_through_time($candidate);
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
#The expected number of samples we want
|
434
|
16
|
|
|
|
|
42
|
my $expected_samples = $rate * ( $time->[-1] - $time->[-2] );
|
435
|
16
|
|
|
|
|
34
|
push( @expected_summary, $expected_samples );
|
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree
|
438
|
16
|
|
|
|
|
45
|
while ( $expected_samples > 0 ) {
|
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one
|
441
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability
|
442
|
|
|
|
|
|
|
#according to the remaining number of samples
|
443
|
16
|
100
|
66
|
|
|
91
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
|
444
|
5
|
|
|
|
|
20
|
my $tree = copy_tree($candidate);
|
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
#Truncate the tree at a random point during which it had tree_size species
|
447
|
5
|
|
|
|
|
18
|
truncate_tree_time( $tree,
|
448
|
|
|
|
|
|
|
( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] );
|
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
#Add the tree to our sample
|
451
|
5
|
|
|
|
|
15
|
push( @sample, $tree );
|
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how
|
454
|
|
|
|
|
|
|
#many samples have been obtained)
|
455
|
5
|
50
|
|
|
|
16
|
if ( defined $options{counter} ) {
|
456
|
0
|
|
|
|
|
0
|
my $counter = $options{counter};
|
457
|
0
|
|
|
|
|
0
|
&$counter(1);
|
458
|
|
|
|
|
|
|
}
|
459
|
|
|
|
|
|
|
}
|
460
|
16
|
|
|
|
|
110
|
$expected_samples--;
|
461
|
|
|
|
|
|
|
}
|
462
|
|
|
|
|
|
|
}
|
463
|
1
|
|
|
|
|
6
|
return ( \@sample, \@expected_summary );
|
464
|
|
|
|
|
|
|
}
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=item sample_bd()
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Sample from any birth and death model for which nstar exists
|
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Type : Sampling algorithm
|
471
|
|
|
|
|
|
|
Title : sample_bd
|
472
|
|
|
|
|
|
|
Usage : see sample
|
473
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model
|
474
|
|
|
|
|
|
|
Returns : see sample
|
475
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields:
|
476
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be
|
477
|
|
|
|
|
|
|
a negligible chance of returning to tree_size species
|
478
|
|
|
|
|
|
|
rate => sampling rate
|
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=cut
|
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub sample_bd {
|
483
|
1
|
|
|
1
|
1
|
5
|
my %options = @_;
|
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
#The sample of trees
|
486
|
1
|
|
|
|
|
3
|
my @sample;
|
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
#A list of the expected number of samples
|
489
|
|
|
|
|
|
|
my @expected_summary;
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
#Convenience variables
|
492
|
1
|
|
|
|
|
2
|
my $model = $options{model};
|
493
|
1
|
|
|
|
|
1
|
my $nstar = $options{algorithm_options}->{nstar};
|
494
|
1
|
|
|
|
|
3
|
my $rate = $options{algorithm_options}->{rate};
|
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
#While we have insufficient samples
|
497
|
1
|
|
|
|
|
4
|
while ( scalar @sample < $options{sample_size} ) {
|
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
#Generate a candidate model run
|
500
|
|
|
|
|
|
|
my $candidate =
|
501
|
15
|
|
|
|
|
27
|
&$model( %{ $options{model_options} }, tree_size => $nstar );
|
|
15
|
|
|
|
|
92
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
#Get the lineage through time data
|
504
|
15
|
|
|
|
|
67
|
my ( $time, $count ) = lineage_through_time($candidate);
|
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
#Reorganise the lineage through time data
|
507
|
|
|
|
|
|
|
#@duration contains the length of the intervals with the right number of species
|
508
|
|
|
|
|
|
|
#@start contains the starting times of these intervals
|
509
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected
|
510
|
|
|
|
|
|
|
#$total_duration contains the sum of the interval lengths
|
511
|
15
|
|
|
|
|
36
|
my ( @duration, @start, @prob, $total_duration );
|
512
|
15
|
|
|
|
|
30
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
|
|
1203
|
|
|
|
|
1756
|
|
513
|
1188
|
100
|
|
|
|
1764
|
if ( $count->[$index] == $options{tree_size} ) {
|
514
|
40
|
|
|
|
|
72
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] );
|
515
|
40
|
|
|
|
|
59
|
push( @start, $time->[$index] );
|
516
|
40
|
|
|
|
|
54
|
$total_duration += $duration[-1];
|
517
|
40
|
|
|
|
|
63
|
push( @prob, $total_duration );
|
518
|
|
|
|
|
|
|
}
|
519
|
|
|
|
|
|
|
}
|
520
|
1
|
|
|
1
|
|
6
|
no warnings 'uninitialized'; # FIXME
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
41
|
|
521
|
15
|
100
|
|
|
|
76
|
next if $total_duration == 0;
|
522
|
1
|
|
|
1
|
|
5
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
313
|
|
523
|
8
|
|
|
|
|
23
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
|
524
|
40
|
|
|
|
|
67
|
$prob[$index] /= $total_duration;
|
525
|
|
|
|
|
|
|
}
|
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
#The expected number of samples we want
|
528
|
8
|
|
|
|
|
17
|
my $expected_samples = $rate * $total_duration;
|
529
|
8
|
|
|
|
|
23
|
push( @expected_summary, $expected_samples );
|
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree
|
532
|
8
|
|
|
|
|
21
|
while ( $expected_samples > 0 ) {
|
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one
|
535
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability
|
536
|
|
|
|
|
|
|
#according to the remaining number of samples
|
537
|
8
|
100
|
66
|
|
|
47
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
|
538
|
5
|
|
|
|
|
24
|
my $tree = copy_tree($candidate);
|
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
#Get a random interval
|
541
|
5
|
|
|
|
|
19
|
my $interval_choice = rand(1);
|
542
|
5
|
|
|
|
|
12
|
my $interval;
|
543
|
5
|
|
|
|
|
33
|
for (
|
544
|
|
|
|
|
|
|
$interval = 0 ;
|
545
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ;
|
546
|
|
|
|
|
|
|
$interval++
|
547
|
|
|
|
|
|
|
)
|
548
|
|
|
|
|
|
|
{
|
549
|
|
|
|
|
|
|
}
|
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval
|
552
|
5
|
|
|
|
|
32
|
truncate_tree_time( $tree,
|
553
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] );
|
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
#Add the tree to our sample
|
556
|
5
|
|
|
|
|
15
|
push( @sample, $tree );
|
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how
|
559
|
|
|
|
|
|
|
#many samples have been obtained)
|
560
|
5
|
50
|
|
|
|
22
|
if ( defined $options{counter} ) {
|
561
|
0
|
|
|
|
|
0
|
my $counter = $options{counter};
|
562
|
0
|
|
|
|
|
0
|
&$counter(1);
|
563
|
|
|
|
|
|
|
}
|
564
|
|
|
|
|
|
|
}
|
565
|
8
|
|
|
|
|
166
|
$expected_samples--;
|
566
|
|
|
|
|
|
|
}
|
567
|
|
|
|
|
|
|
}
|
568
|
1
|
|
|
|
|
7
|
return ( \@sample, \@expected_summary );
|
569
|
|
|
|
|
|
|
}
|
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=item sample_incomplete_sampling_bd()
|
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Sample from any birth and death model with incomplete taxon sampling
|
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Type : Sampling algorithm
|
576
|
|
|
|
|
|
|
Title : sample_incomplete_sampling_bd
|
577
|
|
|
|
|
|
|
Usage : see sample
|
578
|
|
|
|
|
|
|
Function: Samples trees from a birth and death model with incomplete taxon sampling
|
579
|
|
|
|
|
|
|
Returns : see sample
|
580
|
|
|
|
|
|
|
Args : %algorithm_options requires the fields:
|
581
|
|
|
|
|
|
|
rate => sampling rate
|
582
|
|
|
|
|
|
|
nstar => once a tree has nstar species there should be
|
583
|
|
|
|
|
|
|
a negligible chance of returning to mstar species
|
584
|
|
|
|
|
|
|
mstar => trees with more than mstar species form a negligible
|
585
|
|
|
|
|
|
|
contribution to the final sample.
|
586
|
|
|
|
|
|
|
sampling_probability => see below.
|
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
sampling_probability
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
vector: must have length (mstar-tree_size+1) The ith element gives the probability
|
591
|
|
|
|
|
|
|
of not sampling i species.
|
592
|
|
|
|
|
|
|
scalar: the probability of sampling any individual species. Is used to calculate
|
593
|
|
|
|
|
|
|
a vector as discussed in the paper.
|
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=cut
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub sample_incomplete_sampling_bd {
|
598
|
1
|
|
|
1
|
1
|
6
|
my %options = @_;
|
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
# %options = (%options, %{$options{algorithm_options}});
|
601
|
|
|
|
|
|
|
#The sample of trees
|
602
|
1
|
|
|
|
|
3
|
my @sample;
|
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
#A list of the expected number of samples
|
605
|
|
|
|
|
|
|
my @expected_summary;
|
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
# #Convenience variables
|
608
|
1
|
|
|
|
|
2
|
my $model = $options{model};
|
609
|
1
|
|
|
|
|
2
|
my $nstar = $options{algorithm_options}->{nstar};
|
610
|
1
|
|
|
|
|
2
|
my $mstar = $options{algorithm_options}->{mstar};
|
611
|
1
|
|
|
|
|
1
|
my $rate = $options{algorithm_options}->{rate};
|
612
|
|
|
|
|
|
|
my $sampling_probability =
|
613
|
1
|
|
|
|
|
3
|
$options{algorithm_options}->{sampling_probability};
|
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
#If sampling_probability is a list check it's length
|
616
|
1
|
50
|
33
|
|
|
5
|
if ( ref $sampling_probability
|
617
|
1
|
|
|
|
|
6
|
&& scalar @{$sampling_probability} !=
|
618
|
|
|
|
|
|
|
( $mstar - $options{tree_size} + 1 ) )
|
619
|
|
|
|
|
|
|
{
|
620
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
621
|
|
|
|
|
|
|
"'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items"
|
622
|
|
|
|
|
|
|
);
|
623
|
|
|
|
|
|
|
}
|
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
#If a single sampling probability was given we calculate the
|
626
|
|
|
|
|
|
|
#probability of sub-sampling given numbers of species.
|
627
|
1
|
50
|
|
|
|
3
|
unless ( ref $sampling_probability ) {
|
628
|
0
|
|
|
|
|
0
|
my $p = $sampling_probability;
|
629
|
0
|
|
|
|
|
0
|
my @vec;
|
630
|
0
|
|
|
|
|
0
|
my $total = 0;
|
631
|
0
|
|
|
|
|
0
|
foreach ( ( $options{tree_size} .. $mstar ) ) {
|
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#The probability of sampling tree_size species from a tree containing $_ species
|
634
|
|
|
|
|
|
|
push( @vec,
|
635
|
|
|
|
|
|
|
nchoosek( $_, $options{tree_size} ) *
|
636
|
|
|
|
|
|
|
( $p**$options{tree_size} ) *
|
637
|
0
|
|
|
|
|
0
|
( ( 1 - $p )**( $_ - $options{tree_size} ) ) );
|
638
|
0
|
|
|
|
|
0
|
$total += $vec[-1];
|
639
|
|
|
|
|
|
|
}
|
640
|
0
|
|
|
|
|
0
|
for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; }
|
|
0
|
|
|
|
|
0
|
|
641
|
0
|
|
|
|
|
0
|
$sampling_probability = \@vec;
|
642
|
|
|
|
|
|
|
}
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
#We now normalise the sampling_probability list so that it sums to unity
|
645
|
|
|
|
|
|
|
#this allows comparable sampling rates to be used here and in sample_bd
|
646
|
1
|
|
|
|
|
3
|
my $total_sp = 0;
|
647
|
1
|
|
|
|
|
3
|
foreach ( @{$sampling_probability} ) { $total_sp += $_; }
|
|
1
|
|
|
|
|
2
|
|
|
3
|
|
|
|
|
8
|
|
648
|
1
|
|
|
1
|
|
5
|
no warnings; # FIXME
|
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
48
|
|
649
|
1
|
|
|
|
|
6
|
foreach ( my $index = 1 .. scalar @{$sampling_probability} ) {
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
650
|
1
|
|
|
1
|
|
5
|
no warnings; # FIXME
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
27
|
|
651
|
1
|
|
|
|
|
7
|
$sampling_probability->[ $index - 1 ] /= $total_sp;
|
652
|
1
|
|
|
1
|
|
5
|
use warnings;
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
653
|
|
|
|
|
|
|
}
|
654
|
1
|
|
|
1
|
|
4
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
65
|
|
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
#While we have insufficient samples
|
657
|
1
|
|
|
|
|
4
|
while ( scalar @sample < $options{sample_size} ) {
|
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
#Generate a candidate model run
|
660
|
|
|
|
|
|
|
my $candidate =
|
661
|
13
|
|
|
|
|
28
|
&$model( %{ $options{model_options} }, tree_size => $nstar );
|
|
13
|
|
|
|
|
72
|
|
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
#Get the lineage through time data
|
664
|
13
|
|
|
|
|
65
|
my ( $time, $count ) = lineage_through_time($candidate);
|
665
|
13
|
|
|
|
|
35
|
my @size_stats;
|
666
|
13
|
|
|
|
|
41
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
|
|
1709
|
|
|
|
|
2423
|
|
667
|
1
|
|
|
1
|
|
5
|
no warnings 'uninitialized'; # FIXME
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
87
|
|
668
|
1696
|
100
|
|
|
|
2424
|
if ( $count->[$index] >= $options{tree_size} ) {
|
669
|
|
|
|
|
|
|
$size_stats[ $count->[$index] - $options{tree_size} ] +=
|
670
|
|
|
|
|
|
|
$time->[$index] *
|
671
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] -
|
672
|
1091
|
|
|
|
|
1693
|
$options{tree_size} ];
|
673
|
|
|
|
|
|
|
}
|
674
|
|
|
|
|
|
|
}
|
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
#Reorganise the lineage through time data
|
677
|
|
|
|
|
|
|
#@duration contains the length of intervals with more than tree_size species
|
678
|
|
|
|
|
|
|
#@start contains the starting times of these intervals
|
679
|
|
|
|
|
|
|
#@prob contains the cumulative probability of each interval being selected
|
680
|
|
|
|
|
|
|
#$total_prob contains the sum of @prob before normalisation
|
681
|
13
|
|
|
|
|
30
|
my ( @duration, @start, @prob, $total_prob );
|
682
|
13
|
|
|
|
|
32
|
$total_prob = 0;
|
683
|
13
|
|
|
|
|
23
|
for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
|
|
1709
|
|
|
|
|
2459
|
|
684
|
1696
|
100
|
|
|
|
2441
|
if ( $count->[$index] >= $options{tree_size} ) {
|
685
|
1091
|
|
|
|
|
1457
|
push( @duration, $time->[ $index + 1 ] - $time->[$index] );
|
686
|
1091
|
|
|
|
|
1373
|
push( @start, $time->[$index] );
|
687
|
1
|
|
|
1
|
|
5
|
no warnings 'uninitialized'; # FIXME
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4098
|
|
688
|
|
|
|
|
|
|
$total_prob +=
|
689
|
|
|
|
|
|
|
$duration[-1] *
|
690
|
|
|
|
|
|
|
$sampling_probability->[ $count->[$index] -
|
691
|
1091
|
|
|
|
|
1424
|
$options{tree_size} ];
|
692
|
1091
|
|
|
|
|
1506
|
push( @prob, $total_prob );
|
693
|
|
|
|
|
|
|
}
|
694
|
|
|
|
|
|
|
}
|
695
|
13
|
100
|
|
|
|
65
|
next if $total_prob == 0;
|
696
|
7
|
|
|
|
|
27
|
for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
|
697
|
1091
|
|
|
|
|
1551
|
$prob[$index] /= $total_prob;
|
698
|
|
|
|
|
|
|
}
|
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
#The expected number of samples we want
|
701
|
7
|
|
|
|
|
21
|
my $expected_samples = $rate * $total_prob;
|
702
|
7
|
|
|
|
|
17
|
push( @expected_summary, $expected_samples );
|
703
|
|
|
|
|
|
|
$expected_samples = $options{sample_size} - scalar @sample
|
704
|
7
|
50
|
|
|
|
24
|
if $expected_samples > $options{sample_size} - scalar @sample;
|
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
#Get the random number of samples from this candidate tree
|
707
|
7
|
|
|
|
|
22
|
while ( $expected_samples > 0 ) {
|
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
#If the number of samples remaining is greater than one
|
710
|
|
|
|
|
|
|
#we take a sample, otherwise we take a sample with probability
|
711
|
|
|
|
|
|
|
#according to the remaining number of samples
|
712
|
8
|
100
|
100
|
|
|
55
|
if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
|
713
|
5
|
|
|
|
|
19
|
my $tree = copy_tree($candidate);
|
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
#Get a random interval
|
716
|
5
|
|
|
|
|
22
|
my $interval_choice = rand(1);
|
717
|
5
|
|
|
|
|
10
|
my $interval;
|
718
|
5
|
|
|
|
|
131
|
for (
|
719
|
|
|
|
|
|
|
$interval = 0 ;
|
720
|
|
|
|
|
|
|
$interval_choice > $prob[$interval] ;
|
721
|
|
|
|
|
|
|
$interval++
|
722
|
|
|
|
|
|
|
)
|
723
|
|
|
|
|
|
|
{
|
724
|
|
|
|
|
|
|
}
|
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
#Truncate the tree at a random point during this interval
|
727
|
5
|
|
|
|
|
36
|
truncate_tree_time( $tree,
|
728
|
|
|
|
|
|
|
$duration[$interval] * rand(1) + $start[$interval] );
|
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
#Remove random species so it has the right size
|
731
|
5
|
|
|
|
|
26
|
truncate_tree_size( $tree, $options{tree_size} );
|
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
#Add the tree to our sample
|
734
|
5
|
|
|
|
|
22
|
push( @sample, $tree );
|
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how
|
737
|
|
|
|
|
|
|
#many samples have been obtained)
|
738
|
5
|
50
|
|
|
|
26
|
if ( defined $options{counter} ) {
|
739
|
0
|
|
|
|
|
0
|
my $counter = $options{counter};
|
740
|
0
|
|
|
|
|
0
|
&$counter(1);
|
741
|
|
|
|
|
|
|
}
|
742
|
|
|
|
|
|
|
}
|
743
|
8
|
|
|
|
|
507
|
$expected_samples--;
|
744
|
|
|
|
|
|
|
}
|
745
|
|
|
|
|
|
|
}
|
746
|
1
|
|
|
|
|
8
|
return ( \@sample, \@expected_summary );
|
747
|
|
|
|
|
|
|
}
|
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=item sample_memoryless_b()
|
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Sample from a memoryless birth model
|
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Type : Sampling algorithm
|
754
|
|
|
|
|
|
|
Title : sample_memoryless_b
|
755
|
|
|
|
|
|
|
Usage : see sample
|
756
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model
|
757
|
|
|
|
|
|
|
Returns : see sample
|
758
|
|
|
|
|
|
|
Args : %algorithm_options with fields:
|
759
|
|
|
|
|
|
|
pendant_dist => function reference for generating random
|
760
|
|
|
|
|
|
|
shortest pendant edges
|
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
NB: The function pointed to by pendant_dist is given model_options
|
763
|
|
|
|
|
|
|
as it's input argument with an added field tree_size. It must return
|
764
|
|
|
|
|
|
|
a random value from the probability density for the shortest pendant
|
765
|
|
|
|
|
|
|
edges.
|
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=cut
|
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
sub sample_memoryless_b {
|
770
|
1
|
|
|
1
|
1
|
7
|
my %options = @_;
|
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
#The sample of trees
|
773
|
1
|
|
|
|
|
4
|
my @sample;
|
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
#A list of the expected number of samples
|
776
|
|
|
|
|
|
|
my @expected_summary;
|
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
#The user specified functions
|
779
|
1
|
|
|
|
|
3
|
my $model = $options{model};
|
780
|
1
|
|
|
|
|
3
|
my $pendant_dist = $options{algorithm_options}->{pendant_dist};
|
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
#While we have insufficient samples
|
783
|
1
|
|
|
|
|
5
|
while ( scalar @sample < $options{sample_size} ) {
|
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
#Generate a tree ending just after the last speciation event
|
786
|
5
|
|
|
|
|
38
|
my $tree = &$model( %{ $options{model_options} },
|
787
|
5
|
|
|
|
|
12
|
tree_size => $options{tree_size} );
|
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
#Check that the tree has no extinctions
|
790
|
5
|
50
|
|
|
|
36
|
unless ( $tree->is_ultrametric(1e-6) ) {
|
791
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadFormat->throw(
|
792
|
|
|
|
|
|
|
'error' => "the model must be a pure birth process" );
|
793
|
|
|
|
|
|
|
}
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
#Get the random length to add after the last speciation event
|
796
|
5
|
|
|
|
|
57
|
my $pendant_add = &$pendant_dist( %{ $options{model_options} },
|
797
|
5
|
|
|
|
|
12
|
tree_size => $options{tree_size} );
|
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
#Add the final length
|
800
|
5
|
|
|
|
|
87
|
foreach ( @{ $tree->get_terminals } ) {
|
|
5
|
|
|
|
|
33
|
|
801
|
50
|
|
|
|
|
113
|
$_->set_branch_length( $_->get_branch_length + $pendant_add );
|
802
|
|
|
|
|
|
|
}
|
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
#Add to the sample
|
805
|
5
|
|
|
|
|
16
|
push( @sample, $tree );
|
806
|
5
|
|
|
|
|
16
|
push( @expected_summary, 1 );
|
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how
|
809
|
|
|
|
|
|
|
#many samples have been obtained)
|
810
|
5
|
50
|
|
|
|
44
|
if ( defined $options{counter} ) {
|
811
|
0
|
|
|
|
|
0
|
my $counter = $options{counter};
|
812
|
0
|
|
|
|
|
0
|
&$counter(1);
|
813
|
|
|
|
|
|
|
}
|
814
|
|
|
|
|
|
|
}
|
815
|
1
|
|
|
|
|
8
|
return ( \@sample, \@expected_summary );
|
816
|
|
|
|
|
|
|
}
|
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=item sample_constant_rate_bd()
|
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
Sample from a constant rate birth and death model
|
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Type : Sampling algorithm
|
823
|
|
|
|
|
|
|
Title : sample_constant_rate_bd
|
824
|
|
|
|
|
|
|
Usage : see sample
|
825
|
|
|
|
|
|
|
Function: Samples trees from a memoryless birth model
|
826
|
|
|
|
|
|
|
Returns : see sample
|
827
|
|
|
|
|
|
|
Args : no specific algorithm options but see below
|
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
NB: This algorithm only applies to constant rate birth and death
|
830
|
|
|
|
|
|
|
processes. Consequently a model does not need to be specified (and
|
831
|
|
|
|
|
|
|
will be ignored if it is). But birth_rate and death_rate model
|
832
|
|
|
|
|
|
|
options must be given.
|
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=cut
|
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
sub sample_constant_rate_bd {
|
837
|
1
|
|
|
1
|
1
|
4
|
my %options = @_;
|
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
#Store parameters in shorter variables (for clarity)
|
840
|
|
|
|
|
|
|
my ( $br, $dr, $n ) = (
|
841
|
|
|
|
|
|
|
$options{model_options}->{birth_rate},
|
842
|
|
|
|
|
|
|
$options{model_options}->{death_rate},
|
843
|
|
|
|
|
|
|
$options{tree_size}
|
844
|
1
|
|
|
|
|
5
|
);
|
845
|
1
|
|
|
|
|
2
|
my @sample;
|
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
#Loop for sampling each tree
|
848
|
1
|
|
|
|
|
4
|
while ( scalar @sample < $options{sample_size} ) {
|
849
|
5
|
|
|
|
|
15
|
my @nodes;
|
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
#Compute the random tree age from the inverse CDF (different formulas for
|
852
|
|
|
|
|
|
|
#birth rate == death rate and otherwise)
|
853
|
|
|
|
|
|
|
my $tree_age;
|
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
#The uniform random variable
|
856
|
5
|
|
|
|
|
19
|
my $r = rand;
|
857
|
5
|
50
|
|
|
|
22
|
if ( $br == $dr ) {
|
858
|
0
|
|
|
|
|
0
|
$tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) );
|
859
|
|
|
|
|
|
|
}
|
860
|
|
|
|
|
|
|
else {
|
861
|
5
|
|
|
|
|
53
|
$tree_age =
|
862
|
|
|
|
|
|
|
1 /
|
863
|
|
|
|
|
|
|
( $br - $dr ) *
|
864
|
|
|
|
|
|
|
log(
|
865
|
|
|
|
|
|
|
( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) );
|
866
|
|
|
|
|
|
|
}
|
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
#Find the random speciation times
|
869
|
5
|
|
|
|
|
13
|
my @speciation;
|
870
|
5
|
|
|
|
|
19
|
foreach ( 0 .. ( $n - 2 ) ) {
|
871
|
45
|
50
|
|
|
|
70
|
if ( $br == $dr ) {
|
872
|
0
|
|
|
|
|
0
|
my $r = rand;
|
873
|
0
|
|
|
|
|
0
|
$speciation[$_] =
|
874
|
|
|
|
|
|
|
$r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) );
|
875
|
|
|
|
|
|
|
}
|
876
|
|
|
|
|
|
|
else {
|
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
#Two repeated parts of the inverse CDF for clarity
|
879
|
45
|
|
|
|
|
82
|
my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age );
|
880
|
45
|
|
|
|
|
62
|
my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand;
|
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
#The random speciation time from the inverse CDF
|
883
|
45
|
|
|
|
|
96
|
$speciation[$_] =
|
884
|
|
|
|
|
|
|
1 /
|
885
|
|
|
|
|
|
|
( $br - $dr ) *
|
886
|
|
|
|
|
|
|
log( ( $a - $dr * $b ) / ( $a - $br * $b ) );
|
887
|
|
|
|
|
|
|
}
|
888
|
|
|
|
|
|
|
}
|
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
#Create the initial terminals and a vector for their ages
|
891
|
5
|
|
|
|
|
11
|
my @terminals;
|
892
|
|
|
|
|
|
|
my @ages;
|
893
|
5
|
|
|
|
|
14
|
foreach ( 0 .. ( $n - 1 ) ) {
|
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
#Add a new terminal
|
896
|
50
|
|
|
|
|
146
|
$terminals[$_] = Bio::Phylo::Forest::Node->new();
|
897
|
50
|
|
|
|
|
210
|
$terminals[$_]->set_name( 'ID' . $_ );
|
898
|
50
|
|
|
|
|
106
|
$ages[$_] = 0;
|
899
|
|
|
|
|
|
|
}
|
900
|
5
|
|
|
|
|
21
|
@nodes = @terminals;
|
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
#Sort the speciation times
|
903
|
5
|
|
|
|
|
46
|
my @sorted_speciation = sort { $a <=> $b } @speciation;
|
|
96
|
|
|
|
|
148
|
|
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
#Make a hash for easily finding the index of a given speciation event
|
906
|
5
|
|
|
|
|
11
|
my %speciation_hash;
|
907
|
5
|
|
|
|
|
19
|
foreach ( 0 .. ( $n - 2 ) ) {
|
908
|
45
|
|
|
|
|
183
|
$speciation_hash{ $speciation[$_] } = $_;
|
909
|
|
|
|
|
|
|
}
|
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
#Construct the tree
|
912
|
5
|
|
|
|
|
18
|
foreach my $index ( 0 .. ( $n - 2 ) ) {
|
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
#Create the parent node
|
915
|
45
|
|
|
|
|
135
|
my $parent = Bio::Phylo::Forest::Node->new();
|
916
|
45
|
|
|
|
|
189
|
$parent->set_name( 'ID' . ( $n + $index ) );
|
917
|
45
|
|
|
|
|
95
|
push( @nodes, $parent );
|
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
#An index for this speciation event back into the unsorted vectors
|
920
|
45
|
|
|
|
|
219
|
my $spec_index = $speciation_hash{ $sorted_speciation[$index] };
|
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
#Add the children to the parent node
|
923
|
45
|
|
|
|
|
168
|
$parent->set_child( $terminals[$spec_index] );
|
924
|
45
|
|
|
|
|
147
|
$terminals[$spec_index]->set_parent($parent);
|
925
|
45
|
|
|
|
|
133
|
$parent->set_child( $terminals[ $spec_index + 1 ] );
|
926
|
45
|
|
|
|
|
138
|
$terminals[ $spec_index + 1 ]->set_parent($parent);
|
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
#Set the children's branch lengths
|
929
|
45
|
|
|
|
|
163
|
$terminals[$spec_index]->set_branch_length(
|
930
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[$spec_index] );
|
931
|
45
|
|
|
|
|
158
|
$terminals[ $spec_index + 1 ]->set_branch_length(
|
932
|
|
|
|
|
|
|
$sorted_speciation[$index] - $ages[ $spec_index + 1 ] );
|
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
#Replace the two terminals with the new one
|
935
|
45
|
|
|
|
|
95
|
splice( @terminals, $spec_index, 2, $parent );
|
936
|
45
|
|
|
|
|
98
|
splice( @ages, $spec_index, 2, $sorted_speciation[$index] );
|
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
#Update the mapping for the sorted speciation times
|
939
|
45
|
|
|
|
|
187
|
foreach ( keys %speciation_hash ) {
|
940
|
405
|
100
|
|
|
|
692
|
$speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index;
|
941
|
|
|
|
|
|
|
}
|
942
|
|
|
|
|
|
|
}
|
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
#Add the nodes to a tree
|
945
|
5
|
|
|
|
|
40
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
946
|
5
|
|
|
|
|
19
|
foreach ( reverse(@nodes) ) { $tree->insert($_); }
|
|
95
|
|
|
|
|
204
|
|
947
|
5
|
|
|
|
|
12
|
push( @sample, $tree );
|
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
#Update the sample counter (for GUI or other applications that want to know how
|
950
|
|
|
|
|
|
|
#many samples have been obtained)
|
951
|
5
|
50
|
|
|
|
114
|
if ( defined $options{counter} ) {
|
952
|
0
|
|
|
|
|
0
|
my $counter = $options{counter};
|
953
|
0
|
|
|
|
|
0
|
&$counter(1);
|
954
|
|
|
|
|
|
|
}
|
955
|
|
|
|
|
|
|
}
|
956
|
1
|
|
|
|
|
7
|
return ( \@sample, [] );
|
957
|
|
|
|
|
|
|
}
|
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=back
|
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=head1 EVOLUTIONARY MODELS
|
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
All evolutionary models take a options hash as their input argument
|
964
|
|
|
|
|
|
|
and return a Bio::Phylo::Forest::Tree. This tree may contain extinct
|
965
|
|
|
|
|
|
|
lineages (lineages that end prior to the end of the tree).
|
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
The options hash contains any model specific parameters (see the
|
968
|
|
|
|
|
|
|
individual model descriptions) and one or both terminating conditions:
|
969
|
|
|
|
|
|
|
tree_size => the number of extant species at which to terminate the tree
|
970
|
|
|
|
|
|
|
tree_age => the age of the tree at which to terminate the process
|
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
Note that if the model stops due to the tree_size condition then the
|
973
|
|
|
|
|
|
|
tree ends immediately after the speciation event that created the last
|
974
|
|
|
|
|
|
|
species.
|
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=over
|
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=item constant_rate_birth()
|
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
A constant rate birth model (Yule/ERM)
|
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
Type : Evolutionary model
|
983
|
|
|
|
|
|
|
Title : constant_rate_birth
|
984
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth(%options)
|
985
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
986
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
987
|
|
|
|
|
|
|
Args : %options with fields:
|
988
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1)
|
989
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
990
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
=cut
|
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
sub constant_rate_birth {
|
997
|
22
|
|
|
22
|
1
|
545
|
my %options = @_;
|
998
|
22
|
|
|
|
|
64
|
$options{death_rate} = 0;
|
999
|
22
|
|
|
|
|
81
|
return constant_rate_birth_death(%options);
|
1000
|
|
|
|
|
|
|
}
|
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
=item external_model()
|
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
A dummy model that takes as input a set of newick_trees and randomly samples
|
1005
|
|
|
|
|
|
|
these.
|
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
Type : Evolutionary model
|
1008
|
|
|
|
|
|
|
Title : external_model
|
1009
|
|
|
|
|
|
|
Usage : $tree = $external_model(%options)
|
1010
|
|
|
|
|
|
|
Function: Returns a random tree that was given as input
|
1011
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1012
|
|
|
|
|
|
|
Args : %options with fields:
|
1013
|
|
|
|
|
|
|
trees An array of newick strings. One of these is returned at random.
|
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
NB: The usual parameters tree_size and tree_age will be ignored. When sampling
|
1016
|
|
|
|
|
|
|
using this model the trees array must contain trees adhering to the requirements
|
1017
|
|
|
|
|
|
|
of the sampling algorithm. This is NOT checked automatically.
|
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
=cut
|
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
sub external_model {
|
1022
|
0
|
|
|
0
|
1
|
0
|
my %options = @_;
|
1023
|
0
|
|
|
|
|
0
|
my $choice = int( rand( scalar @{ $options{trees} } ) );
|
|
0
|
|
|
|
|
0
|
|
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
#Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object
|
1026
|
|
|
|
|
|
|
my $tree = Bio::Phylo::IO->parse(
|
1027
|
|
|
|
|
|
|
-format => 'newick',
|
1028
|
0
|
|
|
|
|
0
|
-string => $options{trees}->[$choice]
|
1029
|
|
|
|
|
|
|
)->first;
|
1030
|
0
|
|
|
|
|
0
|
return $tree;
|
1031
|
|
|
|
|
|
|
}
|
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
=item constant_rate_birth_death()
|
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
A constant rate birth and death model
|
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Type : Evolutionary model
|
1038
|
|
|
|
|
|
|
Title : constant_rate_birth_death
|
1039
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options)
|
1040
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1041
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1042
|
|
|
|
|
|
|
Args : %options with fields:
|
1043
|
|
|
|
|
|
|
birth_rate The birth rate parameter (default 1)
|
1044
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction)
|
1045
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1046
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
=cut
|
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
sub constant_rate_birth_death {
|
1053
|
50
|
|
|
50
|
1
|
198
|
my %options = @_;
|
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1056
|
50
|
50
|
33
|
|
|
200
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
#Error here.
|
1059
|
0
|
|
|
|
|
0
|
return undef;
|
1060
|
|
|
|
|
|
|
}
|
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1063
|
50
|
50
|
|
|
|
134
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1064
|
50
|
50
|
|
|
|
167
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
#Set default rates
|
1067
|
50
|
50
|
|
|
|
124
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} );
|
1068
|
|
|
|
|
|
|
delete $options{death_rate}
|
1069
|
50
|
100
|
66
|
|
|
316
|
if defined( $options{death_rate} ) && $options{death_rate} == 0;
|
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1072
|
50
|
|
|
|
|
90
|
my $node_id = 0;
|
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1075
|
50
|
|
|
|
|
218
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1076
|
50
|
|
|
|
|
218
|
my $root = Bio::Phylo::Forest::Node->new();
|
1077
|
50
|
|
|
|
|
214
|
$root->set_branch_length(0);
|
1078
|
50
|
|
|
|
|
268
|
$root->set_name( 'ID' . $node_id++ );
|
1079
|
50
|
|
|
|
|
203
|
$tree->insert($root);
|
1080
|
50
|
|
|
|
|
98
|
my @terminals = ($root);
|
1081
|
50
|
|
|
|
|
86
|
my ( $next_extinction, $next_speciation );
|
1082
|
50
|
|
|
|
|
103
|
my $time = 0;
|
1083
|
50
|
|
|
|
|
67
|
my $tree_size = 1;
|
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1086
|
50
|
50
|
33
|
|
|
198
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1089
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
|
1090
|
|
|
|
|
|
|
}
|
1091
|
|
|
|
|
|
|
else {
|
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1094
|
50
|
|
|
|
|
90
|
$next_speciation = 0;
|
1095
|
|
|
|
|
|
|
}
|
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always
|
1098
|
|
|
|
|
|
|
#set the extinction event after the current speciation event
|
1099
|
50
|
100
|
|
|
|
101
|
if ( defined $options{death_rate} ) {
|
1100
|
28
|
|
|
|
|
198
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size;
|
1101
|
|
|
|
|
|
|
}
|
1102
|
|
|
|
|
|
|
else {
|
1103
|
22
|
|
|
|
|
44
|
$next_extinction = $next_speciation + 1;
|
1104
|
|
|
|
|
|
|
}
|
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion
|
1107
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events
|
1108
|
50
|
|
66
|
|
|
294
|
while ($tree_size > 0
|
|
|
|
66
|
|
|
|
|
1109
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size}
|
1110
|
|
|
|
|
|
|
&& $time < $options{tree_age} )
|
1111
|
|
|
|
|
|
|
{
|
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
1114
|
3096
|
|
|
|
|
5196
|
foreach (@terminals) {
|
1115
|
|
|
|
|
|
|
$_->set_branch_length(
|
1116
|
|
|
|
|
|
|
$_->get_branch_length + min(
|
1117
|
|
|
|
|
|
|
$next_extinction, $next_speciation,
|
1118
|
41725
|
|
|
|
|
73075
|
$options{tree_age} - $time
|
1119
|
|
|
|
|
|
|
)
|
1120
|
|
|
|
|
|
|
);
|
1121
|
|
|
|
|
|
|
}
|
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
#Update the time
|
1124
|
3096
|
|
|
|
|
5571
|
$time += min( $next_extinction, $next_speciation );
|
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
1127
|
3096
|
50
|
|
|
|
5759
|
return $tree if ( $time > $options{tree_age} );
|
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list
|
1130
|
3096
|
|
|
|
|
7320
|
my $effected =
|
1131
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 );
|
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
1134
|
3096
|
100
|
66
|
|
|
8429
|
if ( $next_speciation < $next_extinction || !defined $next_extinction )
|
1135
|
|
|
|
|
|
|
{
|
1136
|
1843
|
|
|
|
|
3064
|
foreach ( 1, 2 ) {
|
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
#Create a new species
|
1139
|
3686
|
|
|
|
|
10596
|
my $child = Bio::Phylo::Forest::Node->new();
|
1140
|
3686
|
|
|
|
|
13716
|
$child->set_name( 'ID' . $node_id++ );
|
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
#Give it a zero edge length
|
1143
|
3686
|
|
|
|
|
10772
|
$child->set_branch_length(0);
|
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
1146
|
3686
|
|
|
|
|
10169
|
$effected->set_child($child);
|
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
#Add it to the tree
|
1149
|
3686
|
|
|
|
|
9644
|
$tree->insert($child);
|
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
#Add it to the terminals list
|
1152
|
3686
|
|
|
|
|
7430
|
push( @terminals, $child );
|
1153
|
|
|
|
|
|
|
}
|
1154
|
|
|
|
|
|
|
}
|
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation
|
1157
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually
|
1158
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where
|
1159
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed.
|
1160
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
1161
|
3096
|
|
|
|
|
5012
|
my ( $r1, $r2 ) = ( 0, 0 );
|
1162
|
3096
|
|
|
|
|
7559
|
$r1 = rand until $r1;
|
1163
|
3096
|
|
|
|
|
5840
|
$r2 = rand until $r2;
|
1164
|
3096
|
|
|
|
|
4005
|
$tree_size = scalar @terminals;
|
1165
|
3096
|
100
|
|
|
|
5011
|
return $tree unless $tree_size;
|
1166
|
3082
|
|
|
|
|
8445
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
|
1167
|
3082
|
100
|
|
|
|
5399
|
if ( defined $options{death_rate} ) {
|
1168
|
2884
|
|
|
|
|
13568
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size;
|
1169
|
|
|
|
|
|
|
}
|
1170
|
|
|
|
|
|
|
else {
|
1171
|
198
|
|
|
|
|
996
|
$next_extinction = $next_speciation + 1;
|
1172
|
|
|
|
|
|
|
}
|
1173
|
|
|
|
|
|
|
}
|
1174
|
36
|
|
|
|
|
196
|
return $tree;
|
1175
|
|
|
|
|
|
|
}
|
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
=item diversity_dependent_speciation()
|
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
A birth and death model with speciation rate dependent on diversity as per
|
1181
|
|
|
|
|
|
|
Etienne et. al. 2012
|
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
Type : Evolutionary model
|
1184
|
|
|
|
|
|
|
Title : diversity_dependent_speciation
|
1185
|
|
|
|
|
|
|
Usage : $tree = diversity_dependent_speciation(%options)
|
1186
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1187
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1188
|
|
|
|
|
|
|
Args : %options with fields:
|
1189
|
|
|
|
|
|
|
maximal_birth_rate The maximal birth rate parameter (default 1)
|
1190
|
|
|
|
|
|
|
death_rate The death rate parameter (default no extinction)
|
1191
|
|
|
|
|
|
|
K_dash The modified carrying capacity (no default)
|
1192
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1193
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
Reference:
|
1198
|
|
|
|
|
|
|
Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson,
|
1199
|
|
|
|
|
|
|
Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular
|
1200
|
|
|
|
|
|
|
phylogenies closer to agreement with the fossil record"
|
1201
|
|
|
|
|
|
|
doi: 10.1098/rspb.2011.1439
|
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=cut
|
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
sub diversity_dependent_speciation {
|
1206
|
0
|
|
|
0
|
1
|
0
|
my %options = @_;
|
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1209
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1210
|
|
|
|
|
|
|
#Error here.
|
1211
|
0
|
|
|
|
|
0
|
return undef;
|
1212
|
|
|
|
|
|
|
}
|
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
#Check that we have a carrying capacity
|
1215
|
0
|
0
|
|
|
|
0
|
unless ( defined $options{K_dash} ) {
|
1216
|
|
|
|
|
|
|
#Error here.
|
1217
|
0
|
|
|
|
|
0
|
return undef;
|
1218
|
|
|
|
|
|
|
}
|
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1221
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1222
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
#Set default rates
|
1225
|
0
|
0
|
|
|
|
0
|
$options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} );
|
1226
|
|
|
|
|
|
|
delete $options{death_rate}
|
1227
|
0
|
0
|
0
|
|
|
0
|
if defined( $options{death_rate} ) && $options{death_rate} == 0;
|
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1230
|
0
|
|
|
|
|
0
|
my $node_id = 0;
|
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1233
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1234
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new();
|
1235
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0);
|
1236
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ );
|
1237
|
0
|
|
|
|
|
0
|
$tree->insert($root);
|
1238
|
0
|
|
|
|
|
0
|
my @terminals = ($root);
|
1239
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation );
|
1240
|
0
|
|
|
|
|
0
|
my $time = 0;
|
1241
|
0
|
|
|
|
|
0
|
my $tree_size = 1;
|
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
|
1244
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash}));
|
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1248
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1251
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
|
1252
|
|
|
|
|
|
|
}
|
1253
|
|
|
|
|
|
|
else {
|
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1256
|
0
|
|
|
|
|
0
|
$next_speciation = 0;
|
1257
|
|
|
|
|
|
|
}
|
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always
|
1260
|
|
|
|
|
|
|
#set the extinction event after the current speciation event
|
1261
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) {
|
1262
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $options{death_rate} / $tree_size;
|
1263
|
|
|
|
|
|
|
}
|
1264
|
|
|
|
|
|
|
else {
|
1265
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1;
|
1266
|
|
|
|
|
|
|
}
|
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion
|
1269
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events
|
1270
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0
|
|
|
|
0
|
|
|
|
|
1271
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size}
|
1272
|
|
|
|
|
|
|
&& $time < $options{tree_age} )
|
1273
|
|
|
|
|
|
|
{
|
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
1276
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
1277
|
|
|
|
|
|
|
$_->set_branch_length(
|
1278
|
|
|
|
|
|
|
$_->get_branch_length + min(
|
1279
|
|
|
|
|
|
|
$next_extinction, $next_speciation,
|
1280
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time
|
1281
|
|
|
|
|
|
|
)
|
1282
|
|
|
|
|
|
|
);
|
1283
|
|
|
|
|
|
|
}
|
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
#Update the time
|
1286
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation );
|
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
1289
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} );
|
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list
|
1292
|
0
|
|
|
|
|
0
|
my $effected =
|
1293
|
|
|
|
|
|
|
splice( @terminals, int( rand( scalar @terminals ) ), 1 );
|
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
1296
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction )
|
1297
|
|
|
|
|
|
|
{
|
1298
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) {
|
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
#Create a new species
|
1301
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new();
|
1302
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ );
|
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
#Give it a zero edge length
|
1305
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0);
|
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
1308
|
0
|
|
|
|
|
0
|
$effected->set_child($child);
|
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
#Add it to the tree
|
1311
|
0
|
|
|
|
|
0
|
$tree->insert($child);
|
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
#Add it to the terminals list
|
1314
|
0
|
|
|
|
|
0
|
push( @terminals, $child );
|
1315
|
|
|
|
|
|
|
}
|
1316
|
|
|
|
|
|
|
}
|
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation
|
1319
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually
|
1320
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where
|
1321
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed.
|
1322
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
1323
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 );
|
1324
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1;
|
1325
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2;
|
1326
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals;
|
1327
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size;
|
1328
|
|
|
|
|
|
|
|
1329
|
0
|
|
|
|
|
0
|
$options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash}));
|
1330
|
0
|
0
|
|
|
|
0
|
if ($options{birth_rate}==0)
|
1331
|
|
|
|
|
|
|
{
|
1332
|
0
|
|
|
|
|
0
|
$next_speciation = $options{tree_age};
|
1333
|
|
|
|
|
|
|
} else
|
1334
|
|
|
|
|
|
|
{
|
1335
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
|
1336
|
|
|
|
|
|
|
}
|
1337
|
0
|
0
|
|
|
|
0
|
if ( defined $options{death_rate} ) {
|
1338
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $options{death_rate} / $tree_size;
|
1339
|
|
|
|
|
|
|
}
|
1340
|
|
|
|
|
|
|
else {
|
1341
|
0
|
|
|
|
|
0
|
$next_extinction = $next_speciation + 1;
|
1342
|
|
|
|
|
|
|
}
|
1343
|
|
|
|
|
|
|
}
|
1344
|
0
|
|
|
|
|
0
|
return $tree;
|
1345
|
|
|
|
|
|
|
}
|
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=item constant_rate_birth_death()
|
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
A temporal shift birth and death model
|
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
Type : Evolutionary model
|
1353
|
|
|
|
|
|
|
Title : temporal_shift_birth_death
|
1354
|
|
|
|
|
|
|
Usage : $tree = constant_rate_birth_death(%options)
|
1355
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1356
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1357
|
|
|
|
|
|
|
Args : %options with fields:
|
1358
|
|
|
|
|
|
|
birth_rates The birth rates
|
1359
|
|
|
|
|
|
|
death_rates The death rates
|
1360
|
|
|
|
|
|
|
rate_times The times after which the rates apply (first element must be 0)
|
1361
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1362
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
=cut
|
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
sub temporal_shift_birth_death {
|
1369
|
0
|
|
|
0
|
0
|
0
|
my %options = @_;
|
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1372
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
#Error here.
|
1375
|
0
|
|
|
|
|
0
|
return undef;
|
1376
|
|
|
|
|
|
|
}
|
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1379
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1380
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1383
|
0
|
|
|
|
|
0
|
my $node_id = 0;
|
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1386
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1387
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new();
|
1388
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0);
|
1389
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ );
|
1390
|
0
|
|
|
|
|
0
|
$tree->insert($root);
|
1391
|
0
|
|
|
|
|
0
|
my @terminals = ($root);
|
1392
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation );
|
1393
|
0
|
|
|
|
|
0
|
my $time = 0;
|
1394
|
0
|
|
|
|
|
0
|
my $tree_size = 1;
|
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
#Load current rates
|
1397
|
0
|
|
|
|
|
0
|
my $birth_rate = $options{birth_rates}[0];
|
1398
|
0
|
|
|
|
|
0
|
my $death_rate = $options{death_rates}[0];
|
1399
|
0
|
|
|
|
|
0
|
my $current_rates = 0;
|
1400
|
0
|
|
|
|
|
0
|
my $next_rate_change = $options{rate_times}[$current_rates+1];
|
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
#Add an additional time to the end of the rate change times to simplify checking
|
1403
|
0
|
|
|
|
|
0
|
push(@{$options{rate_times}},$options{tree_size}*2);
|
|
0
|
|
|
|
|
0
|
|
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1406
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1409
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rate / $tree_size;
|
1410
|
|
|
|
|
|
|
}
|
1411
|
|
|
|
|
|
|
else {
|
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1414
|
0
|
|
|
|
|
0
|
$next_speciation = 0;
|
1415
|
|
|
|
|
|
|
}
|
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
|
1418
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always
|
1419
|
|
|
|
|
|
|
#set the extinction event after the current speciation event
|
1420
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rate / $tree_size;
|
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion
|
1423
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events
|
1424
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0
|
|
|
|
0
|
|
|
|
|
1425
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size}
|
1426
|
|
|
|
|
|
|
&& $time < $options{tree_age} )
|
1427
|
|
|
|
|
|
|
{
|
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
# print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n";
|
1430
|
|
|
|
|
|
|
# print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
|
1431
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
1432
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
1433
|
|
|
|
|
|
|
$_->set_branch_length(
|
1434
|
|
|
|
|
|
|
$_->get_branch_length + min(
|
1435
|
|
|
|
|
|
|
$next_extinction, $next_speciation,
|
1436
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time
|
1437
|
|
|
|
|
|
|
)
|
1438
|
|
|
|
|
|
|
);
|
1439
|
|
|
|
|
|
|
}
|
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
#Update the time
|
1442
|
0
|
|
|
|
|
0
|
my $time_last = $time;
|
1443
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last);
|
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
1446
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} );
|
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
|
1449
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) )
|
1450
|
|
|
|
|
|
|
{
|
1451
|
0
|
|
|
|
|
0
|
$current_rates += 1;
|
1452
|
0
|
|
|
|
|
0
|
$birth_rate = $options{birth_rates}[$current_rates];
|
1453
|
0
|
|
|
|
|
0
|
$death_rate = $options{death_rates}[$current_rates];
|
1454
|
0
|
|
|
|
|
0
|
$next_rate_change = $options{rate_times}[$current_rates+1];
|
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
} else
|
1457
|
|
|
|
|
|
|
{
|
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
#Get the species effected by this event and remove it from the terminals list
|
1461
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 );
|
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
1464
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction )
|
1465
|
|
|
|
|
|
|
{
|
1466
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) {
|
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
#Create a new species
|
1469
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new();
|
1470
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ );
|
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
#Give it a zero edge length
|
1473
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0);
|
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
1476
|
0
|
|
|
|
|
0
|
$effected->set_child($child);
|
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
#Add it to the tree
|
1479
|
0
|
|
|
|
|
0
|
$tree->insert($child);
|
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
#Add it to the terminals list
|
1482
|
0
|
|
|
|
|
0
|
push( @terminals, $child );
|
1483
|
|
|
|
|
|
|
}
|
1484
|
|
|
|
|
|
|
}
|
1485
|
|
|
|
|
|
|
}
|
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation
|
1488
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually
|
1489
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where
|
1490
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed.
|
1491
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
1492
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 );
|
1493
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1;
|
1494
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2;
|
1495
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals;
|
1496
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size;
|
1497
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $birth_rate / $tree_size;
|
1498
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $death_rate / $tree_size;
|
1499
|
|
|
|
|
|
|
|
1500
|
0
|
0
|
|
|
|
0
|
if ((scalar @terminals)%100==0)
|
1501
|
|
|
|
|
|
|
{
|
1502
|
0
|
|
|
|
|
0
|
print $time."|".@terminals."|\n";
|
1503
|
|
|
|
|
|
|
}
|
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
}
|
1506
|
0
|
|
|
|
|
0
|
return $tree;
|
1507
|
|
|
|
|
|
|
}
|
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
=item evolving_speciation_rate()
|
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. Each daughter
|
1512
|
|
|
|
|
|
|
species is assigned its parent's speciation rate multiplied by a normally
|
1513
|
|
|
|
|
|
|
distributed noise factor.
|
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
Type : Evolutionary model
|
1516
|
|
|
|
|
|
|
Title : evolving_speciation_rate
|
1517
|
|
|
|
|
|
|
Usage : $tree = evolving_speciation_rate(%options)
|
1518
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1519
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1520
|
|
|
|
|
|
|
Args : %options with fields:
|
1521
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1)
|
1522
|
|
|
|
|
|
|
evolving_std The standard deviation of the normal distribution
|
1523
|
|
|
|
|
|
|
from which the rate multiplier is drawn.
|
1524
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1525
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
=cut
|
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
sub evolving_speciation_rate {
|
1532
|
0
|
|
|
0
|
1
|
0
|
my %options = @_;
|
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1535
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
#Error here.
|
1538
|
0
|
|
|
|
|
0
|
return undef;
|
1539
|
|
|
|
|
|
|
}
|
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1542
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1543
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
#Set default rates
|
1546
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} );
|
1547
|
0
|
0
|
|
|
|
0
|
$options{evolving_std} = 1 unless defined( $options{evolving_std} );
|
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1550
|
0
|
|
|
|
|
0
|
my $node_id = 0;
|
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1553
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1554
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new();
|
1555
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0);
|
1556
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ );
|
1557
|
0
|
|
|
|
|
0
|
$tree->insert($root);
|
1558
|
0
|
|
|
|
|
0
|
my @terminals = ($root);
|
1559
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} );
|
1560
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate};
|
1561
|
0
|
|
|
|
|
0
|
my $next_speciation;
|
1562
|
0
|
|
|
|
|
0
|
my $time = 0;
|
1563
|
0
|
|
|
|
|
0
|
my $tree_size = 1;
|
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1566
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1569
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
|
1570
|
|
|
|
|
|
|
}
|
1571
|
|
|
|
|
|
|
else {
|
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1574
|
0
|
|
|
|
|
0
|
$next_speciation = 0;
|
1575
|
|
|
|
|
|
|
}
|
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
#While we haven't reached termination
|
1578
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
|
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
1581
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
1582
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length +
|
1583
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) );
|
1584
|
|
|
|
|
|
|
}
|
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
#Update the time
|
1587
|
0
|
|
|
|
|
0
|
$time += $next_speciation;
|
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
1590
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} );
|
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
#Get the species effected by this event
|
1593
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate);
|
1594
|
0
|
|
|
|
|
0
|
my $selected = 0;
|
1595
|
0
|
|
0
|
|
|
0
|
for (
|
1596
|
|
|
|
|
|
|
;
|
1597
|
|
|
|
|
|
|
$selected < scalar @terminals
|
1598
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ;
|
1599
|
|
|
|
|
|
|
$selected++
|
1600
|
|
|
|
|
|
|
)
|
1601
|
|
|
|
|
|
|
{
|
1602
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected];
|
1603
|
|
|
|
|
|
|
}
|
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
#Remove it from the terminals list
|
1606
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 );
|
1607
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 );
|
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
#Update the net speciation rate
|
1610
|
0
|
|
|
|
|
0
|
$net_rate -= $effected_rate;
|
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
1613
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) {
|
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
#Create a new species
|
1616
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new();
|
1617
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ );
|
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
#Give it a zero edge length
|
1620
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0);
|
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
1623
|
0
|
|
|
|
|
0
|
$effected->set_child($child);
|
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
#Add it to the tree
|
1626
|
0
|
|
|
|
|
0
|
$tree->insert($child);
|
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
#Add it to the terminals list
|
1629
|
0
|
|
|
|
|
0
|
push( @terminals, $child );
|
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
#New speciation rate
|
1632
|
|
|
|
|
|
|
my $new_speciation_rate =
|
1633
|
0
|
|
|
|
|
0
|
$effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} );
|
1634
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
|
|
0
|
|
|
|
|
0
|
|
1635
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate );
|
1636
|
0
|
|
|
|
|
0
|
$net_rate += $new_speciation_rate;
|
1637
|
|
|
|
|
|
|
}
|
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
# $net_rate = 0;
|
1640
|
|
|
|
|
|
|
# foreach (@birth_rates) { $net_rate += $_; }
|
1641
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
1642
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 );
|
1643
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1;
|
1644
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals;
|
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely)
|
1647
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) {
|
1648
|
0
|
|
|
|
|
0
|
return $tree;
|
1649
|
|
|
|
|
|
|
}
|
1650
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size;
|
1651
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size;
|
1652
|
|
|
|
|
|
|
}
|
1653
|
0
|
|
|
|
|
0
|
return $tree;
|
1654
|
|
|
|
|
|
|
}
|
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=item clade_shifts()
|
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
A constant rate birth-death model with punctuated changes in the speciation
|
1660
|
|
|
|
|
|
|
and extinction rates. At each change one lineage receives new pre-specified
|
1661
|
|
|
|
|
|
|
speciation and extinction rates.
|
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
Type : Evolutionary model
|
1664
|
|
|
|
|
|
|
Title : clade_shifts
|
1665
|
|
|
|
|
|
|
Usage : $tree = clade_shifts(%options)
|
1666
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1667
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1668
|
|
|
|
|
|
|
Args : %options with fields:
|
1669
|
|
|
|
|
|
|
birth_rates The speciation rates
|
1670
|
|
|
|
|
|
|
death_rates The death rates
|
1671
|
|
|
|
|
|
|
rate_times The times at which the rates are introduced to a new
|
1672
|
|
|
|
|
|
|
clade. The first time should be zero. The remaining must be in
|
1673
|
|
|
|
|
|
|
ascending order.
|
1674
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1675
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=cut
|
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
sub clade_shifts {
|
1682
|
0
|
|
|
0
|
1
|
0
|
my %options = @_;
|
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1685
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
#Error here.
|
1688
|
0
|
|
|
|
|
0
|
return undef;
|
1689
|
|
|
|
|
|
|
}
|
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1692
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1693
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1696
|
0
|
|
|
|
|
0
|
my $node_id = 0;
|
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1699
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1700
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new();
|
1701
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0);
|
1702
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ );
|
1703
|
0
|
|
|
|
|
0
|
$tree->insert($root);
|
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
#rates
|
1706
|
0
|
|
|
|
|
0
|
my @birth_rates_in = @{$options{birth_rates}};
|
|
0
|
|
|
|
|
0
|
|
1707
|
0
|
|
|
|
|
0
|
my @death_rates_in = @{$options{death_rates}};
|
|
0
|
|
|
|
|
0
|
|
1708
|
0
|
|
|
|
|
0
|
my @rate_times_in = @{$options{rate_times}};
|
|
0
|
|
|
|
|
0
|
|
1709
|
|
|
|
|
|
|
|
1710
|
0
|
0
|
|
|
|
0
|
if ($rate_times_in[0] != 0)
|
1711
|
|
|
|
|
|
|
{
|
1712
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
1713
|
|
|
|
|
|
|
"The first rate time must be 0" );
|
1714
|
|
|
|
|
|
|
}
|
1715
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @death_rates_in)
|
1716
|
|
|
|
|
|
|
{
|
1717
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
1718
|
|
|
|
|
|
|
"birth and death rates must have the same length" );
|
1719
|
|
|
|
|
|
|
}
|
1720
|
0
|
0
|
|
|
|
0
|
if (scalar @birth_rates_in != scalar @rate_times_in)
|
1721
|
|
|
|
|
|
|
{
|
1722
|
0
|
|
|
|
|
0
|
Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
|
1723
|
|
|
|
|
|
|
"birth/death rates must have the same length as rate times" );
|
1724
|
|
|
|
|
|
|
}
|
1725
|
0
|
|
|
|
|
0
|
my @birth_rates = ($birth_rates_in[0]);
|
1726
|
0
|
|
|
|
|
0
|
my @death_rates = ($death_rates_in[0]);
|
1727
|
|
|
|
|
|
|
|
1728
|
0
|
|
|
|
|
0
|
my $net_birth_rate = $birth_rates[0];
|
1729
|
0
|
|
|
|
|
0
|
my $net_death_rate = $death_rates[0];
|
1730
|
|
|
|
|
|
|
|
1731
|
0
|
|
|
|
|
0
|
shift(@birth_rates_in);
|
1732
|
0
|
|
|
|
|
0
|
shift(@death_rates_in);
|
1733
|
|
|
|
|
|
|
|
1734
|
0
|
|
|
|
|
0
|
my @terminals = ($root);
|
1735
|
0
|
|
|
|
|
0
|
my ( $next_extinction, $next_speciation, $next_rate_change );
|
1736
|
0
|
|
|
|
|
0
|
my $time = 0;
|
1737
|
0
|
|
|
|
|
0
|
my $tree_size = 1;
|
1738
|
0
|
|
|
|
|
0
|
my $inf = 9**9**9**9;
|
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1741
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1742
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1744
|
0
|
0
|
|
|
|
0
|
if ($birth_rates[0] > 0)
|
1745
|
|
|
|
|
|
|
{
|
1746
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $birth_rates[0] ;
|
1747
|
|
|
|
|
|
|
} else
|
1748
|
|
|
|
|
|
|
{
|
1749
|
0
|
|
|
|
|
0
|
$next_speciation = $inf;
|
1750
|
|
|
|
|
|
|
}
|
1751
|
|
|
|
|
|
|
}
|
1752
|
|
|
|
|
|
|
else {
|
1753
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1754
|
0
|
|
|
|
|
0
|
$next_speciation = 0.0;
|
1755
|
|
|
|
|
|
|
}
|
1756
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
#Time of the first extinction event. If no extinction we always
|
1758
|
|
|
|
|
|
|
#set the extinction event after the current speciation event
|
1759
|
0
|
0
|
|
|
|
0
|
if ($death_rates[0] > 0)
|
1760
|
|
|
|
|
|
|
{
|
1761
|
0
|
|
|
|
|
0
|
$next_extinction = -log(rand) / $death_rates[0];
|
1762
|
|
|
|
|
|
|
} else
|
1763
|
|
|
|
|
|
|
{
|
1764
|
0
|
|
|
|
|
0
|
$next_extinction = $inf;
|
1765
|
|
|
|
|
|
|
}
|
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
#Time of next rate change
|
1768
|
0
|
|
|
|
|
0
|
shift(@rate_times_in); #pop the initial 0
|
1769
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in);
|
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
#While the tree has not become extinct and the termination criterion
|
1772
|
|
|
|
|
|
|
#has not been achieved we create new speciation and extinction events
|
1773
|
0
|
|
0
|
|
|
0
|
while ($tree_size > 0
|
|
|
|
0
|
|
|
|
|
1774
|
|
|
|
|
|
|
&& $tree_size < $options{tree_size}
|
1775
|
|
|
|
|
|
|
&& $time < $options{tree_age} )
|
1776
|
|
|
|
|
|
|
{
|
1777
|
|
|
|
|
|
|
#print $time."|".$tree_size."\n";
|
1778
|
|
|
|
|
|
|
#Update rates if a clade shift is happening
|
1779
|
|
|
|
|
|
|
#TODO index rates or pop off one at a time.
|
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
1783
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
1784
|
|
|
|
|
|
|
$_->set_branch_length(
|
1785
|
|
|
|
|
|
|
$_->get_branch_length + min(
|
1786
|
|
|
|
|
|
|
$next_extinction,
|
1787
|
|
|
|
|
|
|
$next_speciation,
|
1788
|
|
|
|
|
|
|
$next_rate_change-$time,
|
1789
|
0
|
|
|
|
|
0
|
$options{tree_age} - $time
|
1790
|
|
|
|
|
|
|
)
|
1791
|
|
|
|
|
|
|
);
|
1792
|
|
|
|
|
|
|
}
|
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
#Update the time
|
1795
|
0
|
|
|
|
|
0
|
my $time_last = $time;
|
1796
|
0
|
|
|
|
|
0
|
$time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last );
|
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
1799
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} );
|
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
#We have a rate change
|
1802
|
0
|
0
|
|
|
|
0
|
if ($next_rate_change-$time_last < min($next_extinction, $next_speciation))
|
1803
|
|
|
|
|
|
|
{
|
1804
|
|
|
|
|
|
|
#Find a random species to effect
|
1805
|
0
|
|
|
|
|
0
|
my $effected_species = int(rand($tree_size));
|
1806
|
|
|
|
|
|
|
#Subtract current rates
|
1807
|
0
|
|
|
|
|
0
|
$net_death_rate -= $death_rates[$effected_species];
|
1808
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $birth_rates[$effected_species];
|
1809
|
|
|
|
|
|
|
#Get new rates
|
1810
|
0
|
|
|
|
|
0
|
$death_rates[$effected_species] = shift(@death_rates_in);
|
1811
|
0
|
|
|
|
|
0
|
$birth_rates[$effected_species] = shift(@birth_rates_in);
|
1812
|
|
|
|
|
|
|
#Add new rates
|
1813
|
0
|
|
|
|
|
0
|
$net_death_rate += $death_rates[$effected_species];
|
1814
|
0
|
|
|
|
|
0
|
$net_birth_rate += $birth_rates[$effected_species];
|
1815
|
|
|
|
|
|
|
#Get next rate change time
|
1816
|
0
|
0
|
|
|
|
0
|
if (scalar(@rate_times_in))
|
1817
|
|
|
|
|
|
|
{
|
1818
|
0
|
|
|
|
|
0
|
$next_rate_change = shift(@rate_times_in);
|
1819
|
|
|
|
|
|
|
} else
|
1820
|
|
|
|
|
|
|
{
|
1821
|
0
|
|
|
|
|
0
|
$next_rate_change = $inf;
|
1822
|
|
|
|
|
|
|
}
|
1823
|
|
|
|
|
|
|
}
|
1824
|
|
|
|
|
|
|
#Choosing a random species to speciate
|
1825
|
|
|
|
|
|
|
else
|
1826
|
|
|
|
|
|
|
{
|
1827
|
0
|
|
|
|
|
0
|
my $selected = 0;
|
1828
|
0
|
0
|
|
|
|
0
|
if ( $next_speciation < $next_extinction )
|
1829
|
|
|
|
|
|
|
{
|
1830
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_birth_rate);
|
1831
|
0
|
|
0
|
|
|
0
|
for (
|
1832
|
|
|
|
|
|
|
;
|
1833
|
|
|
|
|
|
|
$selected < scalar @terminals
|
1834
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ;
|
1835
|
|
|
|
|
|
|
$selected++
|
1836
|
|
|
|
|
|
|
)
|
1837
|
|
|
|
|
|
|
{
|
1838
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected];
|
1839
|
|
|
|
|
|
|
}
|
1840
|
|
|
|
|
|
|
} else
|
1841
|
|
|
|
|
|
|
{
|
1842
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_death_rate);
|
1843
|
0
|
|
0
|
|
|
0
|
for (
|
1844
|
|
|
|
|
|
|
;
|
1845
|
|
|
|
|
|
|
$selected < scalar @terminals
|
1846
|
|
|
|
|
|
|
&& $rand_select > $death_rates[$selected] ;
|
1847
|
|
|
|
|
|
|
$selected++
|
1848
|
|
|
|
|
|
|
)
|
1849
|
|
|
|
|
|
|
{
|
1850
|
0
|
|
|
|
|
0
|
$rand_select -= $death_rates[$selected];
|
1851
|
|
|
|
|
|
|
}
|
1852
|
|
|
|
|
|
|
}
|
1853
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate == 0)
|
1854
|
|
|
|
|
|
|
{
|
1855
|
0
|
|
|
|
|
0
|
$selected = 0;
|
1856
|
|
|
|
|
|
|
}
|
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
#Remove the species effected by this event and remove it from the terminals list
|
1859
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 );
|
1860
|
0
|
|
|
|
|
0
|
my $effected_birth_rate = splice( @birth_rates, $selected, 1 );
|
1861
|
0
|
|
|
|
|
0
|
my $effected_death_rate = splice( @death_rates, $selected, 1 );
|
1862
|
|
|
|
|
|
|
|
1863
|
0
|
|
|
|
|
0
|
$net_birth_rate -= $effected_birth_rate;
|
1864
|
0
|
|
|
|
|
0
|
$net_death_rate -= $effected_death_rate;
|
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
1867
|
0
|
0
|
0
|
|
|
0
|
if ( $next_speciation < $next_extinction || !defined $next_extinction )
|
1868
|
|
|
|
|
|
|
{
|
1869
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) {
|
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
#Create a new species
|
1872
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new();
|
1873
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ );
|
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
#Give it a zero edge length
|
1876
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0);
|
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
1879
|
0
|
|
|
|
|
0
|
$effected->set_child($child);
|
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
#Add it to the tree
|
1882
|
0
|
|
|
|
|
0
|
$tree->insert($child);
|
1883
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
#Add it to the terminals list
|
1885
|
0
|
|
|
|
|
0
|
push( @terminals, $child );
|
1886
|
|
|
|
|
|
|
|
1887
|
0
|
|
|
|
|
0
|
push( @birth_rates, $effected_birth_rate );
|
1888
|
0
|
|
|
|
|
0
|
push( @death_rates, $effected_death_rate );
|
1889
|
|
|
|
|
|
|
|
1890
|
0
|
|
|
|
|
0
|
$net_death_rate += $effected_death_rate;
|
1891
|
0
|
|
|
|
|
0
|
$net_birth_rate += $effected_birth_rate;
|
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
}
|
1894
|
|
|
|
|
|
|
}
|
1895
|
|
|
|
|
|
|
}
|
1896
|
|
|
|
|
|
|
#We calculate the time that the next extinction and speciation
|
1897
|
|
|
|
|
|
|
#events will occur (only the earliest of these will actually
|
1898
|
|
|
|
|
|
|
#happen). NB: this approach is only appropriate for models where
|
1899
|
|
|
|
|
|
|
#speciation and extinction times are exponentially distributed.
|
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
1902
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 );
|
1903
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1;
|
1904
|
0
|
|
|
|
|
0
|
$r2 = rand until $r2;
|
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
#The current tree size
|
1907
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals;
|
1908
|
|
|
|
|
|
|
|
1909
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size;
|
1910
|
|
|
|
|
|
|
|
1911
|
0
|
0
|
|
|
|
0
|
if ($net_birth_rate > 0)
|
1912
|
|
|
|
|
|
|
{
|
1913
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_birth_rate;
|
1914
|
|
|
|
|
|
|
} else
|
1915
|
|
|
|
|
|
|
{
|
1916
|
0
|
|
|
|
|
0
|
$next_speciation = $inf;
|
1917
|
|
|
|
|
|
|
}
|
1918
|
|
|
|
|
|
|
|
1919
|
0
|
0
|
|
|
|
0
|
if ($net_death_rate > 0)
|
1920
|
|
|
|
|
|
|
{
|
1921
|
0
|
|
|
|
|
0
|
$next_extinction = -log($r2) / $net_death_rate;
|
1922
|
|
|
|
|
|
|
} else
|
1923
|
|
|
|
|
|
|
{
|
1924
|
0
|
|
|
|
|
0
|
$next_extinction = $inf;
|
1925
|
|
|
|
|
|
|
}
|
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
}
|
1928
|
0
|
|
|
|
|
0
|
return $tree;
|
1929
|
|
|
|
|
|
|
}
|
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=item beta_binomial()
|
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
An evolutionary model featuring evolving speciation rates. From Blum2007
|
1934
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
Type : Evolutionary model
|
1936
|
|
|
|
|
|
|
Title : beta_binomial
|
1937
|
|
|
|
|
|
|
Usage : $tree = beta_binomial(%options)
|
1938
|
|
|
|
|
|
|
Function: Produces a tree from the model terminating at a given size/time
|
1939
|
|
|
|
|
|
|
Returns : Bio::Phylo::Forest::Tree
|
1940
|
|
|
|
|
|
|
Args : %options with fields:
|
1941
|
|
|
|
|
|
|
birth_rate The initial speciation rate (default 1)
|
1942
|
|
|
|
|
|
|
model_param The parameter as defined in Blum2007
|
1943
|
|
|
|
|
|
|
tree_size The size of the tree at which to terminate
|
1944
|
|
|
|
|
|
|
tree_age The age of the tree at which to terminate
|
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
NB: At least one of tree_size and tree_age must be specified
|
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
=cut
|
1949
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
sub beta_binomial {
|
1951
|
0
|
|
|
0
|
1
|
0
|
my %options = @_;
|
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
#Check that we have a termination condition
|
1954
|
0
|
0
|
0
|
|
|
0
|
unless ( defined $options{tree_size} or defined $options{tree_age} ) {
|
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
#Error here.
|
1957
|
0
|
|
|
|
|
0
|
return undef;
|
1958
|
|
|
|
|
|
|
}
|
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
#Set the undefined condition to infinity
|
1961
|
0
|
0
|
|
|
|
0
|
$options{tree_size} = 1e6 unless defined $options{tree_size};
|
1962
|
0
|
0
|
|
|
|
0
|
$options{tree_age} = 1e6 unless defined $options{tree_age};
|
1963
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
#Set default rates
|
1965
|
0
|
0
|
|
|
|
0
|
$options{birth_rate} = 1 unless defined( $options{birth_rate} );
|
1966
|
0
|
0
|
|
|
|
0
|
$options{model_param} = 0 unless defined( $options{model_param} );
|
1967
|
|
|
|
|
|
|
|
1968
|
|
|
|
|
|
|
#Each node gets an ID number this tracks these
|
1969
|
0
|
|
|
|
|
0
|
my $node_id = 0;
|
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
#Create a new tree with a root, start the list of terminal species
|
1972
|
0
|
|
|
|
|
0
|
my $tree = Bio::Phylo::Forest::Tree->new();
|
1973
|
0
|
|
|
|
|
0
|
my $root = Bio::Phylo::Forest::Node->new();
|
1974
|
0
|
|
|
|
|
0
|
$root->set_branch_length(0);
|
1975
|
0
|
|
|
|
|
0
|
$root->set_name( 'ID' . $node_id++ );
|
1976
|
0
|
|
|
|
|
0
|
$tree->insert($root);
|
1977
|
0
|
|
|
|
|
0
|
my @terminals = ($root);
|
1978
|
0
|
|
|
|
|
0
|
my @birth_rates = ( $options{birth_rate} );
|
1979
|
0
|
|
|
|
|
0
|
my $net_rate = $options{birth_rate};
|
1980
|
0
|
|
|
|
|
0
|
my $next_speciation;
|
1981
|
0
|
|
|
|
|
0
|
my $time = 0;
|
1982
|
0
|
|
|
|
|
0
|
my $tree_size = 1;
|
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
#Check whether we have a non-zero root edge
|
1985
|
0
|
0
|
0
|
|
|
0
|
if ( defined $options{root_edge} && $options{root_edge} ) {
|
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
#Non-zero root. We set the time to the first speciation event
|
1988
|
0
|
|
|
|
|
0
|
$next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
|
1989
|
|
|
|
|
|
|
}
|
1990
|
|
|
|
|
|
|
else {
|
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
#Zero root, we want a speciation event straight away
|
1993
|
0
|
|
|
|
|
0
|
$next_speciation = 0;
|
1994
|
|
|
|
|
|
|
}
|
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
#While we haven't reached termination
|
1997
|
0
|
|
0
|
|
|
0
|
while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
|
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
#Add the time since the last event to all terminal species
|
2000
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
2001
|
|
|
|
|
|
|
$_->set_branch_length( $_->get_branch_length +
|
2002
|
0
|
|
|
|
|
0
|
min( $next_speciation, $options{tree_age} - $time ) );
|
2003
|
|
|
|
|
|
|
}
|
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
#Update the time
|
2006
|
0
|
|
|
|
|
0
|
$time += $next_speciation;
|
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
#If the tree exceeds the time limit we are done
|
2009
|
0
|
0
|
|
|
|
0
|
return $tree if ( $time > $options{tree_age} );
|
2010
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
#Get the species effected by this event
|
2012
|
0
|
|
|
|
|
0
|
my $rand_select = rand($net_rate);
|
2013
|
0
|
|
|
|
|
0
|
my $selected = 0;
|
2014
|
0
|
|
0
|
|
|
0
|
for (
|
2015
|
|
|
|
|
|
|
;
|
2016
|
|
|
|
|
|
|
$selected < scalar @terminals
|
2017
|
|
|
|
|
|
|
&& $rand_select > $birth_rates[$selected] ;
|
2018
|
|
|
|
|
|
|
$selected++
|
2019
|
|
|
|
|
|
|
)
|
2020
|
|
|
|
|
|
|
{
|
2021
|
0
|
|
|
|
|
0
|
$rand_select -= $birth_rates[$selected];
|
2022
|
|
|
|
|
|
|
}
|
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
#Remove it from the terminals list
|
2025
|
0
|
|
|
|
|
0
|
my $effected = splice( @terminals, $selected, 1 );
|
2026
|
0
|
|
|
|
|
0
|
my $effected_rate = splice( @birth_rates, $selected, 1 );
|
2027
|
|
|
|
|
|
|
my $p =
|
2028
|
0
|
|
|
|
|
0
|
qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 );
|
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
#If we have a speciation event we add two new species
|
2031
|
0
|
|
|
|
|
0
|
foreach ( 1, 2 ) {
|
2032
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
#Create a new species
|
2034
|
0
|
|
|
|
|
0
|
my $child = Bio::Phylo::Forest::Node->new();
|
2035
|
0
|
|
|
|
|
0
|
$child->set_name( 'ID' . $node_id++ );
|
2036
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
#Give it a zero edge length
|
2038
|
0
|
|
|
|
|
0
|
$child->set_branch_length(0);
|
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
#Add it as a child to the speciating species
|
2041
|
0
|
|
|
|
|
0
|
$effected->set_child($child);
|
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
#Add it to the tree
|
2044
|
0
|
|
|
|
|
0
|
$tree->insert($child);
|
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
#Add it to the terminals list
|
2047
|
0
|
|
|
|
|
0
|
push( @terminals, $child );
|
2048
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
#New speciation rate
|
2050
|
0
|
|
|
|
|
0
|
my $new_speciation_rate = $effected_rate * $p;
|
2051
|
0
|
|
|
|
|
0
|
$p = 1 - $p;
|
2052
|
0
|
0
|
|
|
|
0
|
if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
|
|
0
|
|
|
|
|
0
|
|
2053
|
0
|
|
|
|
|
0
|
push( @birth_rates, $new_speciation_rate );
|
2054
|
|
|
|
|
|
|
}
|
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
#Windows sometimes returns 0 values for rand...
|
2057
|
0
|
|
|
|
|
0
|
my ( $r1, $r2 ) = ( 0, 0 );
|
2058
|
0
|
|
|
|
|
0
|
$r1 = rand until $r1;
|
2059
|
0
|
|
|
|
|
0
|
$tree_size = scalar @terminals;
|
2060
|
|
|
|
|
|
|
|
2061
|
|
|
|
|
|
|
#If all species have stopped speciating (unlikely)
|
2062
|
0
|
0
|
|
|
|
0
|
if ( $net_rate == 0 ) {
|
2063
|
0
|
|
|
|
|
0
|
return $tree;
|
2064
|
|
|
|
|
|
|
}
|
2065
|
0
|
|
|
|
|
0
|
$next_speciation = -log($r1) / $net_rate / $tree_size;
|
2066
|
0
|
0
|
|
|
|
0
|
return $tree unless $tree_size;
|
2067
|
|
|
|
|
|
|
}
|
2068
|
0
|
|
|
|
|
0
|
return $tree;
|
2069
|
|
|
|
|
|
|
}
|
2070
|
|
|
|
|
|
|
|
2071
|
|
|
|
|
|
|
=back
|
2072
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
=cut
|
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=begin comment
|
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
###########################################################
|
2078
|
|
|
|
|
|
|
#INTERNAL METHODS
|
2079
|
|
|
|
|
|
|
#These are methods that permit additional manipulations of
|
2080
|
|
|
|
|
|
|
#a Bio::Phylo::Tree to be easily made. As such some of
|
2081
|
|
|
|
|
|
|
#these could easily be moved into Bio::Phylo::Forest::Tree
|
2082
|
|
|
|
|
|
|
###########################################################
|
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=end comment
|
2085
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=cut
|
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
=begin comment
|
2089
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
Type : Internal method
|
2091
|
|
|
|
|
|
|
Title : copy_tree
|
2092
|
|
|
|
|
|
|
Usage : $tree = copy_tree($tree)
|
2093
|
|
|
|
|
|
|
Function: Makes a new independent copy of a tree
|
2094
|
|
|
|
|
|
|
Returns : the phylogenetic $tree
|
2095
|
|
|
|
|
|
|
Args : the phylogenetic $tree
|
2096
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
=end comment
|
2098
|
|
|
|
|
|
|
|
2099
|
|
|
|
|
|
|
=cut
|
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
sub copy_tree {
|
2102
|
15
|
|
|
15
|
0
|
105
|
return Bio::Phylo::IO->parse(
|
2103
|
|
|
|
|
|
|
-format => 'newick',
|
2104
|
|
|
|
|
|
|
-string => shift->to_newick( '-nodelabels' => 1 )
|
2105
|
|
|
|
|
|
|
)->first;
|
2106
|
|
|
|
|
|
|
}
|
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
=begin comment
|
2109
|
|
|
|
|
|
|
|
2110
|
|
|
|
|
|
|
Type : Internal method
|
2111
|
|
|
|
|
|
|
Title : truncate_tree_time
|
2112
|
|
|
|
|
|
|
Usage : truncate_tree_time($tree,$age)
|
2113
|
|
|
|
|
|
|
Function: Truncates the tree at the specified age
|
2114
|
|
|
|
|
|
|
Returns : N/A
|
2115
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified)
|
2116
|
|
|
|
|
|
|
$age: the age at which to cut the $tree
|
2117
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
=end comment
|
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=cut
|
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
sub truncate_tree_time {
|
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
#$node and $time are used only by this function recursively
|
2125
|
987
|
|
|
987
|
0
|
1916
|
my ( $tree, $age, $node, $time ) = @_;
|
2126
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
#If node and time weren't specified we are starting from the root
|
2128
|
987
|
100
|
|
|
|
2067
|
$node = $tree->get_root unless defined $node;
|
2129
|
987
|
100
|
|
|
|
1666
|
$time = 0 unless defined $time;
|
2130
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
#If we are truncating this branch
|
2132
|
987
|
100
|
|
|
|
1783
|
if ( $time + $node->get_branch_length >= $age ) {
|
2133
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
#Collapse the node unless it is terminal
|
2135
|
249
|
100
|
|
|
|
610
|
$node->collapse unless $node->is_terminal();
|
2136
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
#Set the branch length appropriately
|
2138
|
249
|
|
|
|
|
872
|
$node->set_branch_length( $age - $time );
|
2139
|
249
|
|
|
|
|
795
|
return;
|
2140
|
|
|
|
|
|
|
}
|
2141
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
#If this node has no children we are done
|
2143
|
738
|
100
|
|
|
|
1729
|
return if $node->is_terminal();
|
2144
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
#Call the function recursively on the children
|
2146
|
435
|
|
|
|
|
603
|
foreach ( @{ $node->get_children } ) {
|
|
435
|
|
|
|
|
798
|
|
2147
|
972
|
|
|
|
|
2222
|
truncate_tree_time( $tree, $age, $_,
|
2148
|
|
|
|
|
|
|
$time + $node->get_branch_length() );
|
2149
|
|
|
|
|
|
|
}
|
2150
|
|
|
|
|
|
|
}
|
2151
|
|
|
|
|
|
|
|
2152
|
|
|
|
|
|
|
=begin comment
|
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
Type : Internal method
|
2155
|
|
|
|
|
|
|
Title : truncate_tree_size
|
2156
|
|
|
|
|
|
|
Usage : truncate_tree_size($tree,$size)
|
2157
|
|
|
|
|
|
|
Function: Truncates the tree to the specified number of species
|
2158
|
|
|
|
|
|
|
Returns : N/A
|
2159
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified)
|
2160
|
|
|
|
|
|
|
$size: random species are delete so that the tree has this many terminals
|
2161
|
|
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
=end comment
|
2163
|
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
=cut
|
2165
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
sub truncate_tree_size {
|
2167
|
5
|
|
|
5
|
0
|
17
|
my ( $tree, $size ) = @_;
|
2168
|
5
|
|
|
|
|
8
|
my @terminals = @{ $tree->get_terminals };
|
|
5
|
|
|
|
|
23
|
|
2169
|
5
|
|
|
|
|
30
|
my @names;
|
2170
|
|
|
|
|
|
|
|
2171
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root
|
2172
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly
|
2173
|
|
|
|
|
|
|
#calling the analogous functions on the tree
|
2174
|
5
|
|
|
|
|
23
|
_calc_node_properties($tree);
|
2175
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
#Only push species that are extant and store the number of those
|
2177
|
|
|
|
|
|
|
# my $tree_height = $tree->get_tallest_tip->calc_path_to_root;
|
2178
|
5
|
|
|
|
|
20
|
my $tree_height = $tree->get_root->get_generic('tree_height');
|
2179
|
5
|
|
|
|
|
17
|
foreach (@terminals) {
|
2180
|
782
|
100
|
|
|
|
1476
|
if (
|
2181
|
|
|
|
|
|
|
abs(
|
2182
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) /
|
2183
|
|
|
|
|
|
|
$tree_height
|
2184
|
|
|
|
|
|
|
) < 1e-6
|
2185
|
|
|
|
|
|
|
)
|
2186
|
|
|
|
|
|
|
{
|
2187
|
144
|
|
|
|
|
301
|
push( @names, $_->get_name );
|
2188
|
|
|
|
|
|
|
}
|
2189
|
|
|
|
|
|
|
}
|
2190
|
5
|
50
|
|
|
|
21
|
if ( @names < $size ) { print "Internal error\n"; }
|
|
0
|
|
|
|
|
0
|
|
2191
|
5
|
|
|
|
|
11
|
my %deletions;
|
2192
|
5
|
|
|
|
|
23
|
while ( scalar( keys %deletions ) < @names - $size ) {
|
2193
|
159
|
|
|
|
|
365
|
$deletions{ $names[ int( rand(@names) ) ] } = 1;
|
2194
|
|
|
|
|
|
|
}
|
2195
|
5
|
|
|
|
|
65
|
$tree = prune_tips( $tree, [ keys %deletions ] );
|
2196
|
5
|
|
|
|
|
158
|
return $tree;
|
2197
|
|
|
|
|
|
|
}
|
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
sub _get_ultrametric_size {
|
2200
|
0
|
|
|
0
|
|
0
|
my ( $tree, $size ) = @_;
|
2201
|
0
|
|
|
|
|
0
|
my @terminals = @{ $tree->get_terminals };
|
|
0
|
|
|
|
|
0
|
|
2202
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree);
|
2203
|
0
|
|
|
|
|
0
|
my @names;
|
2204
|
0
|
|
|
|
|
0
|
my $tree_height = $tree->get_root->get_generic('tree_height');
|
2205
|
0
|
|
|
|
|
0
|
foreach (@terminals) {
|
2206
|
0
|
0
|
|
|
|
0
|
if (
|
2207
|
|
|
|
|
|
|
abs(
|
2208
|
|
|
|
|
|
|
( $_->get_generic('root_distance') - $tree_height ) /
|
2209
|
|
|
|
|
|
|
$tree_height
|
2210
|
|
|
|
|
|
|
) < 1e-6
|
2211
|
|
|
|
|
|
|
)
|
2212
|
|
|
|
|
|
|
{
|
2213
|
0
|
|
|
|
|
0
|
push( @names, $_->get_name );
|
2214
|
|
|
|
|
|
|
}
|
2215
|
|
|
|
|
|
|
}
|
2216
|
0
|
|
|
|
|
0
|
return scalar @names;
|
2217
|
|
|
|
|
|
|
}
|
2218
|
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
=begin comment
|
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
Type : Internal method
|
2222
|
|
|
|
|
|
|
Title : remove_extinct_species
|
2223
|
|
|
|
|
|
|
Usage : remove_extinct_species($tree)
|
2224
|
|
|
|
|
|
|
Function: Removes extinct species from the tree. An extinct species
|
2225
|
|
|
|
|
|
|
is a terminal that does not extend as far as the furthest
|
2226
|
|
|
|
|
|
|
terminal(s).
|
2227
|
|
|
|
|
|
|
Returns : N/A
|
2228
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified)
|
2229
|
|
|
|
|
|
|
$age: the age at which to cut the $tree
|
2230
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
=end comment
|
2232
|
|
|
|
|
|
|
|
2233
|
|
|
|
|
|
|
=cut
|
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
sub remove_extinct_species {
|
2236
|
0
|
|
|
0
|
0
|
0
|
my $tree = shift;
|
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
#Calculate the tree height and node distances from the root
|
2239
|
|
|
|
|
|
|
#much more efficient to do this in one hit than repeatedly
|
2240
|
|
|
|
|
|
|
#calling the analogous functions on the tree
|
2241
|
0
|
|
|
|
|
0
|
_calc_node_properties($tree);
|
2242
|
0
|
|
|
|
|
0
|
my $height = $tree->get_root->get_generic('tree_height');
|
2243
|
0
|
0
|
|
|
|
0
|
return unless $height > 0;
|
2244
|
0
|
|
|
|
|
0
|
my $leaves = $tree->get_terminals;
|
2245
|
0
|
0
|
|
|
|
0
|
return unless $leaves;
|
2246
|
0
|
|
|
|
|
0
|
my @remove;
|
2247
|
0
|
|
|
|
|
0
|
foreach ( @{$leaves} ) {
|
|
0
|
|
|
|
|
0
|
|
2248
|
|
|
|
|
|
|
|
2249
|
0
|
0
|
|
|
|
0
|
unless (
|
2250
|
|
|
|
|
|
|
abs( ( $_->get_generic('root_distance') - $height ) / $height ) <
|
2251
|
|
|
|
|
|
|
1e-6 )
|
2252
|
|
|
|
|
|
|
{
|
2253
|
0
|
|
|
|
|
0
|
push( @remove, $_->get_name );
|
2254
|
|
|
|
|
|
|
}
|
2255
|
|
|
|
|
|
|
}
|
2256
|
0
|
|
|
|
|
0
|
$tree = prune_tips( $tree, \@remove );
|
2257
|
0
|
|
|
|
|
0
|
return $tree;
|
2258
|
|
|
|
|
|
|
}
|
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=begin comment
|
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
Type : Internal method
|
2263
|
|
|
|
|
|
|
Title : prune_tips
|
2264
|
|
|
|
|
|
|
Usage : prune_tips($tree,$tips)
|
2265
|
|
|
|
|
|
|
Function: Removes named terminals from the tree
|
2266
|
|
|
|
|
|
|
Returns : N/A
|
2267
|
|
|
|
|
|
|
Args : $tree: the phylogenetic tree (which will be modified)
|
2268
|
|
|
|
|
|
|
$tips: array ref of terminals to remove from the tree
|
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
NB: Available as $tree->prune_tips($tips), but had some problems with
|
2271
|
|
|
|
|
|
|
this.
|
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
=end comment
|
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=cut
|
2276
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
sub prune_tips {
|
2278
|
5
|
|
|
5
|
0
|
14
|
my ( $self, $tips ) = @_;
|
2279
|
5
|
|
|
|
|
9
|
my %names_to_delete = map { $_ => 1 } @{$tips};
|
|
94
|
|
|
|
|
167
|
|
|
5
|
|
|
|
|
14
|
|
2280
|
688
|
|
|
|
|
1121
|
my %keep = map { $_->get_name => 1 }
|
2281
|
782
|
|
|
|
|
1386
|
grep { not exists $names_to_delete{ $_->get_name } }
|
2282
|
5
|
|
|
|
|
15
|
@{ $self->get_terminals };
|
|
5
|
|
|
|
|
22
|
|
2283
|
|
|
|
|
|
|
$self->visit_depth_first(
|
2284
|
|
|
|
|
|
|
-post => sub {
|
2285
|
1511
|
|
|
1511
|
|
2285
|
my $node = shift;
|
2286
|
1511
|
100
|
|
|
|
2910
|
if ( $node->is_terminal ) {
|
2287
|
782
|
100
|
|
|
|
1814
|
if ( not $keep{ $node->get_name } ) {
|
2288
|
94
|
|
|
|
|
284
|
$node->set_parent();
|
2289
|
94
|
|
|
|
|
279
|
$self->delete($node);
|
2290
|
|
|
|
|
|
|
}
|
2291
|
|
|
|
|
|
|
}
|
2292
|
|
|
|
|
|
|
else {
|
2293
|
729
|
|
|
|
|
1120
|
my $seen_tip_to_keep = 0;
|
2294
|
729
|
|
|
|
|
1094
|
for my $tip ( @{ $node->get_terminals } ) {
|
|
729
|
|
|
|
|
1614
|
|
2295
|
16505
|
100
|
|
|
|
28557
|
$seen_tip_to_keep++ if $keep{ $tip->get_name };
|
2296
|
|
|
|
|
|
|
}
|
2297
|
729
|
100
|
|
|
|
4615
|
if ( not $seen_tip_to_keep ) {
|
2298
|
25
|
|
|
|
|
67
|
$node->set_parent();
|
2299
|
25
|
|
|
|
|
60
|
$self->delete($node);
|
2300
|
|
|
|
|
|
|
}
|
2301
|
|
|
|
|
|
|
}
|
2302
|
|
|
|
|
|
|
}
|
2303
|
5
|
|
|
|
|
132
|
);
|
2304
|
5
|
|
|
|
|
83
|
$self->remove_unbranched_internals;
|
2305
|
5
|
|
|
|
|
273
|
return $self;
|
2306
|
|
|
|
|
|
|
}
|
2307
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
=begin comment
|
2309
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
Type : Internal method
|
2311
|
|
|
|
|
|
|
Title : lineage_through_time
|
2312
|
|
|
|
|
|
|
Usage : ($time,$count) = lineage_through_time($tree)
|
2313
|
|
|
|
|
|
|
Function: Alternative to $tree->ltt that permits extinctions
|
2314
|
|
|
|
|
|
|
Returns : $time: array ref of times
|
2315
|
|
|
|
|
|
|
$count: array ref of species counts corresponding to the times
|
2316
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data
|
2317
|
|
|
|
|
|
|
|
2318
|
|
|
|
|
|
|
=end comment
|
2319
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
=cut
|
2321
|
|
|
|
|
|
|
|
2322
|
|
|
|
|
|
|
sub lineage_through_time {
|
2323
|
44
|
|
|
44
|
0
|
91
|
my $tree = shift;
|
2324
|
44
|
|
|
|
|
139
|
my ( $speciation, $extinction ) = _recursive_ltt_helper($tree);
|
2325
|
44
|
|
|
|
|
76
|
my @speciation = sort { $a <=> $b } @{$speciation};
|
|
7876
|
|
|
|
|
8586
|
|
|
44
|
|
|
|
|
296
|
|
2326
|
44
|
|
|
|
|
74
|
my @extinction = sort { $a <=> $b } @{$extinction};
|
|
8232
|
|
|
|
|
8848
|
|
|
44
|
|
|
|
|
138
|
|
2327
|
44
|
|
|
|
|
104
|
my @time = (0);
|
2328
|
44
|
|
|
|
|
83
|
my @count = (1);
|
2329
|
44
|
|
|
|
|
67
|
my $n_species = 1;
|
2330
|
44
|
|
|
|
|
180
|
my $end_time = max( @speciation, @extinction );
|
2331
|
44
|
50
|
|
|
|
137
|
return ( [], [] ) if ( $end_time == 0 );
|
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
#We remove any extinction events occurring at the very end of the tree (as they are not real extinctions)
|
2334
|
44
|
|
100
|
|
|
267
|
while ( scalar @extinction
|
2335
|
|
|
|
|
|
|
&& ( $end_time - $extinction[-1] ) / $end_time < 1e-6 )
|
2336
|
|
|
|
|
|
|
{
|
2337
|
594
|
|
|
|
|
1314
|
pop @extinction;
|
2338
|
|
|
|
|
|
|
}
|
2339
|
44
|
|
100
|
|
|
159
|
while ( scalar @speciation || scalar @extinction ) {
|
2340
|
3028
|
100
|
100
|
|
|
7896
|
if ( scalar @extinction == 0
|
|
|
|
100
|
|
|
|
|
2341
|
|
|
|
|
|
|
|| ( scalar @speciation && $speciation[0] < $extinction[0] ) )
|
2342
|
|
|
|
|
|
|
{
|
2343
|
1789
|
|
|
|
|
2306
|
push( @count, ++$n_species );
|
2344
|
1789
|
|
|
|
|
3273
|
push( @time, shift(@speciation) );
|
2345
|
|
|
|
|
|
|
}
|
2346
|
|
|
|
|
|
|
else {
|
2347
|
1239
|
|
|
|
|
1582
|
push( @count, --$n_species );
|
2348
|
1239
|
|
|
|
|
2289
|
push( @time, shift(@extinction) );
|
2349
|
|
|
|
|
|
|
}
|
2350
|
|
|
|
|
|
|
}
|
2351
|
44
|
|
|
|
|
243
|
return ( \@time, \@count );
|
2352
|
|
|
|
|
|
|
}
|
2353
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
=begin comment
|
2355
|
|
|
|
|
|
|
|
2356
|
|
|
|
|
|
|
Type : Internal method
|
2357
|
|
|
|
|
|
|
Title : _recursive_ltt_helper
|
2358
|
|
|
|
|
|
|
Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree)
|
2359
|
|
|
|
|
|
|
Function: Helper for lineage_through_time
|
2360
|
|
|
|
|
|
|
Returns : $speciation: array ref of speciation times
|
2361
|
|
|
|
|
|
|
$extinction: array ref of extinction times
|
2362
|
|
|
|
|
|
|
Args : the phylogenetic $tree for which to produce the ltt data
|
2363
|
|
|
|
|
|
|
|
2364
|
|
|
|
|
|
|
=end comment
|
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
=cut
|
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
sub _recursive_ltt_helper {
|
2369
|
3622
|
|
|
3622
|
|
6006
|
my ( $tree, $node, $time ) = @_;
|
2370
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
#If we are being invoked at the root level
|
2372
|
3622
|
100
|
|
|
|
5755
|
$node = $tree->get_root unless defined $node;
|
2373
|
3622
|
100
|
|
|
|
5167
|
$time = 0 unless defined $time;
|
2374
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
#The new time
|
2376
|
3622
|
|
|
|
|
7092
|
$time += $node->get_branch_length;
|
2377
|
3622
|
100
|
|
|
|
7328
|
return ( [], [$time] ) if ( $node->is_terminal );
|
2378
|
1789
|
|
|
|
|
2762
|
my @speciation;
|
2379
|
|
|
|
|
|
|
my @extinction;
|
2380
|
1789
|
|
|
|
|
2208
|
foreach ( @{ $node->get_children } ) {
|
|
1789
|
|
|
|
|
3098
|
|
2381
|
3578
|
|
|
|
|
6838
|
my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time );
|
2382
|
3578
|
|
|
|
|
4840
|
@speciation = ( @speciation, @{$spec} );
|
|
3578
|
|
|
|
|
7338
|
|
2383
|
3578
|
|
|
|
|
4416
|
@extinction = ( @extinction, @{$ext} );
|
|
3578
|
|
|
|
|
11691
|
|
2384
|
|
|
|
|
|
|
}
|
2385
|
1789
|
|
|
|
|
3004
|
push( @speciation, $time );
|
2386
|
1789
|
|
|
|
|
3316
|
return ( \@speciation, \@extinction );
|
2387
|
|
|
|
|
|
|
}
|
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
=begin comment
|
2390
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
Type : Internal method.
|
2392
|
|
|
|
|
|
|
Title : _calc_node_properties
|
2393
|
|
|
|
|
|
|
Usage : _calc_node_properties($tree);
|
2394
|
|
|
|
|
|
|
Function: Calculates the distance of nodes from the root
|
2395
|
|
|
|
|
|
|
Returns : The maximum distance from the root
|
2396
|
|
|
|
|
|
|
Args :
|
2397
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
=end comment
|
2399
|
|
|
|
|
|
|
|
2400
|
|
|
|
|
|
|
=cut
|
2401
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
sub _calc_node_properties {
|
2403
|
1511
|
|
|
1511
|
|
2582
|
my ( $node, $root_distance );
|
2404
|
1511
|
|
|
|
|
2068
|
my $tree = shift;
|
2405
|
1511
|
|
|
|
|
3995
|
my $root = $tree->get_root;
|
2406
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
#Check whether we were given a node and distance
|
2408
|
1511
|
100
|
|
|
|
3905
|
if ( scalar @_ ) {
|
2409
|
1506
|
|
|
|
|
2318
|
$node = shift;
|
2410
|
1506
|
|
|
|
|
2083
|
$root_distance = shift;
|
2411
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
#Otherwise the root is the default
|
2413
|
|
|
|
|
|
|
}
|
2414
|
|
|
|
|
|
|
else {
|
2415
|
5
|
|
|
|
|
13
|
$node = $root;
|
2416
|
5
|
|
|
|
|
41
|
$root->set_generic( tree_height => 0 );
|
2417
|
5
|
|
|
|
|
9
|
$root_distance = 0;
|
2418
|
|
|
|
|
|
|
}
|
2419
|
1511
|
|
|
|
|
5087
|
$node->set_generic( root_distance => $root_distance );
|
2420
|
1511
|
100
|
|
|
|
4164
|
if ( $root_distance > $root->get_generic('tree_height') ) {
|
2421
|
166
|
|
|
|
|
431
|
$root->set_generic( tree_height => $root_distance );
|
2422
|
|
|
|
|
|
|
}
|
2423
|
1511
|
|
|
|
|
2498
|
my $terminal_count = 0;
|
2424
|
1511
|
|
|
|
|
3639
|
my $children = $node->get_children;
|
2425
|
1511
|
50
|
|
|
|
3728
|
if ( defined $children ) {
|
2426
|
1511
|
|
|
|
|
1942
|
foreach ( @{$children} ) {
|
|
1511
|
|
|
|
|
4825
|
|
2427
|
1506
|
|
|
|
|
3813
|
_calc_node_properties( $tree, $_,
|
2428
|
|
|
|
|
|
|
$root_distance + $_->get_branch_length() );
|
2429
|
|
|
|
|
|
|
}
|
2430
|
|
|
|
|
|
|
}
|
2431
|
|
|
|
|
|
|
}
|
2432
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
=begin comment
|
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
Type : Internal method
|
2436
|
|
|
|
|
|
|
Title : nchoosek
|
2437
|
|
|
|
|
|
|
Usage : $out = nchoosek($n,$k)
|
2438
|
|
|
|
|
|
|
Function: Returns the binomial coefficient for $n and $k
|
2439
|
|
|
|
|
|
|
Returns : the binomial coefficient
|
2440
|
|
|
|
|
|
|
Args : $n, $k
|
2441
|
|
|
|
|
|
|
|
2442
|
|
|
|
|
|
|
=end comment
|
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
=cut
|
2445
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
sub nchoosek {
|
2447
|
0
|
|
|
0
|
0
|
|
my ( $n, $k ) = @_;
|
2448
|
0
|
|
|
|
|
|
my $r = 1;
|
2449
|
0
|
0
|
0
|
|
|
|
return 0 if ( $k > $n || $k < 0 );
|
2450
|
0
|
|
|
|
|
|
for ( my $d = 1 ; $d <= $k ; $d++ ) {
|
2451
|
0
|
|
|
|
|
|
$r *= $n--;
|
2452
|
0
|
|
|
|
|
|
$r /= $d;
|
2453
|
|
|
|
|
|
|
}
|
2454
|
0
|
|
|
|
|
|
return $r;
|
2455
|
|
|
|
|
|
|
}
|
2456
|
|
|
|
|
|
|
1;
|