File Coverage

blib/lib/Bio/Phylo/Generator.pm
Criterion Covered Total %
statement 162 162 100.0
branch 14 14 100.0
condition 13 27 48.1
subroutine 25 25 100.0
pod 8 8 100.0
total 222 236 94.0


line stmt bran cond sub pod time code
1             package Bio::Phylo::Generator;
2 1     1   554 use strict;
  1         2  
  1         27  
3 1     1   5 use warnings;
  1         2  
  1         25  
4 1     1   5 use Bio::Phylo::Util::CONSTANT 'looks_like_hash';
  1         3  
  1         45  
5 1     1   5 use Bio::Phylo::Util::Exceptions 'throw';
  1         2  
  1         33  
6 1     1   5 use Bio::Phylo::Util::Logger;
  1         2  
  1         35  
7 1     1   280 use Bio::Phylo::Util::Dependency 'Math::Random';
  1         2  
  1         5  
8 1     1   310 use Bio::Phylo::Factory;
  1         2  
  1         5  
9             Math::Random->import(qw'random_exponential random_uniform');
10             {
11             my $logger = Bio::Phylo::Util::Logger->new;
12             my $factory = Bio::Phylo::Factory->new;
13              
14             =head1 NAME
15              
16             Bio::Phylo::Generator - Generator of tree topologies
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Phylo::Factory;
21             my $fac = Bio::Phylo::Factory->new;
22             my $gen = $fac->create_generator;
23             my $trees = $gen->gen_rand_pure_birth(
24             '-tips' => 10,
25             '-model' => 'yule',
26             '-trees' => 10,
27             );
28              
29             # prints 'Bio::Phylo::Forest'
30             print ref $trees;
31              
32             =head1 DESCRIPTION
33              
34             The generator module is used to simulate trees under various models.
35              
36             =head1 METHODS
37              
38             =head2 CONSTRUCTOR
39              
40             =over
41              
42             =item new()
43              
44             Generator constructor.
45              
46             Type : Constructor
47             Title : new
48             Usage : my $gen = Bio::Phylo::Generator->new;
49             Function: Initializes a Bio::Phylo::Generator object.
50             Returns : A Bio::Phylo::Generator object.
51             Args : NONE
52              
53             =cut
54              
55             sub new {
56              
57             # could be child class
58 1     1 1 7 my $class = shift;
59              
60             # notify user
61 1         6 $logger->info("constructor called for '$class'");
62              
63             # the object turns out to be stateless
64 1         2 my $self = bless \$class, $class;
65 1         6 return $self;
66             }
67              
68             =back
69              
70             =head2 GENERATOR
71              
72             =over
73              
74             =item gen_rand_pure_birth()
75              
76             This method generates a Bio::Phylo::Forest
77             object populated with Yule/Hey trees.
78              
79             Type : Generator
80             Title : gen_rand_pure_birth
81             Usage : my $trees = $gen->gen_rand_pure_birth(
82             '-tips' => 10,
83             '-model' => 'yule',
84             '-trees' => 10,
85             );
86             Function: Generates markov tree shapes,
87             with branch lengths sampled
88             from a user defined model of
89             clade growth, for a user defined
90             number of tips.
91             Returns : A Bio::Phylo::Forest object.
92             Args : -tips => number of terminal nodes (default: 10),
93             -model => either 'yule' or 'hey',
94             -trees => number of trees to generate (default: 10)
95             Optional: -factory => a Bio::Phylo::Factory object
96              
97             =cut
98              
99             sub _yule_rand_bl {
100 43     43   62 my $i = shift;
101 43         133 return random_exponential( 1, 1 / ( $i + 1 ) );
102             }
103              
104             sub _hey_rand_bl {
105 19     19   30 my $i = shift;
106 19         62 random_exponential( 1, ( 1 / ( $i * ( $i + 1 ) ) ) );
107             }
108              
109             sub _make_split {
110 100     100   253 my ( $parent, $length, $fac, $nodes ) = @_;
111 100         120 my @tips;
112 100         209 for ( 1 .. 2 ) {
113 200         924 my $node = $fac->create_node;
114 200         645 $node->set_branch_length($length);
115 200         494 $node->set_parent($parent);
116 200         410 $nodes->{ $node->get_id } = $node;
117 200         432 push @tips, $node;
118             }
119 100         224 return @tips;
120             }
121              
122             sub gen_rand_pure_birth {
123 3     3 1 18 my $random = shift;
124 3         16 my %options = looks_like_hash @_;
125 3         9 my $model = $options{'-model'};
126 3 100       21 if ( $model =~ m/yule/i ) {
    100          
127 1         3 return $random->_gen_pure_birth(
128             '-blgen' => \&_yule_rand_bl,
129             @_,
130             );
131             }
132             elsif ( $model =~ m/hey/i ) {
133 1         7 return $random->_gen_pure_birth(
134             '-blgen' => \&_hey_rand_bl,
135             @_,
136             );
137             }
138             else {
139 1         5 throw 'BadFormat' => "model '$model' not implemented";
140             }
141             }
142              
143             sub _gen_pure_birth {
144 5     5   10 my $random = shift;
145 5         23 my %options = looks_like_hash @_;
146 5   33     30 my $factory = $options{'-factory'} || $factory;
147 5         12 my $blgen = $options{'-blgen'};
148 5   100     17 my $killrate = $options{'-killrate'} || 0;
149 5   50     13 my $ntips = $options{'-tips'} || 10;
150 5   50     16 my $ntrees = $options{'-trees'} || 10;
151 5         38 my $forest = $factory->create_forest;
152 5         18 for ( 0 .. ( $ntrees - 1 ) ) {
153              
154             # instantiate root node
155 5         32 my $root = $factory->create_node;
156 5         23 $root->set_branch_length(0);
157 5         17 my %nodes = ( $root->get_id => $root );
158              
159             # make the first split, insert new tips in @tips, from
160             # which we will draw (without replacement) a new tip
161             # to split until we've reached target number
162 5         22 push my @tips, _make_split( $root, $blgen->(1), $factory, \%nodes );
163              
164             # start growing the tree
165 5         10 my $i = 2;
166 5         9 my @extinct;
167 5         13 while (1) {
168 95 100       269 if ( rand(1) < $killrate ) {
169 5         14 my $extinct_index = int rand scalar @tips;
170 5         10 my $extinct = splice @tips, $extinct_index, 1;
171 5         11 push @extinct, $extinct;
172 5         13 delete $nodes{ $extinct->get_id };
173             }
174              
175             # obtain candidate parent of current split
176 95         115 my $parent;
177 95         204 ( $parent, @tips ) = _fetch_equiprobable(@tips);
178              
179             # generate branch length
180 95         225 my $bl = $blgen->( $i++ );
181              
182             # stretch all remaining tips to the present
183 95         744 for my $tip (@tips) {
184 920         1663 my $oldbl = $tip->get_branch_length;
185 920         1889 $tip->set_branch_length( $oldbl + $bl );
186             }
187              
188             # add new nodes to tips array
189 95         210 push @tips, _make_split( $parent, $bl, $factory, \%nodes );
190 95 100       233 last if scalar @tips >= $ntips;
191             }
192 5         34 my $tree = $factory->create_tree;
193             $tree->insert(
194 200         239 map { $_->[0] }
195 830         896 sort { $a->[1] <=> $b->[1] }
196 5         26 map { [ $_, $_->get_id ] } values %nodes
  200         343  
197             );
198 5         83 $tree->prune_tips( \@extinct );
199 5         37 $tree->_analyze;
200 5         48 $forest->insert($tree);
201             }
202 5         44 return $forest;
203             }
204              
205             =item gen_rand_birth_death()
206              
207             This method generates a Bio::Phylo::Forest
208             object populated under a birth/death model
209              
210             Type : Generator
211             Title : gen_rand_birth_death
212             Usage : my $trees = $gen->gen_rand_birth_death(
213             '-tips' => 10,
214             '-killrate' => 0.2,
215             '-trees' => 10,
216             );
217             Function: Generates trees where any growing lineage is equally
218             likely to split at any one time, and is equally likely
219             to go extinct at '-killrate'
220             Returns : A Bio::Phylo::Forest object.
221             Args : -tips => number of terminal nodes (default: 10),
222             -killrate => extinction over speciation rate (default: 0.2)
223             -trees => number of trees to generate (default: 10)
224             Optional: -factory => a Bio::Phylo::Factory object
225             Comments: Past extinction events are retained as unbranched internal
226             nodes in the produced trees.
227              
228             =cut
229              
230             sub gen_rand_birth_death {
231 1     1 1 12 my $random = shift;
232 1         7 my %options = looks_like_hash @_;
233             return $random->_gen_pure_birth(
234             '-blgen' => \&_yule_rand_bl,
235 1   50     11 '-killrate' => $options{'-killrate'} || 0.2,
236             @_,
237             );
238             }
239              
240             =item gen_exp_pure_birth()
241              
242             This method generates a Bio::Phylo::Forest object
243             populated with Yule/Hey trees whose branch lengths
244             are proportional to the expected waiting times (i.e.
245             not sampled from a distribution).
246              
247             Type : Generator
248             Title : gen_exp_pure_birth
249             Usage : my $trees = $gen->gen_exp_pure_birth(
250             '-tips' => 10,
251             '-model' => 'yule',
252             '-trees' => 10,
253             );
254             Function: Generates markov tree shapes,
255             with branch lengths following
256             the expectation under a user
257             defined model of clade growth,
258             for a user defined number of tips.
259             Returns : A Bio::Phylo::Forest object.
260             Args : -tips => number of terminal nodes (default: 10),
261             -model => either 'yule' or 'hey'
262             -trees => number of trees to generate (default: 10)
263             Optional: -factory => a Bio::Phylo::Factory object
264              
265             =cut
266              
267             sub _yule_exp_bl {
268 19     19   25 my $i = shift;
269 19         43 return 1 / ( $i + 1 );
270             }
271              
272             sub _hey_exp_bl {
273 19     19   27 my $i = shift;
274 19         41 return 1 / ( $i * ( $i + 1 ) );
275             }
276              
277             sub gen_exp_pure_birth {
278 3     3 1 28 my $random = shift;
279 3         15 my %options = looks_like_hash @_;
280 3         11 my $model = $options{'-model'};
281 3 100       19 if ( $model =~ m/yule/i ) {
    100          
282 1         7 return $random->_gen_pure_birth(
283             '-blgen' => \&_yule_exp_bl,
284             @_,
285             );
286             }
287             elsif ( $model =~ m/hey/i ) {
288 1         6 return $random->_gen_pure_birth(
289             '-blgen' => \&_hey_exp_bl,
290             @_,
291             );
292             }
293             else {
294 1         7 throw 'BadFormat' => "model '$model' not implemented";
295             }
296             }
297              
298             =item gen_coalescent()
299              
300             This method generates coalescent trees for a given effective population size
301             (popsize) and number of alleles (tips) such that the probability of coalescence
302             in the previous generation for any pair of alleles is 1 / ( 2 * popsize ).
303              
304             Type : Generator
305             Title : gen_coalescent
306             Usage : my $trees = $gen->gen_coalescent(
307             '-tips' => 10,
308             '-popsize' => 100,
309             '-trees' => 10,
310             );
311             Function: Generates coalescent trees.
312             Returns : A Bio::Phylo::Forest object.
313             Args : -tips => number of terminal nodes (default: 10)
314             -popsize => effective population size (default: 100)
315             -trees => number of trees to generate (default: 10)
316             Optional: -factory => a Bio::Phylo::Factory object
317              
318             =cut
319              
320             sub gen_coalescent {
321 1     1 1 9 my $self = shift;
322 1         8 my %args = looks_like_hash @_;
323 1   50     8 my $popsize = $args{'-popsize'} || 100;
324 1   50     4 my $ntips = $args{'-tips'} || 10;
325 1   50     4 my $ntrees = $args{'-trees'} || 10;
326 1   33     6 my $factory = $args{'-factory'} || $factory;
327 1         8 my $forest = $factory->create_forest;
328 1         3 my $cutoff = 1 / ( 2 * $popsize );
329 1         4 for my $i ( 1 .. $ntrees ) {
330 1         4 my $ngen = 1;
331 1         2 my ( @tips, @nodes );
332 1         7 push @tips, $factory->create_node() for 1 .. $ntips;
333              
334             # starting from a pool of all tips, we iterate over all
335             # possible pairs, and for each pair we test to see if
336             # the coalesce at generation $ngen, at probability
337             # 1/2N. When they do, we create a parent for the pair,
338             # take the pair out of the pool and put the parent in it
339 1         5 while ( scalar @tips > 1 ) {
340 281         313 my $poolsize = $#tips;
341 281         288 my $j = 0;
342 281         385 while ( $j < $poolsize ) {
343 1138         1224 my $k = $j + 1;
344 1138         1502 while ( $k <= $poolsize ) {
345 3927         5178 my $rand = random_uniform();
346 3927 100       11924 if ( $rand <= $cutoff ) {
347 19         33 my $tip2 = splice @tips, $k, 1;
348 19         28 my $tip1 = splice @tips, $j, 1;
349 19         108 my $parent = $factory->create_node(
350             '-generic' => { 'age' => $ngen } );
351 19         64 unshift @nodes,
352             $tip1->set_parent($parent),
353             $tip2->set_parent($parent);
354 19         34 push @tips, $parent;
355 19         26 $poolsize--;
356             }
357 3927         5409 $k++;
358             }
359 1138         1531 $j++;
360             }
361 281         397 $ngen++;
362             }
363 1         2 push @nodes, shift @tips;
364 1         6 my $tree = $factory->create_tree()->insert(@nodes);
365 1         15 $tree->agetobl;
366 1         5 $forest->insert($tree);
367             }
368 1         5 return $forest;
369             }
370              
371             =item gen_equiprobable()
372              
373             This method draws tree shapes at random,
374             such that all shapes are equally probable.
375              
376             Type : Generator
377             Title : gen_equiprobable
378             Usage : my $trees = $gen->gen_equiprobable( '-tips' => 10 );
379             Function: Generates an equiprobable tree
380             shape, with branch lengths = 1;
381             Returns : A Bio::Phylo::Forest object.
382             Args : Optional: -tips => number of terminal nodes (default: 10),
383             Optional: -trees => number of trees to generate (default: 1),
384             Optional: -factory => a Bio::Phylo::Factory object
385              
386             =cut
387              
388             sub _fetch_equiprobable {
389 133     133   277 my @tips = @_;
390 133         288 my $tip_index = int rand scalar @tips;
391 133         205 my $tip = splice @tips, $tip_index, 1;
392 133         391 return $tip, @tips;
393             }
394              
395             sub _fetch_balanced {
396 30     30   63 return @_;
397             }
398              
399             sub _fetch_ladder {
400 30     30   218 my $tip = pop;
401 30         73 return $tip, @_;
402             }
403              
404             sub _gen_simple {
405 3     3   6 my $random = shift;
406 3         13 my %options = looks_like_hash @_;
407 3         8 my $fetcher = $options{'-fetcher'};
408 3   33     18 my $factory = $options{'-factory'} || $factory;
409 3   50     11 my $ntrees = $options{'-trees'} || 1;
410 3   50     11 my $ntips = $options{'-tips'} || 10;
411 3         23 my $forest = $factory->create_forest;
412 3         11 for my $i ( 1 .. $ntrees ) {
413 3         15 my $tree = $factory->create_tree;
414 3         8 my ( @tips, @nodes );
415              
416             # each iteration, we will remove two "tips" from this
417             # and add their newly created parent to it
418             push @tips, $factory->create_node( '-branch_length' => 1, )
419 3         25 for ( 1 .. $ntips );
420              
421             # this stays above 0 because the root ends up in it
422 3         10 while ( @tips > 1 ) {
423 49         252 my $parent = $factory->create_node( '-branch_length' => 1, );
424 49         159 $tree->insert($parent);
425 49         94 for ( 1 .. 2 ) {
426 98         121 my $tip;
427 98         174 ( $tip, @tips ) = $fetcher->(@tips);
428 98         237 $tree->insert( $tip->set_parent($parent) );
429             }
430              
431             # the parent becomes a new candidate tip
432 49         115 push @tips, $parent;
433             }
434 3         14 $forest->insert($tree);
435             }
436 3         17 return $forest;
437             }
438              
439             sub gen_equiprobable {
440 1     1 1 17 return _gen_simple( @_, '-fetcher' => \&_fetch_equiprobable );
441             }
442              
443             =item gen_balanced()
444              
445             This method creates the most balanced topology possible given the number of tips
446              
447             Type : Generator
448             Title : gen_balanced
449             Usage : my $trees = $gen->gen_balanced( '-tips' => 10 );
450             Function: Generates the most balanced topology
451             possible, with branch lengths = 1;
452             Returns : A Bio::Phylo::Forest object.
453             Args : Optional: -tips => number of terminal nodes (default: 10),
454             Optional: -trees => number of trees to generate (default: 1),
455             Optional: -factory => a Bio::Phylo::Factory object
456              
457             =cut
458              
459             sub gen_balanced {
460 1     1 1 14 return _gen_simple( @_, '-fetcher' => \&_fetch_balanced );
461             }
462              
463             =item gen_ladder()
464              
465             This method creates a ladder tree for the number of tips
466              
467             Type : Generator
468             Title : gen_ladder
469             Usage : my $trees = $gen->gen_ladder( '-tips' => 10 );
470             Function: Generates the least balanced topology
471             (a ladder), with branch lengths = 1;
472             Returns : A Bio::Phylo::Forest object.
473             Args : Optional: -tips => number of terminal nodes (default: 10),
474             Optional: -trees => number of trees to generate (default: 1),
475             Optional: -factory => a Bio::Phylo::Factory object
476              
477             =cut
478              
479             sub gen_ladder {
480 1     1 1 14 return _gen_simple( @_, '-fetcher' => \&_fetch_ladder );
481             }
482              
483             =back
484              
485             =head1 SEE ALSO
486              
487             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
488             for any user or developer questions and discussions.
489              
490             =over
491              
492             =item L<Bio::Phylo::Manual>
493              
494             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
495              
496             =back
497              
498             =head1 CITATION
499              
500             If you use Bio::Phylo in published research, please cite it:
501              
502             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
503             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
504             I<BMC Bioinformatics> B<12>:63.
505             L<http://dx.doi.org/10.1186/1471-2105-12-63>
506              
507             =cut
508              
509             }
510             1;