File Coverage

Bio/PopGen/Simulation/Coalescent.pm

Criterion	Covered	Total	%
statement	119	130	91.5
branch	20	34	58.8
condition	3	9	33.3
subroutine	13	13	100.0
pod	7	7	100.0
total	162	193	83.9

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::PopGen::Simulation::Coalescent
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Jason Stajich
7							#
8							# Copyright Jason Stajich
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::PopGen::Simulation::Coalescent - A Coalescent simulation factory
17
18							=head1 SYNOPSIS
19
20							use Bio::PopGen::Simulation::Coalescent;
21							my @taxonnames = qw(SpeciesA SpeciesB SpeciesC SpeciesD);
22							my $sim1 = Bio::PopGen::Simulation::Coalescent->new(-samples => \@taxonnames);
23
24							my $tree = $sim1->next_tree;
25
26							# add 20 mutations randomly to the tree
27							$sim1->add_Mutations($tree,20);
28
29							# or for anonymous samples
30
31							my $sim2 = Bio::PopGen::Simulation::Coalescent->new( -sample_size => 6,
32							-maxcount => 50);
33							my $tree2 = $sim2->next_tree;
34							# add 20 mutations randomly to the tree
35							$sim2->add_Mutations($tree2,20);
36
37							=head1 DESCRIPTION
38
39							Builds a random tree every time next_tree is called or up to -maxcount
40							times with branch lengths and provides the ability to randomly add
41							mutations onto the tree with a probabilty proportional to the branch
42							lengths.
43
44							This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
45
46							Hudson, R. R. 1990. Gene genealogies and the coalescent
47							process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
48							surveys in evolutionary biology. Vol. 7. Oxford University
49							Press, New York.
50
51							This module was previously named Bio::Tree::RandomTree
52
53							=head1 FEEDBACK
54
55							=head2 Mailing Lists
56
57							User feedback is an integral part of the evolution of this and other
58							Bioperl modules. Send your comments and suggestions preferably to
59							the Bioperl mailing list. Your participation is much appreciated.
60
61							bioperl-l@bioperl.org - General discussion
62							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63
64							=head2 Support
65
66							Please direct usage questions or support issues to the mailing list:
67
68							I
69
70							rather than to the module maintainer directly. Many experienced and
71							reponsive experts will be able look at the problem and quickly
72							address it. Please include a thorough description of the problem
73							with code and data examples if at all possible.
74
75							=head2 Reporting Bugs
76
77							Report bugs to the Bioperl bug tracking system to help us keep track
78							of the bugs and their resolution. Bug reports can be submitted via
79							the web:
80
81							https://github.com/bioperl/bioperl-live/issues
82
83							=head1 AUTHOR - Jason Stajich, Matthew Hahn
84
85							Email jason-at-bioperl-dot-org
86							Email matthew-dot-hahn-at-duke-dot-edu
87
88							=head1 APPENDIX
89
90							The rest of the documentation details each of the object methods.
91							Internal methods are usually preceded with a _
92
93							=cut
94
95
96							# Let the code begin...
97
98
99							package Bio::PopGen::Simulation::Coalescent;
100	1			1		629	use vars qw($PRECISION_DIGITS);
	1					1
	1					44
101	1			1		3	use strict;
	1					1
	1					23
102
103							$PRECISION_DIGITS = 3; # Precision for the branchlength
104
105	1			1		344	use Bio::Tree::AlleleNode;
	1					2
	1					29
106	1			1		8	use Bio::PopGen::Genotype;
	1					1
	1					27
107	1			1		350	use Bio::Tree::Tree;
	1					3
	1					42
108
109	1			1		7	use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
	1					1
	1					393
110
111
112							=head2 new
113
114							Title : new
115							Usage : my $obj = Bio::PopGen::Simulation::Coalescent->new();
116							Function: Builds a new Bio::PopGen::Simulation::Coalescent object
117							Returns : an instance of Bio::PopGen::Simulation::Coalescent
118							Args : -samples => arrayref of sample names
119							OR
120							-sample_size=> number of samples (samps will get a systematic name)
121							-maxcount => [optional] maximum number of trees to provide
122
123							=cut
124
125							sub new{
126	1			1	1	184	my ($class,@args) = @_;
127	1					13	my $self = $class->SUPER::new(@args);
128
129	1					3	$self->{'_treecounter'} = 0;
130	1					3	$self->{'_maxcount'} = 0;
131	1					23	my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
132							SAMPLES
133							SAMPLE_SIZE)],
134							@args);
135	1					2	my @samples;
136
137	1	50				4	if( ! defined $samps ) {
138	1	50	33			4	if( ! defined $samplesize \|\| $samplesize <= 0 ) {
139	0					0	$self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified (sampsize is $samplesize)");
140							}
141	1					5	foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
	5					21
142							} else {
143	0	0				0	if( ref($samps) !~ /ARRAY/i ) {
144	0					0	$self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
145							}
146	0					0	@samples = @$samps;
147							}
148
149	1					5	$self->samples(\@samples);
150	1					5	$self->sample_size(scalar @samples);
151	1	50				3	defined $maxcount && $self->maxcount($maxcount);
152	1					5	return $self;
153							}
154
155							=head2 next_tree
156
157							Title : next_tree
158							Usage : my $tree = $factory->next_tree
159							Function: Returns a random tree based on the initialized number of nodes
160							NOTE: if maxcount is not specified on initialization or
161							set to a valid integer, subsequent calls to next_tree will
162							continue to return random trees and never return undef
163							Returns : Bio::Tree::TreeI object
164							Args : none
165
166							=cut
167
168							sub next_tree{
169	1			1	1	4	my ($self) = @_;
170							# If maxcount is set to something non-zero then next tree will
171							# continue to return valid trees until maxcount is reached
172							# otherwise will always return trees
173							return if( $self->maxcount &&
174	1	50	33			3	$self->{'_treecounter'}++ >= $self->maxcount );
175	1					2	my $size = $self->sample_size;
176
177	1					1	my $in;
178	1					2	my @tree = ();
179	1					1	my @list = ();
180
181	1					10	for($in=0;$in < 2*$size -1; $in++ ) {
182	9					25	push @tree, { 'nodenum' => "Node$in" };
183							}
184							# in C we would have 2 arrays
185							# an array of nodes (tree)
186							# and array of pointers to these nodes (list)
187							# and we just shuffle the list items to do the
188							# tree topology generation
189							# instead in perl, we will have a list of hashes (nodes) called @tree
190							# and a list of integers representing the indexes in tree called @list
191
192	1					78	for($in=0;$in < $size;$in++) {
193	5					9	$tree[$in]->{'time'} = 0;
194	5					6	$tree[$in]->{'desc1'} = undef;
195	5					4	$tree[$in]->{'desc2'} = undef;
196	5					13	push @list, $in;
197							}
198
199	1					2	my $t=0;
200							# generate times for the nodes
201	1					5	for($in = $size; $in > 1; $in-- ) {
202	4					7	$t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
203	4					11	$tree[2 * $size - $in]->{'time'} =$t;
204							}
205							# topology generation
206	1					3	for ($in = $size; $in > 1; $in-- ) {
207	4					5	my $pick = int $self->random($in);
208	4					4	my $nodeindex = $list[$pick];
209	4					4	my $swap = 2 * $size - $in;
210	4					5	$tree[$swap]->{'desc1'} = $nodeindex;
211	4					4	$list[$pick] = $list[$in-1];
212	4					6	$pick = int rand($in - 1);
213	4					5	$nodeindex = $list[$pick];
214	4					4	$tree[$swap]->{'desc2'} = $nodeindex;
215	4					8	$list[$pick] = $swap;
216							}
217							# Let's convert the hashes into nodes
218
219	1					3	my @nodes = ();
220	1					3	foreach my $n ( @tree ) {
221							push @nodes,
222							Bio::Tree::AlleleNode->new(-id => $n->{'nodenum'},
223	9					51	-branch_length => $n->{'time'});
224							}
225	1					2	my $ct = 0;
226	1					3	foreach my $node ( @nodes ) {
227	9					11	my $n = $tree[$ct++];
228	9	100				17	if( defined $n->{'desc1'} ) {
229	4					15	$node->add_Descendent($nodes[$n->{'desc1'}]);
230							}
231	9	100				27	if( defined $n->{'desc2'} ) {
232	4					10	$node->add_Descendent($nodes[$n->{'desc2'}]);
233							}
234							}
235	1					10	my $T = Bio::Tree::Tree->new(-root => pop @nodes );
236	1					12	return $T;
237							}
238
239							=head2 add_Mutations
240
241							Title : add_Mutations
242							Usage : $factory->add_Mutations($tree, $mutcount);
243							Function: Adds mutations to a tree via a random process weighted by
244							branch length (it is a poisson distribution
245							as part of a coalescent process)
246							Returns : none
247							Args : $tree - Bio::Tree::TreeI
248							$nummut - number of mutations
249							$precision - optional # of digits for precision
250
251
252							=cut
253
254							sub add_Mutations{
255	1			1	1	335	my ($self,$tree, $nummut,$precision) = @_;
256	1		33			6	$precision \|\|= $PRECISION_DIGITS;
257	1					2	$precision = 10**$precision;
258
259	1					1	my @branches;
260							my @lens;
261	1					2	my $branchlen = 0;
262	1					1	my $last = 0;
263	1					3	my @nodes = $tree->get_nodes();
264	1					2	my $i = 0;
265
266							# Jason's somewhat simplistics way of doing a poission
267							# distribution for a fixed number of mutations
268							# build an array and put the node number in a slot
269							# representing the branch to put a mutation on
270							# but weight the number of slots per branch by the
271							# length of the branch ( ancestor's time - node time)
272
273	1					4	foreach my $node ( @nodes ) {
274	9	100				18	if( $node->ancestor ) {
275	8					13	my $len = int ( ($node->ancestor->branch_length -
276							$node->branch_length) * $precision);
277	8	50				18	if ( $len > 0 ) {
278	8					17	for( my $j =0;$j < $len;$j++) {
279	2762					3872	push @branches, $i;
280							}
281	8					11	$last += $len;
282							}
283	8					8	$branchlen += $len;
284							}
285	9	50				39	if( ! $node->isa('Bio::Tree::AlleleNode') ) {
286	0					0	bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
287							}
288							# This let's us reset the stored genotypes so we can keep reusing the
289							# same tree topology, but throw down mutations multiple times
290	9					25	$node->reset_Genotypes;
291	9					18	$i++;
292							}
293							# sanity check
294	1	50				4	$self->throw("branch len is $branchlen arraylen is $last")
295							unless ( $branchlen == $last );
296	1					2	my @mutations;
297	1					4	for( my $j = 0; $j < $nummut; $j++) {
298	100					178	my $index = int(rand($branchlen));
299	100					109	my $branch = $branches[$index];
300
301							# We're using an infinite sites model so every new
302							# mutation is a new site
303	100					415	my $g = Bio::PopGen::Genotype->new(-marker_name => "Mutation$j",
304							-alleles => [1]);
305	100					226	$nodes[$branch]->add_Genotype($g);
306	100					189	push @mutations, "Mutation$j";
307							# Let's add this mutation to all the children (push it down
308							# the branches to the tips)
309	100					169	foreach my $child ( $nodes[$branch]->get_all_Descendents ) {
310	116					168	$child->add_Genotype($g);
311							}
312							}
313							# Insure that everyone who doesn't have the mutation
314							# has the ancestral state, which is '0'
315	1					3	foreach my $node ( @nodes ) {
316	9					13	foreach my $m ( @mutations ) {
317	900	100				1203	if( ! $node->has_Marker($m) ) {
318	684					1740	my $emptyg = Bio::PopGen::Genotype->new(-marker_name => $m,
319							-alleles => [0]);
320	684					1130	$node->add_Genotype($emptyg);
321							}
322							}
323							}
324							}
325
326							=head2 maxcount
327
328							Title : maxcount
329							Usage : $obj->maxcount($newval)
330							Function:
331							Returns : Maxcount value
332							Args : newvalue (optional)
333
334
335							=cut
336
337							sub maxcount{
338	1			1	1	2	my ($self,$value) = @_;
339	1	50				2	if( defined $value) {
340	0	0				0	if( $value =~ /^(\d+)/ ) {
341	0					0	$self->{'maxcount'} = $1;
342							} else {
343	0					0	$self->warn("Must specify a valid Positive integer to maxcount");
344	0					0	$self->{'maxcount'} = 0;
345							}
346							}
347	1					4	return $self->{'_maxcount'};
348							}
349
350							=head2 samples
351
352							Title : samples
353							Usage : $obj->samples($newval)
354							Function:
355							Example :
356							Returns : value of samples
357							Args : newvalue (optional)
358
359
360							=cut
361
362							sub samples{
363	1			1	1	1	my ($self,$value) = @_;
364	1	50				4	if( defined $value) {
365	1	50				90	if( ref($value) !~ /ARRAY/i ) {
366	0					0	$self->warn("Must specify a valid array ref to the method 'samples'");
367	0					0	$value = [];
368							}
369	1					5	$self->{'samples'} = $value;
370							}
371	1					3	return $self->{'samples'};
372
373							}
374
375							=head2 sample_size
376
377							Title : sample_size
378							Usage : $obj->sample_size($newval)
379							Function:
380							Example :
381							Returns : value of sample_size
382							Args : newvalue (optional)
383
384
385							=cut
386
387							sub sample_size{
388	2			2	1	4	my ($self,$value) = @_;
389	2	100				5	if( defined $value) {
390	1					3	$self->{'sample_size'} = $value;
391							}
392	2					3	return $self->{'sample_size'};
393
394							}
395
396							=head2 random
397
398							Title : random
399							Usage : my $rfloat = $node->random($size)
400							Function: Generates a random number between 0 and $size
401							This is abstracted so that someone can override and provide their
402							own special RNG. This is expected to be a uniform RNG.
403							Returns : Floating point random
404							Args : $maximum size for random number (defaults to 1)
405
406
407							=cut
408
409							sub random{
410	8			8	1	8	my ($self,$max) = @_;
411	8					51	return rand($max);
412							}
413
414							1;