line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::PopGen::Population |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::PopGen::Population - A population of individuals |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::PopGen::Population; |
21
|
|
|
|
|
|
|
use Bio::PopGen::Individual; |
22
|
|
|
|
|
|
|
my $population = Bio::PopGen::Population->new(); |
23
|
|
|
|
|
|
|
my $ind = Bio::PopGen::Individual->new(-unique_id => 'id'); |
24
|
|
|
|
|
|
|
$population->add_Individual($ind); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
for my $ind ( $population->get_Individuals ) { |
27
|
|
|
|
|
|
|
# iterate through the individuals |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
for my $name ( $population->get_marker_names ) { |
31
|
|
|
|
|
|
|
my $marker = $population->get_Marker($name); |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $num_inds = $population->get_number_individuals; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $homozygote_f = $population->get_Frequency_Homozygotes; |
37
|
|
|
|
|
|
|
my $heterozygote_f = $population->get_Frequency_Heterozygotes; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# make a population haploid by making fake chromosomes through |
40
|
|
|
|
|
|
|
# haplotypes -- ala allele 1 is on chrom 1 and allele 2 is on chrom 2 |
41
|
|
|
|
|
|
|
# the number of individuals created will thus be 2 x number in |
42
|
|
|
|
|
|
|
# population |
43
|
|
|
|
|
|
|
my $happop = $population->haploid_population; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 DESCRIPTION |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This is a collection of individuals. We'll have ways of generating |
49
|
|
|
|
|
|
|
L objects out so we can calculate allele_frequencies |
50
|
|
|
|
|
|
|
for implementing the various statistical tests. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 FEEDBACK |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 Mailing Lists |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
57
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
58
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
61
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Support |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
I |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
70
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
71
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
72
|
|
|
|
|
|
|
with code and data examples if at all possible. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Reporting Bugs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
77
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
78
|
|
|
|
|
|
|
email or the web: |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Email jason-at-bioperl.org |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Matthew Hahn, matthew.hahn-at-duke.edu |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 APPENDIX |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
93
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# Let the code begin... |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
package Bio::PopGen::Population; |
102
|
3
|
|
|
3
|
|
707
|
use strict; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
70
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
105
|
|
|
|
|
|
|
|
106
|
3
|
|
|
3
|
|
696
|
use Bio::PopGen::Marker; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
63
|
|
107
|
3
|
|
|
3
|
|
487
|
use Bio::PopGen::Genotype; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
93
|
|
108
|
|
|
|
|
|
|
our $CheckISA = 1; |
109
|
3
|
|
|
3
|
|
11
|
use base qw(Bio::Root::Root Bio::PopGen::PopulationI); |
|
3
|
|
|
|
|
1
|
|
|
3
|
|
|
|
|
833
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 new |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Title : new |
114
|
|
|
|
|
|
|
Usage : my $obj = Bio::PopGen::Population->new(); |
115
|
|
|
|
|
|
|
Function: Builds a new Bio::PopGen::Population object |
116
|
|
|
|
|
|
|
Returns : an instance of Bio::PopGen::Population |
117
|
|
|
|
|
|
|
Args : -individuals => array ref of individuals (optional) |
118
|
|
|
|
|
|
|
-name => population name (optional) |
119
|
|
|
|
|
|
|
-source => a source tag (optional) |
120
|
|
|
|
|
|
|
-description => a short description string of the population (optional) |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub new { |
125
|
52
|
|
|
52
|
1
|
4259
|
my($class,@args) = @_; |
126
|
|
|
|
|
|
|
|
127
|
52
|
|
|
|
|
142
|
my $self = $class->SUPER::new(@args); |
128
|
52
|
|
|
|
|
77
|
$self->{'_individuals'} = []; |
129
|
52
|
|
|
|
|
173
|
my ($name,$source,$description, |
130
|
|
|
|
|
|
|
$inds,$checkisa) = $self->_rearrange([qw(NAME |
131
|
|
|
|
|
|
|
SOURCE |
132
|
|
|
|
|
|
|
DESCRIPTION |
133
|
|
|
|
|
|
|
INDIVIDUALS |
134
|
|
|
|
|
|
|
CHECKISA)], @args); |
135
|
52
|
100
|
|
|
|
137
|
if( defined $inds ) { |
136
|
50
|
50
|
|
|
|
160
|
if( ref($inds) !~ /ARRAY/i ) { |
137
|
0
|
|
|
|
|
0
|
$self->warn("Need to provide a value array ref for the -individuals initialization flag"); |
138
|
|
|
|
|
|
|
} else { |
139
|
50
|
|
|
|
|
208
|
$self->add_Individual(@$inds); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
52
|
100
|
|
|
|
125
|
defined $name && $self->name($name); |
144
|
52
|
100
|
|
|
|
102
|
defined $source && $self->source($source); |
145
|
52
|
100
|
|
|
|
103
|
defined $description && $self->description($description); |
146
|
52
|
100
|
|
|
|
98
|
$self->{'_checkisa'} = defined $checkisa ? $checkisa : $CheckISA; |
147
|
52
|
|
|
|
|
235
|
return $self; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 name |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Title : name |
154
|
|
|
|
|
|
|
Usage : my $name = $pop->name |
155
|
|
|
|
|
|
|
Function: Get the population name |
156
|
|
|
|
|
|
|
Returns : string representing population name |
157
|
|
|
|
|
|
|
Args : [optional] string representing population name |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub name{ |
163
|
43
|
|
|
43
|
1
|
49
|
my $self = shift; |
164
|
43
|
100
|
|
|
|
72
|
return $self->{'_name'} = shift if @_; |
165
|
34
|
|
|
|
|
90
|
return $self->{'_name'}; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 description |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Title : description |
171
|
|
|
|
|
|
|
Usage : my $description = $pop->description |
172
|
|
|
|
|
|
|
Function: Get the population description |
173
|
|
|
|
|
|
|
Returns : string representing population description |
174
|
|
|
|
|
|
|
Args : [optional] string representing population description |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub description{ |
180
|
41
|
|
|
41
|
1
|
40
|
my $self = shift; |
181
|
41
|
100
|
|
|
|
59
|
return $self->{'_description'} = shift if @_; |
182
|
34
|
|
|
|
|
127
|
return $self->{'_description'}; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head2 source |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Title : source |
188
|
|
|
|
|
|
|
Usage : my $source = $pop->source |
189
|
|
|
|
|
|
|
Function: Get the population source |
190
|
|
|
|
|
|
|
Returns : string representing population source |
191
|
|
|
|
|
|
|
Args : [optional] string representing population source |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=cut |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub source{ |
197
|
38
|
|
|
38
|
1
|
34
|
my $self = shift; |
198
|
38
|
100
|
|
|
|
72
|
return $self->{'_source'} = shift if @_; |
199
|
34
|
|
|
|
|
65
|
return $self->{'_source'}; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 annotation |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Title : annotation |
205
|
|
|
|
|
|
|
Usage : my $annotation_collection = $pop->annotation; |
206
|
|
|
|
|
|
|
Function: Get/set a Bio::AnnotationCollectionI for this population |
207
|
|
|
|
|
|
|
Returns : Bio::AnnotationCollectionI object |
208
|
|
|
|
|
|
|
Args : [optional set] Bio::AnnotationCollectionI object |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub annotation{ |
213
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arg) = @_; |
214
|
0
|
0
|
|
|
|
0
|
return $self->{_annotation} unless $arg; |
215
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Bio::AnnotationCollectionI required for argument") unless |
216
|
|
|
|
|
|
|
ref($arg) && $arg->isa('Bio::AnnotationCollectionI'); |
217
|
0
|
|
|
|
|
0
|
return $self->{_annotation} = $arg; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head2 set_Allele_Frequency |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Title : set_Allele_Frequency |
223
|
|
|
|
|
|
|
Usage : $population->set_Allele_Frequency('marker' => { 'allele1' => 0.1}); |
224
|
|
|
|
|
|
|
Function: Sets an allele frequency for a Marker for this Population |
225
|
|
|
|
|
|
|
This allows the Population to not have individual individual |
226
|
|
|
|
|
|
|
genotypes but rather a set of overall allele frequencies |
227
|
|
|
|
|
|
|
Returns : Count of the number of markers |
228
|
|
|
|
|
|
|
Args : -name => (string) marker name |
229
|
|
|
|
|
|
|
-allele => (string) allele name |
230
|
|
|
|
|
|
|
-frequency => (double) allele frequency - must be between 0 and 1 |
231
|
|
|
|
|
|
|
OR |
232
|
|
|
|
|
|
|
-frequencies => { 'marker1' => { 'allele1' => 0.01, |
233
|
|
|
|
|
|
|
'allele2' => 0.99}, |
234
|
|
|
|
|
|
|
'marker2' => ... |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=cut |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub set_Allele_Frequency { |
240
|
3
|
|
|
3
|
1
|
438
|
my ($self,@args) = @_; |
241
|
3
|
|
|
|
|
10
|
my ($name,$allele, $frequency, |
242
|
|
|
|
|
|
|
$frequencies) = $self->_rearrange([qw(NAME |
243
|
|
|
|
|
|
|
ALLELE |
244
|
|
|
|
|
|
|
FREQUENCY |
245
|
|
|
|
|
|
|
FREQUENCIES |
246
|
|
|
|
|
|
|
)], @args); |
247
|
3
|
100
|
|
|
|
7
|
if( defined $frequencies ) { # this supercedes the res |
248
|
1
|
50
|
|
|
|
6
|
if( ref($frequencies) =~ /HASH/i ) { |
249
|
1
|
|
|
|
|
2
|
my ($markername,$alleles); |
250
|
1
|
|
|
|
|
5
|
while( ($markername,$alleles) = each %$frequencies ) { |
251
|
2
|
|
|
|
|
9
|
$self->{'_allele_freqs'}->{$markername} = |
252
|
|
|
|
|
|
|
Bio::PopGen::Marker->new(-name => $markername, |
253
|
|
|
|
|
|
|
-allele_freq => $alleles); |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} else { |
256
|
0
|
|
|
|
|
0
|
$self->throw("Must provide a valid hashref for the -frequencies option"); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} else { |
259
|
2
|
100
|
|
|
|
7
|
unless( defined $self->{'_allele_freqs'}->{$name} ) { |
260
|
1
|
|
|
|
|
4
|
$self->{'_allele_freqs'}->{$name} = |
261
|
|
|
|
|
|
|
Bio::PopGen::Marker->new(-name => $name); |
262
|
|
|
|
|
|
|
} |
263
|
2
|
|
|
|
|
5
|
$self->{'_allele_freqs'}->{$name}->add_Allele_Frequency($allele,$frequency); |
264
|
|
|
|
|
|
|
} |
265
|
3
|
|
|
|
|
4
|
return scalar keys %{$self->{'_allele_freqs'}}; |
|
3
|
|
|
|
|
8
|
|
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 add_Individual |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Title : add_Individual |
272
|
|
|
|
|
|
|
Usage : $population->add_Individual(@individuals); |
273
|
|
|
|
|
|
|
Function: Add individuals to a population |
274
|
|
|
|
|
|
|
Returns : count of the current number in the object |
275
|
|
|
|
|
|
|
Args : Array of Individuals |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub add_Individual{ |
281
|
51
|
|
|
51
|
1
|
177
|
my ($self,@inds) = @_; |
282
|
51
|
|
|
|
|
79
|
foreach my $i ( @inds ) { |
283
|
1404
|
50
|
|
|
|
1397
|
next if ! defined $i; |
284
|
|
|
|
|
|
|
|
285
|
1404
|
100
|
|
|
|
1951
|
unless( $self->{'_checkisa'} ? $i->isa('Bio::PopGen::IndividualI') : 1 ) { |
|
|
50
|
|
|
|
|
|
286
|
0
|
|
|
|
|
0
|
$self->warn("cannot add an individual ($i) which is not a Bio::PopGen::IndividualI"); |
287
|
0
|
|
|
|
|
0
|
next; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
51
|
|
|
|
|
47
|
push @{$self->{'_individuals'}}, @inds; |
|
51
|
|
|
|
|
243
|
|
291
|
51
|
|
|
|
|
63
|
$self->{'_cached_markernames'} = undef; |
292
|
51
|
|
|
|
|
60
|
$self->{'_allele_freqs'} = {}; |
293
|
51
|
50
|
|
|
|
42
|
return scalar @{$self->{'_individuals'} || []}; |
|
51
|
|
|
|
|
149
|
|
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=head2 remove_Individuals |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
Title : remove_Individuals |
300
|
|
|
|
|
|
|
Usage : $population->remove_Individuals(@ids); |
301
|
|
|
|
|
|
|
Function: Remove individual(s) to a population |
302
|
|
|
|
|
|
|
Returns : count of the current number in the object |
303
|
|
|
|
|
|
|
Args : Array of ids |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=cut |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub remove_Individuals { |
308
|
1
|
|
|
1
|
1
|
824
|
my ($self,@names) = @_; |
309
|
1
|
|
|
|
|
3
|
my $i = 0; |
310
|
1
|
|
|
|
|
2
|
my %namehash; # O(1) lookup will be faster I think |
311
|
1
|
|
|
|
|
2
|
foreach my $n ( @names ) { $namehash{$n}++ } |
|
1
|
|
|
|
|
4
|
|
312
|
1
|
|
|
|
|
1
|
my @tosplice; |
313
|
1
|
50
|
|
|
|
3
|
foreach my $ind ( @{$self->{'_individuals'} || []} ) { |
|
1
|
|
|
|
|
5
|
|
314
|
2
|
100
|
|
|
|
7
|
unshift @tosplice, $i if( $namehash{$ind->unique_id} ); |
315
|
2
|
|
|
|
|
3
|
$i++; |
316
|
|
|
|
|
|
|
} |
317
|
1
|
|
|
|
|
2
|
foreach my $index ( @tosplice ) { |
318
|
1
|
|
|
|
|
1
|
splice(@{$self->{'_individuals'}}, $index,1); |
|
1
|
|
|
|
|
3
|
|
319
|
|
|
|
|
|
|
} |
320
|
1
|
|
|
|
|
1
|
$self->{'_cached_markernames'} = undef; |
321
|
1
|
|
|
|
|
2
|
$self->{'_allele_freqs'} = {}; |
322
|
1
|
50
|
|
|
|
2
|
return scalar @{$self->{'_individuals'} || []}; |
|
1
|
|
|
|
|
3
|
|
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head2 get_Individuals |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Title : get_Individuals |
328
|
|
|
|
|
|
|
Usage : my @inds = $pop->get_Individuals(); |
329
|
|
|
|
|
|
|
Function: Return the individuals, alternatively restrict by a criteria |
330
|
|
|
|
|
|
|
Returns : Array of Bio::PopGen::IndividualI objects |
331
|
|
|
|
|
|
|
Args : none if want all the individuals OR, |
332
|
|
|
|
|
|
|
-unique_id => To get an individual with a specific id |
333
|
|
|
|
|
|
|
-marker => To only get individuals which have a genotype specific |
334
|
|
|
|
|
|
|
for a specific marker name |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub get_Individuals{ |
340
|
93
|
|
|
93
|
1
|
1066
|
my ($self,@args) = @_; |
341
|
93
|
50
|
|
|
|
93
|
my @inds = @{$self->{'_individuals'} || []}; |
|
93
|
|
|
|
|
399
|
|
342
|
93
|
50
|
|
|
|
170
|
return unless @inds; |
343
|
93
|
100
|
|
|
|
199
|
if( @args ) { # save a little time here if @args is empty |
344
|
3
|
|
|
|
|
7
|
my ($id,$marker) = $self->_rearrange([qw(UNIQUE_ID MARKER)], @args); |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
3
|
100
|
|
|
|
9
|
if( defined $id ) { |
|
|
50
|
|
|
|
|
|
348
|
1
|
|
|
|
|
1
|
@inds = grep { $_->unique_id eq $id } @inds; |
|
2
|
|
|
|
|
4
|
|
349
|
|
|
|
|
|
|
} elsif (defined $marker) { |
350
|
2
|
|
|
|
|
3
|
@inds = grep { $_->has_Marker($marker) } @inds; |
|
4
|
|
|
|
|
8
|
|
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
} |
353
|
93
|
|
|
|
|
289
|
return @inds; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head2 get_Genotypes |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Title : get_Genotypes |
359
|
|
|
|
|
|
|
Usage : my @genotypes = $pop->get_Genotypes(-marker => $name) |
360
|
|
|
|
|
|
|
Function: Get the genotypes for all the individuals for a specific |
361
|
|
|
|
|
|
|
marker name |
362
|
|
|
|
|
|
|
Returns : Array of Bio::PopGen::GenotypeI objects |
363
|
|
|
|
|
|
|
Args : -marker => name of the marker |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub get_Genotypes{ |
369
|
1240
|
|
|
1240
|
1
|
1760
|
my ($self,@args) = @_; |
370
|
1240
|
|
|
|
|
3026
|
my ($name) = $self->_rearrange([qw(MARKER)],@args); |
371
|
1240
|
50
|
|
|
|
2305
|
if( defined $name ) { |
372
|
53050
|
|
|
|
|
56842
|
return grep { defined $_ } map { $_->get_Genotypes(-marker => $name) } |
|
53050
|
|
|
|
|
91799
|
|
373
|
1240
|
50
|
|
|
|
984
|
@{$self->{'_individuals'} || []} |
|
1240
|
|
|
|
|
2771
|
|
374
|
|
|
|
|
|
|
} |
375
|
0
|
|
|
|
|
0
|
$self->warn("You needed to have provided a valid -marker value"); |
376
|
0
|
|
|
|
|
0
|
return (); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 get_marker_names |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Title : get_marker_names |
383
|
|
|
|
|
|
|
Usage : my @names = $pop->get_marker_names; |
384
|
|
|
|
|
|
|
Function: Get the names of the markers |
385
|
|
|
|
|
|
|
Returns : Array of strings |
386
|
|
|
|
|
|
|
Args : [optional] boolean flag to ignore internal cache status |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub get_marker_names { |
392
|
78
|
|
|
78
|
1
|
718
|
my ($self,$force) = @_; |
393
|
48
|
50
|
|
|
|
224
|
return @{$self->{'_cached_markernames'} || []} |
394
|
78
|
100
|
66
|
|
|
318
|
if( ! $force && defined $self->{'_cached_markernames'}); |
395
|
30
|
|
|
|
|
31
|
my %unique; |
396
|
30
|
|
|
|
|
58
|
foreach my $n ( map { $_->get_marker_names } $self->get_Individuals() ) { |
|
1144
|
|
|
|
|
1379
|
|
397
|
41333
|
|
|
|
|
26159
|
$unique{$n}++; |
398
|
|
|
|
|
|
|
} |
399
|
30
|
|
|
|
|
1545
|
my @nms = keys %unique; |
400
|
30
|
100
|
|
|
|
167
|
if( $nms[0] =~ /^(Site|Codon)/ ) { |
401
|
|
|
|
|
|
|
# sort by site or codon number and do it in |
402
|
|
|
|
|
|
|
# a schwartzian transformation baby! |
403
|
2
|
|
|
|
|
6
|
@nms = map { $_->[1] } |
404
|
0
|
|
|
|
|
0
|
sort { $a->[0] <=> $b->[0] } |
405
|
2
|
|
|
|
|
3
|
map { [$_ =~ /^(?:Codon|Site)-(\d+)/, $_] } @nms; |
|
2
|
|
|
|
|
13
|
|
406
|
|
|
|
|
|
|
} |
407
|
30
|
|
|
|
|
131
|
$self->{'_cached_markernames'} = [ @nms ]; |
408
|
30
|
50
|
|
|
|
31
|
return @{$self->{'_cached_markernames'} || []}; |
|
30
|
|
|
|
|
366
|
|
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 get_Marker |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Title : get_Marker |
415
|
|
|
|
|
|
|
Usage : my $marker = $population->get_Marker($name) |
416
|
|
|
|
|
|
|
Function: Get a Bio::PopGen::Marker object based on this population |
417
|
|
|
|
|
|
|
Returns : Bio::PopGen::MarkerI object |
418
|
|
|
|
|
|
|
Args : name of the marker |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=cut |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub get_Marker{ |
424
|
896
|
|
|
896
|
1
|
1977
|
my ($self,$markername) = @_; |
425
|
896
|
|
|
|
|
671
|
my $marker; |
426
|
|
|
|
|
|
|
# setup some caching too |
427
|
896
|
100
|
66
|
|
|
3620
|
if( defined $self->{'_allele_freqs'} && |
428
|
|
|
|
|
|
|
defined ($marker = $self->{'_allele_freqs'}->{$markername}) ) { |
429
|
|
|
|
|
|
|
# marker is now set to the stored value |
430
|
|
|
|
|
|
|
} else { |
431
|
490
|
|
|
|
|
1364
|
my @genotypes = $self->get_Genotypes(-marker => $markername); |
432
|
490
|
|
|
|
|
3612
|
$marker = Bio::PopGen::Marker->new(-name => $markername); |
433
|
|
|
|
|
|
|
|
434
|
490
|
50
|
|
|
|
905
|
if( ! @genotypes ) { |
435
|
0
|
|
|
|
|
0
|
$self->warn("No genotypes for Marker $markername in the population"); |
436
|
|
|
|
|
|
|
} else { |
437
|
490
|
|
|
|
|
418
|
my %alleles; |
438
|
|
|
|
|
|
|
my $count; |
439
|
490
|
|
|
|
|
586
|
for my $al ( map { $_->get_Alleles} @genotypes ) { |
|
36128
|
|
|
|
|
42346
|
|
440
|
71266
|
50
|
|
|
|
74972
|
next if($al eq '?'); |
441
|
71266
|
|
|
|
|
38468
|
$count++; |
442
|
71266
|
|
|
|
|
50713
|
$alleles{$al}++ |
443
|
|
|
|
|
|
|
} |
444
|
490
|
|
|
|
|
4424
|
foreach my $allele ( keys %alleles ) { |
445
|
934
|
|
|
|
|
2437
|
$marker->add_Allele_Frequency($allele, $alleles{$allele}/$count); |
446
|
934
|
|
|
|
|
1722
|
$marker->{_marker_coverage} = $count/2; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
490
|
|
|
|
|
2977
|
$self->{'_allele_freqs'}->{$markername} = $marker; |
450
|
|
|
|
|
|
|
} |
451
|
896
|
|
|
|
|
1751
|
return $marker; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 get_number_individuals |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Title : get_number_individuals |
458
|
|
|
|
|
|
|
Usage : my $count = $pop->get_number_individuals; |
459
|
|
|
|
|
|
|
Function: Get the count of the number of individuals |
460
|
|
|
|
|
|
|
Returns : integer >= 0 |
461
|
|
|
|
|
|
|
Args : none |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=cut |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub get_number_individuals{ |
467
|
1245
|
|
|
1245
|
1
|
1009
|
my ($self,$markername) = @_; |
468
|
|
|
|
|
|
|
|
469
|
1245
|
50
|
|
|
|
1586
|
if( $self->{'_forced_set_individuals'} ) { |
470
|
0
|
|
|
|
|
0
|
return $self->{'_forced_set_individuals'}; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
1245
|
100
|
|
|
|
1300
|
unless( defined $markername ) { |
474
|
30
|
50
|
|
|
|
32
|
return scalar @{$self->{'_individuals'} || []}; |
|
30
|
|
|
|
|
108
|
|
475
|
|
|
|
|
|
|
} else { |
476
|
1215
|
|
|
|
|
712
|
my $number =0; |
477
|
1215
|
50
|
|
|
|
800
|
foreach my $individual ( @{$self->{'_individuals'} || []} ) { |
|
1215
|
|
|
|
|
1881
|
|
478
|
9063
|
50
|
|
|
|
9626
|
$number++ if( $individual->has_Marker($markername)); |
479
|
|
|
|
|
|
|
} |
480
|
1215
|
|
|
|
|
1387
|
return $number; |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=head2 set_number_individuals |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Title : set_number_individuals |
487
|
|
|
|
|
|
|
Usage : $pop->set_number_individuals($num); |
488
|
|
|
|
|
|
|
Function: Fixes the number of individuals, call this with |
489
|
|
|
|
|
|
|
0 to unset. |
490
|
|
|
|
|
|
|
Only use this if you know what you are doing, |
491
|
|
|
|
|
|
|
this is only relavent when you are just adding |
492
|
|
|
|
|
|
|
allele frequency data for a population and want to |
493
|
|
|
|
|
|
|
calculate something like theta |
494
|
|
|
|
|
|
|
Returns : none |
495
|
|
|
|
|
|
|
Args : individual count, calling it with undef or 0 |
496
|
|
|
|
|
|
|
will reset the value to return a number |
497
|
|
|
|
|
|
|
calculated from the number of individuals |
498
|
|
|
|
|
|
|
stored for this population. |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub set_number_individuals{ |
503
|
0
|
|
|
0
|
1
|
0
|
my ($self,$indcount) = @_; |
504
|
0
|
|
|
|
|
0
|
return $self->{'_forced_set_individuals'} = $indcount; |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=head2 get_Frequency_Homozygotes |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Title : get_Frequency_Homozygotes |
511
|
|
|
|
|
|
|
Usage : my $freq = $pop->get_Frequency_Homozygotes; |
512
|
|
|
|
|
|
|
Function: Calculate the frequency of homozygotes in the population |
513
|
|
|
|
|
|
|
Returns : fraction between 0 and 1 |
514
|
|
|
|
|
|
|
Args : $markername |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub get_Frequency_Homozygotes{ |
520
|
405
|
|
|
405
|
1
|
327
|
my ($self,$marker,$allelename) = @_; |
521
|
405
|
|
|
|
|
333
|
my ($homozygote_count) = 0; |
522
|
405
|
50
|
33
|
|
|
748
|
return 0 if ! defined $marker || ! defined $allelename; |
523
|
|
|
|
|
|
|
$marker = $marker->name if( defined $marker && |
524
|
|
|
|
|
|
|
ref($marker) && |
525
|
405
|
0
|
33
|
|
|
976
|
( $self->{'_checkisa'} ? |
|
|
0
|
33
|
|
|
|
|
526
|
|
|
|
|
|
|
$marker->isa('Bio::PopGen::MarkerI') : 1)); |
527
|
405
|
|
|
|
|
423
|
my $total = $self->get_number_individuals($marker); |
528
|
405
|
|
|
|
|
507
|
foreach my $genotype ( $self->get_Genotypes($marker) ) { |
529
|
3021
|
|
|
|
|
3291
|
my %alleles = map { $_ => 1} $genotype->get_Alleles(); |
|
3021
|
|
|
|
|
3822
|
|
530
|
|
|
|
|
|
|
# what to do for non-diploid situations? |
531
|
3021
|
100
|
|
|
|
4773
|
if( $alleles{$allelename} ) { |
532
|
1440
|
50
|
|
|
|
2172
|
$homozygote_count++ if( keys %alleles == 1); |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
} |
535
|
405
|
50
|
|
|
|
995
|
return $total ? $homozygote_count / $total : 0; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head2 get_Frequency_Heterozygotes |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Title : get_Frequency_Heterozygotes |
541
|
|
|
|
|
|
|
Usage : my $freq = $pop->get_Frequency_Homozygotes; |
542
|
|
|
|
|
|
|
Function: Calculate the frequency of homozygotes in the population |
543
|
|
|
|
|
|
|
Returns : fraction between 0 and 1 |
544
|
|
|
|
|
|
|
Args : $markername |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=cut |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub get_Frequency_Heterozygotes{ |
550
|
0
|
|
|
0
|
1
|
0
|
my ($self,$marker,$allelename) = @_; |
551
|
0
|
|
|
|
|
0
|
my ($heterozygote_count) = 0; |
552
|
0
|
0
|
0
|
|
|
0
|
return 0 if ! defined $marker || ! defined $allelename; |
553
|
|
|
|
|
|
|
$marker = $marker->name if( defined $marker && ref($marker) && |
554
|
0
|
0
|
0
|
|
|
0
|
($self->{'_checkisa'} ? |
|
|
0
|
0
|
|
|
|
|
555
|
|
|
|
|
|
|
$marker->isa('Bio::PopGen::MarkerI') : 1)); |
556
|
0
|
0
|
|
|
|
0
|
if( ref($marker) ) { |
557
|
0
|
|
|
|
|
0
|
$self->warn("Passed in a ".ref($marker). " to has_Marker, expecting either a string or a Bio::PopGen::MarkerI"); |
558
|
0
|
|
|
|
|
0
|
return 0; |
559
|
|
|
|
|
|
|
} |
560
|
0
|
|
|
|
|
0
|
my $total = $self->get_number_individuals($marker); |
561
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
0
|
foreach my $genotype ( $self->get_Genotypes($marker) ) { |
563
|
0
|
|
|
|
|
0
|
my %alleles = map { $_ => 1} $genotype->get_Alleles(); |
|
0
|
|
|
|
|
0
|
|
564
|
|
|
|
|
|
|
# what to do for non-diploid situations? |
565
|
0
|
0
|
|
|
|
0
|
if( $alleles{$allelename} ) { |
566
|
0
|
0
|
|
|
|
0
|
$heterozygote_count++ if( keys %alleles == 2); |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
} |
569
|
0
|
0
|
|
|
|
0
|
return $total ? $heterozygote_count / $total : 0; |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 haploid_population |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Title : haploid_population |
575
|
|
|
|
|
|
|
Usage : my $pop = $population->haploid_population; |
576
|
|
|
|
|
|
|
Function: Make a new population where all the individuals |
577
|
|
|
|
|
|
|
are haploid - effectively an individual out of each |
578
|
|
|
|
|
|
|
chromosome an individual has. |
579
|
|
|
|
|
|
|
Returns : L |
580
|
|
|
|
|
|
|
Args : None |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=cut |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
sub haploid_population{ |
586
|
33
|
|
|
33
|
1
|
39
|
my ($self) = @_; |
587
|
33
|
|
|
|
|
31
|
my @inds; |
588
|
33
|
|
|
|
|
66
|
my @marker_names = $self->get_marker_names; |
589
|
|
|
|
|
|
|
|
590
|
33
|
|
|
|
|
82
|
for my $ind ( $self->get_Individuals ) { |
591
|
658
|
|
|
|
|
1026
|
my @chromosomes; |
592
|
658
|
|
|
|
|
1651
|
my $id = $ind->unique_id; |
593
|
|
|
|
|
|
|
# separate genotypes into 'chromosomes' |
594
|
658
|
|
|
|
|
1287
|
for my $marker_name( @marker_names ) { |
595
|
18918
|
|
|
|
|
46196
|
my ($genotype) = $ind->get_Genotypes(-marker => $marker_name); |
596
|
18918
|
|
|
|
|
18585
|
my $i =0; |
597
|
18918
|
|
|
|
|
31865
|
for my $allele ( $genotype->get_Alleles ) { |
598
|
37194
|
|
|
|
|
23963
|
push @{$chromosomes[$i]}, |
|
37194
|
|
|
|
|
140141
|
|
599
|
|
|
|
|
|
|
Bio::PopGen::Genotype->new(-marker_name => $marker_name, |
600
|
|
|
|
|
|
|
-individual_id => $id.".$i", |
601
|
|
|
|
|
|
|
-alleles => [$allele]); |
602
|
37194
|
|
|
|
|
78119
|
$i++; |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
} |
605
|
658
|
|
|
|
|
1076
|
for my $chrom ( @chromosomes ) { |
606
|
1198
|
|
|
|
|
6147
|
my $copyind = ref($ind)->new(-unique_id => $id.".1", |
607
|
|
|
|
|
|
|
-genotypes => $chrom); |
608
|
1198
|
|
|
|
|
5230
|
push @inds, $ind; |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
} |
611
|
33
|
|
|
|
|
240
|
my $population = ref($self)->new(-name => $self->name, |
612
|
|
|
|
|
|
|
-source => $self->source, |
613
|
|
|
|
|
|
|
-description => $self->description, |
614
|
|
|
|
|
|
|
-individuals => \@inds); |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
1; |