line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Core::SeqMask::Pmsf; |
2
|
|
|
|
|
|
|
# ABSTRACT: Posterior mean site frequencies (PMSF) for sequence sites |
3
|
|
|
|
|
|
|
$Bio::MUST::Core::SeqMask::Pmsf::VERSION = '0.212530'; |
4
|
17
|
|
|
17
|
|
145
|
use Moose; |
|
17
|
|
|
|
|
57
|
|
|
17
|
|
|
|
|
158
|
|
5
|
17
|
|
|
17
|
|
126721
|
use namespace::autoclean; |
|
17
|
|
|
|
|
62
|
|
|
17
|
|
|
|
|
212
|
|
6
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
1838
|
use autodie; |
|
17
|
|
|
|
|
57
|
|
|
17
|
|
|
|
|
194
|
|
8
|
17
|
|
|
17
|
|
97551
|
use feature qw(say); |
|
17
|
|
|
|
|
76
|
|
|
17
|
|
|
|
|
1865
|
|
9
|
|
|
|
|
|
|
|
10
|
17
|
|
|
17
|
|
146
|
use Carp; |
|
17
|
|
|
|
|
58
|
|
|
17
|
|
|
|
|
1696
|
|
11
|
17
|
|
|
17
|
|
157
|
use List::AllUtils qw(sum each_arrayref); |
|
17
|
|
|
|
|
46
|
|
|
17
|
|
|
|
|
1284
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
extends 'Bio::MUST::Core::SeqMask'; |
14
|
|
|
|
|
|
|
|
15
|
17
|
|
|
17
|
|
151
|
use Bio::MUST::Core::Types; |
|
17
|
|
|
|
|
65
|
|
|
17
|
|
|
|
|
713
|
|
16
|
17
|
|
|
17
|
|
136
|
use aliased 'Bio::MUST::Core::SeqMask::Rates'; |
|
17
|
|
|
|
|
41
|
|
|
17
|
|
|
|
|
167
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# override superclass' Bool type |
20
|
|
|
|
|
|
|
# Note: mask indices are as follows: [site][AA] |
21
|
|
|
|
|
|
|
# mask values are freqs |
22
|
|
|
|
|
|
|
has '+mask' => ( |
23
|
|
|
|
|
|
|
isa => 'ArrayRef[ArrayRef[Num]]', |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# TODO: mask non-applicable methods from superclass? (Liskov principle) |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub chi_square_stats {
    # Compute a per-site chi-square-like statistic between two PMSF objects.
    #
    # For each site, the frequencies of the invocant are used as the expected
    # values and those of $othr as the observed values:
    #     sum over AAs of (observed - expected)**2 / expected
    #
    # Args   : $othr - the other PMSF object (must provide mask_len and
    #          all_states, like the invocant)
    # Returns: a Rates object built with one statistic per site
    # Warns  : (carp) if the two objects do not have the same width, and if
    #          some terms had to be skipped for lack of a usable expected
    #          frequency
    my $self = shift;
    my $othr = shift;

    # check that both pmsf objects are the same length
    # potential bugs could come from constant sites etc
    my $s_width = $self->mask_len;
    my $o_width = $othr->mask_len;
    carp "[BMC] Warning: PMSF widths do not match: $s_width vs. $o_width!"
        unless $s_width == $o_width;

    my @stats;
    my $skipped = 0;

    my $ea = each_arrayref [ $self->all_states ], [ $othr->all_states ];

    SITE:
    while ( my ($s_freqs, $o_freqs) = $ea->() ) {

        # when widths differ, a site missing from one object arrives as undef;
        # treat it as an empty row instead of dereferencing undef
        $s_freqs //= [];
        $o_freqs //= [];

        my $chi2 = 0;

        AA:
        for my $i (0..$#$o_freqs) {
            my $expected = $s_freqs->[$i];

            # a null (or missing) expected frequency would trigger a fatal
            # "Illegal division by zero": skip the term and report it later
            # Note: numeric test needed as a string like '0.0000' is true
            if (!defined $expected || $expected == 0) {
                $skipped++;
                next AA;
            }

            $chi2 += ( $o_freqs->[$i] - $expected )**2 / $expected;
        }

        # Note: trick to get identical results across platforms
        # https://stackoverflow.com/questions/21204733/a-better-chi-square-test-for-perl
        push @stats, 0 + ( sprintf "%.13f", $chi2 );
    }

    carp "[BMC] Warning: skipped $skipped term(s) with null or missing"
        . ' expected frequency!' if $skipped;

    return Rates->new( mask => \@stats );
}
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# I/O methods |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub load {
    # Class method: build a new PMSF object from a whitespace-delimited file.
    #
    # Each non-blank line is expected to hold a site number followed by the
    # frequencies for that site; the site number is discarded and the
    # remaining values are stored as one state (row) of the mask.
    #
    # Args   : $infile - path to the file to read
    # Returns: the newly built object (instance of the invoking class)
    # Dies   : on open/close failure (through autodie)
    my $class  = shift;
    my $infile = shift;

    open my $in, '<', $infile;

    my $mask = $class->new();

    LINE:
    while (my $line = <$in>) {
        chomp $line;

        # skip empty lines, which would otherwise be stored as empty rows
        # TODO: also skip header and comment lines?
        next LINE if $line =~ m/\A \s* \z/xms;

        # split line on whitespace and ignore first value (site number)
        # Note: the single-space pattern also discards leading whitespace,
        # so that an indented site number is not mistaken for a frequency
        my (undef, @fields) = split q{ }, $line;

        # store AA freqs all at once
        $mask->add_state( \@fields );
    }

    # release the handle explicitly (failure reported by autodie)
    close $in;

    return $mask;
}
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
85
|
|
|
|
|
|
|
1; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
__END__ |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=pod |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head1 NAME |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Bio::MUST::Core::SeqMask::Pmsf - Posterior mean site frequencies (PMSF) for sequence sites |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head1 VERSION |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
version 0.212530 |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 SYNOPSIS |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# TODO |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 DESCRIPTION |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# TODO |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 METHODS |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 chi_square_stats |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 load |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 AUTHOR |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
122
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |