line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MyBioinfo::Math; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
34
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
43
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
5
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
1132
|
|
6
|
1
|
|
|
1
|
|
7
|
use Math::CDF qw(pchisq); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
54
|
|
7
|
1
|
|
|
1
|
|
6
|
use Data::Dumper; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
67
|
|
8
|
1
|
|
|
1
|
|
5
|
use MyBioinfo::Common qw(sum); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
991
|
|
9
|
|
|
|
|
|
|
require Exporter; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
14
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
15
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# This allows declaration use MyBioinfo::Common ':all'; |
18
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
19
|
|
|
|
|
|
|
# will save memory. |
20
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
21
|
|
|
|
|
|
|
gtest_gof chisqtest_gof |
22
|
|
|
|
|
|
|
) ] ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our @EXPORT_OK = @{ $EXPORT_TAGS{'all'} }; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our @EXPORT = (); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our $VERSION = '0.11'; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
############### CHANGES: ################### |
31
|
|
|
|
|
|
|
# Mar. 1 2012 15:25 Added protection in G-test when round-off error causes |
32
|
|
|
|
|
|
|
# G-statistic to be a small negative. |
33
|
|
|
|
|
|
|
# |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
######## Preloaded methods go here. ############## |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Use G-test to perform Goodness-of-fit test. |
38
|
|
|
|
|
|
|
sub gtest_gof{ |
39
|
0
|
|
|
0
|
0
|
|
my $ra = shift; # reference to array of numbers. |
40
|
0
|
|
|
|
|
|
my $rp; # reference to array of probabilities. |
41
|
|
|
|
|
|
|
# If vector p is not specified, assume uniform probability. |
42
|
0
|
0
|
|
|
|
|
if(@_ > 0){ |
43
|
0
|
|
|
|
|
|
$rp = shift; |
44
|
0
|
0
|
|
|
|
|
if(!&p_sanity($rp)){ |
45
|
0
|
|
|
|
|
|
die "Probability vector contains elements that are zero or negative. G-test was not performed. Abort.\n"; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
}else{ |
48
|
0
|
|
|
|
|
|
$rp = [ (1/@{$ra}) x @{$ra} ]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
# Fill out the vector of expectated values. |
51
|
0
|
|
|
|
|
|
my @a_exp = fill_exp($ra, $rp); |
52
|
|
|
|
|
|
|
# Calcualte G-statistic. |
53
|
0
|
|
|
|
|
|
my $g_stat = 0; |
54
|
0
|
|
|
|
|
|
for(my $i = 0; $i < @a_exp; $i++){ |
55
|
0
|
0
|
|
|
|
|
if($ra->[$i] != 0){ # skip zero entry in observation vector. |
56
|
0
|
|
|
|
|
|
$g_stat += $ra->[$i] * log( $ra->[$i] / $a_exp[$i] ); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
0
|
|
|
|
|
|
$g_stat *= 2.0; |
60
|
|
|
|
|
|
|
# Use Chi-square distribution to calculate p-value. |
61
|
0
|
|
|
|
|
|
my $df = @{$ra} - 1; |
|
0
|
|
|
|
|
|
|
62
|
0
|
|
|
|
|
|
my $p; # p-value. |
63
|
0
|
0
|
|
|
|
|
if($g_stat <= 0){ # this may happen due to round-off errors. |
64
|
0
|
|
|
|
|
|
$p = 1; |
65
|
|
|
|
|
|
|
}else{ |
66
|
0
|
|
|
|
|
|
$p = 1 - pchisq($g_stat, $df); |
67
|
|
|
|
|
|
|
} |
68
|
0
|
|
|
|
|
|
return ($g_stat, $df, $p); |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Use Chi-square test to perform Goodness-of-fit test. |
72
|
|
|
|
|
|
|
sub chisqtest_gof{ |
73
|
0
|
|
|
0
|
0
|
|
my $ra = shift; # reference to array of numbers. |
74
|
0
|
|
|
|
|
|
my $rp; # reference to array of probabilities. |
75
|
|
|
|
|
|
|
# If vector p is not specified, assume uniform probability. |
76
|
0
|
0
|
|
|
|
|
if(@_ > 0){ |
77
|
0
|
|
|
|
|
|
$rp = shift; |
78
|
0
|
0
|
|
|
|
|
if(!&p_sanity($rp)){ |
79
|
0
|
|
|
|
|
|
die "Probability vector contains elements that are zero or negative. Chi-square test was not performed. Abort.\n"; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
}else{ |
82
|
0
|
|
|
|
|
|
$rp = [ (1/@{$ra}) x @{$ra} ]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
# Fill out the vector of expectated values. |
85
|
0
|
|
|
|
|
|
my @a_exp = fill_exp($ra, $rp); |
86
|
|
|
|
|
|
|
# Calcualte Chisq-statistic. |
87
|
0
|
|
|
|
|
|
my $ch_stat = 0; |
88
|
0
|
|
|
|
|
|
for(my $i = 0; $i < @a_exp; $i++){ |
89
|
0
|
|
|
|
|
|
$ch_stat += ( $ra->[$i] - $a_exp[$i] ) ** 2 / $a_exp[$i]; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
# Use Chi-square distribution to calculate p-value. |
92
|
0
|
|
|
|
|
|
my $df = @{$ra} - 1; |
|
0
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
my $p = 1 - pchisq($ch_stat, $df); |
94
|
0
|
|
|
|
|
|
return ($ch_stat, $df, $p); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Fill out a vector of expected values. A common subroutine used by both G-test |
98
|
|
|
|
|
|
|
# and Chi-square test. |
99
|
|
|
|
|
|
|
sub fill_exp{ |
100
|
0
|
|
|
0
|
0
|
|
my($ra, $rp) = @_; |
101
|
0
|
|
|
|
|
|
my $v_sum = sum(@{$ra}); |
|
0
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
my @a_exp; |
103
|
0
|
|
|
|
|
|
for(my $i = 0; $i < @{$ra}; $i++){ |
|
0
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
push @a_exp, $v_sum * $rp->[$i]; |
105
|
|
|
|
|
|
|
} |
106
|
0
|
|
|
|
|
|
return @a_exp; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Check the sanity for probability vector: all elements should be larger than zero. |
110
|
|
|
|
|
|
|
sub p_sanity{ |
111
|
0
|
|
|
0
|
0
|
|
my $p_ref = shift; |
112
|
0
|
|
|
|
|
|
foreach my $p(@{$p_ref}){ |
|
0
|
|
|
|
|
|
|
113
|
0
|
0
|
|
|
|
|
return 0 if $p <= 0; |
114
|
|
|
|
|
|
|
} |
115
|
0
|
|
|
|
|
|
return 1; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
1; |
121
|
|
|
|
|
|
|
__END__ |