line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::KruskalWallis; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.01'; |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
20593
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
83
|
|
7
|
1
|
|
|
1
|
|
1456
|
use Statistics::Distributions; |
|
1
|
|
|
|
|
9777
|
|
|
1
|
|
|
|
|
894
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
############## |
10
|
|
|
|
|
|
|
# Constructor.
#
# May be invoked on a class name or on an existing object (in which case
# the new object is blessed into that object's class).
#
# Returns a blessed hash reference holding the initial, empty state:
#   sample_data   - undef until the first data set is loaded
#   rank_data     - undef until the first data set is loaded
#   no_of_sets    - 0
#   no_of_samples - 0
sub new {
    my ($invocant) = @_;

    # Accept either a class name or an instance as the invocant.
    my $class = ref($invocant) || $invocant;

    my %initial_state = (
        sample_data   => undef,
        rank_data     => undef,
        no_of_sets    => 0,
        no_of_samples => 0,
    );

    return bless { %initial_state }, $class;
}
25
|
|
|
|
|
|
|
############## |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
############## |
28
|
|
|
|
|
|
|
# Registers one named data set with the object.
#
# Arguments:
#   $sample_name - name under which the data set is stored.
#   @sample_data - the observations of that data set (all remaining
#                  arguments).
#
# Side effects: stores a reference to a copy of the observations under
# sample_data, updates the no_of_sets / no_of_samples counters and resets
# the rank accumulators (sum and n) of the data set to 0.
#
# Loading a name that is already present replaces the earlier data set;
# the counters are adjusted so that the replaced observations and the set
# itself are not counted twice.
#
# Returns 1.
sub load_data {
    my $self        = shift;
    my $sample_name = shift;

    my (@sample_data) = @_;

    if ( exists $self->{sample_data}->{$sample_name} ) {
        # Replacing an existing set: forget its old observations and do
        # not count the set a second time.
        $self->{no_of_samples} -= @{ $self->{sample_data}->{$sample_name} };
    }
    else {
        $self->{no_of_sets} = $self->{no_of_sets} + 1;
    }

    $self->{no_of_samples} += @sample_data;
    $self->{sample_data}->{$sample_name} = \@sample_data;
    $self->{rank_data}->{$sample_name}->{sum} = 0;
    $self->{rank_data}->{$sample_name}->{n}   = 0;

    return 1;
} # end sub load_data
42
|
|
|
|
|
|
|
############## |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
############## |
45
|
|
|
|
|
|
|
# Runs the whole test on the data sets loaded so far.
#
# Steps: group the observations by value (_group_data), assign ranks
# (_rank_data), compute the H statistic (_calculate_H) and pass H with
# (no_of_sets - 1) degrees of freedom to
# Statistics::Distributions::chisqrprob.
#
# Returns the list ($H, $chi_prob).
sub perform_kruskal_wallis_test {
    my ($self) = @_;

    my ($observations_by_value) = $self->_group_data();
    $self->_rank_data($observations_by_value);

    my ($H) = $self->_calculate_H();

    # chi_prob only valid when no_of_sets > 3
    my $degrees_of_freedom = $self->{no_of_sets} - 1;
    my ($chi_prob) =
        Statistics::Distributions::chisqrprob( $degrees_of_freedom, $H );

    return ( $H, $chi_prob );
} # end sub
57
|
|
|
|
|
|
|
############## |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
############## |
60
|
|
|
|
|
|
|
# Builds an index from observed value to the data sets it occurs in.
#
# Reads every observation of every data set in $self->{sample_data}.
#
# Returns a reference to a hash whose keys are the observed values and
# whose values are array references listing the name of the owning data
# set once per occurrence (so a value seen three times has three entries).
#
# Each observation is converted to a number before it is used as a key, so
# numerically equal observations that are spelled differently (for example
# '1' and '1.0') share one key and are later ranked as ties.
sub _group_data {
    my $self = shift;
    my %grouped_data;

    foreach my $sample_name ( keys %{ $self->{sample_data} } ) {
        foreach my $sample_data_element ( @{ $self->{sample_data}->{$sample_name} } ) {
            # Numify so that the hash key is the canonical form of the value.
            my $value_key = 0 + $sample_data_element;

            # push autovivifies the list the first time a value is seen.
            push @{ $grouped_data{$value_key} }, $sample_name;
        } # end foreach sample_data_element
    } # end foreach sample name

    return ( \%grouped_data );
} # end sub _group_data
80
|
|
|
|
|
|
|
############## |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
############## |
83
|
|
|
|
|
|
|
# Assigns ranks to all observations and accumulates them per data set.
#
# Arguments:
#   $grouped_data_ref - hash reference mapping each observed value to an
#                       array reference of data-set names, one entry per
#                       occurrence of that value.
#
# Side effects: for every data set in $self->{sample_data} the entries
# $self->{rank_data}{NAME}{sum} (sum of ranks) and {n} (number of ranked
# observations) are reset to 0 and then filled in.
#
# Values are ranked in ascending numeric order starting at 1. Observations
# sharing a value all receive the mean of the ranks they jointly occupy:
# a run of t tied observations starting at rank r gets r + (t - 1) / 2,
# and the next distinct value starts at rank r + t. This holds for any
# number of tied observations.
sub _rank_data {
    my $self             = shift;
    my $grouped_data_ref = shift;

    foreach my $sample_name ( keys %{ $self->{sample_data} } ) {
        $self->{rank_data}->{$sample_name}->{sum} = 0;
        $self->{rank_data}->{$sample_name}->{n}   = 0;
    } # end foreach

    my $next_rank = 1;

    foreach my $sample_data_value ( sort { $a <=> $b } keys %$grouped_data_ref ) {
        my $owners     = $grouped_data_ref->{$sample_data_value};
        my $tied_count = scalar @$owners;

        # Mean of the ranks next_rank .. next_rank + tied_count - 1.
        my $shared_rank = $next_rank + ( $tied_count - 1 ) / 2;

        foreach my $sample_name (@$owners) {
            $self->{rank_data}->{$sample_name}->{sum} += $shared_rank;
            $self->{rank_data}->{$sample_name}->{n}++;
        } # end foreach

        $next_rank += $tied_count;
    } # end foreach

    return;
} # end sub
109
|
|
|
|
|
|
|
############## |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
############## |
112
|
|
|
|
|
|
|
# Computes the Kruskal-Wallis H statistic from the accumulated ranks.
#
# Reads $self->{no_of_samples} (N) and, for every data set named in
# $self->{sample_data}, the rank sum R and observation count n stored in
# $self->{rank_data}. The statistic is
#
#     H = 12 / (N * (N + 1)) * sum(R**2 / n) - 3 * (N + 1)
#
# Returns H (as a one-element list).
#
# Croaks if no observations have been loaded, or if a data set has no
# ranked observations (for example because it was loaded empty), instead
# of failing with an anonymous division by zero.
sub _calculate_H {
    my $self = shift;

    my $total_samples = $self->{no_of_samples};
    croak "Cannot calculate H: no sample data has been loaded"
        unless $total_samples;

    # calculate mean sum
    my $mean_sq_sum = 0;

    foreach my $sample_name ( keys %{ $self->{sample_data} } ) {
        my $ranks = $self->{rank_data}->{$sample_name};

        croak "Cannot calculate H: data set '$sample_name' has no ranked observations"
            unless $ranks && $ranks->{n};

        $mean_sq_sum += ( $ranks->{sum}**2 ) / $ranks->{n};
    } # end foreach samplename

    # calculate kw statistic
    my $H = 12 / ( $total_samples * ( $total_samples + 1 ) );
    $H = $H * $mean_sq_sum;
    $H = $H - 3 * ( $total_samples + 1 );

    return ($H);
} # end sub _calculate_H
132
|
|
|
|
|
|
|
############## |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
############## |
135
|
|
|
|
|
|
|
# Pairwise post-hoc comparison of two loaded data sets.
#
# Uses the rank sums and counts stored in $self->{rank_data}, which are
# filled in by _rank_data (called from perform_kruskal_wallis_test), so
# the main test has to be run first.
#
# Arguments:
#   $test_name          - post-hoc test to run; only 'Newman-Keuls' is
#                         implemented.
#   $control_group_name - name of the first data set.
#   $test_group_name    - name of the second data set.
#
# Returns the list ($q, $p_value):
#   $q       - (mean rank of control - mean rank of test) / SE, where
#              SE = sqrt( N * (N + 1) / 12 * (1/n_control + 1/n_test) )
#              and N is no_of_samples. The sign reflects argument order.
#   $p_value - one of the strings '<0.01', '<0.05', '<0.1' or '>0.1',
#              chosen by comparing the magnitude of $q with 2.576, 1.960
#              and 1.645. Because the magnitude is used, swapping the two
#              groups yields the same bracket.
# For any other $test_name both elements are undef.
#
# Croaks if either group is unknown or has no ranked observations.
sub post_hoc {
    my $self      = shift;
    my $test_name = shift;
    my ( $control_group_name, $test_group_name ) = @_;

    my ( $p_value, $q );

    # one day may add further post-hoc tests
    if ( $test_name eq 'Newman-Keuls' ) {
        my $rank_data = $self->{rank_data};

        # Both groups must have been ranked; otherwise the divisions below
        # would fail with an uninformative "division by zero".
        foreach my $group_name ( $control_group_name, $test_group_name ) {
            my $label = defined $group_name ? $group_name : '(undef)';
            croak "post_hoc: no ranked data for group '$label'"
                unless defined $group_name
                    && $rank_data
                    && $rank_data->{$group_name}
                    && $rank_data->{$group_name}->{n};
        }

        my $control = $rank_data->{$control_group_name};
        my $test    = $rank_data->{$test_group_name};

        my $SE = ( $self->{no_of_samples} * ( $self->{no_of_samples} + 1 ) ) / 12;
        $SE = $SE * ( 1 / $control->{n} + 1 / $test->{n} );
        $SE = $SE**0.5;

        my $r1 = $control->{sum} / $control->{n};
        my $r2 = $test->{sum} / $test->{n};

        $q = ( $r1 - $r2 ) / $SE;

        # The bracket depends only on how far apart the mean ranks are,
        # not on which group was named first.
        my $magnitude = abs($q);

        if    ( $magnitude > 2.576 ) { $p_value = '<0.01'; }
        elsif ( $magnitude > 1.960 ) { $p_value = '<0.05'; }
        elsif ( $magnitude > 1.645 ) { $p_value = '<0.1'; }
        else                         { $p_value = '>0.1'; }

    } # end test

    return ( $q, $p_value );
} # end sub post_hoc
163
|
|
|
|
|
|
|
############## |
164
|
|
|
|
|
|
|
1; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
__END__ |