| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Statistics::Robust::Location; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
44811
|
use strict; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
89
|
|
|
4
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
70
|
|
|
5
|
2
|
|
|
2
|
|
11
|
use Carp; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
282
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
1291
|
use Math::CDF qw(pbeta); |
|
|
2
|
|
|
|
|
19142
|
|
|
|
2
|
|
|
|
|
785
|
|
|
8
|
2
|
|
|
2
|
|
2184
|
use POSIX qw(floor); |
|
|
2
|
|
|
|
|
20154
|
|
|
|
2
|
|
|
|
|
30
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
2026
|
use base 'Statistics::Robust'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
1414
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our @EXPORT = (); |
|
13
|
|
|
|
|
|
|
our @EXPORT_OK = (qw( |
|
14
|
|
|
|
|
|
|
median |
|
15
|
|
|
|
|
|
|
win |
|
16
|
|
|
|
|
|
|
tmean |
|
17
|
|
|
|
|
|
|
mean |
|
18
|
|
|
|
|
|
|
hd |
|
19
|
|
|
|
|
|
|
)); |
|
20
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( all => \@EXPORT_OK ); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Implement the median to avoid needing Statistics::Basic |
|
23
|
|
|
|
|
|
|
# which has silly Number::Format requirements (Version 1.6004) |
|
24
|
|
|
|
|
|
|
sub |
|
25
|
|
|
|
|
|
|
median |
|
26
|
|
|
|
|
|
|
{ |
|
27
|
7
|
|
|
7
|
1
|
794
|
my($x) = @_; |
|
28
|
|
|
|
|
|
|
|
|
29
|
7
|
|
|
|
|
27
|
my @y = sort {$a <=> $b} @$x; |
|
|
353
|
|
|
|
|
549
|
|
|
30
|
7
|
|
|
|
|
14
|
my $length = scalar @y; |
|
31
|
|
|
|
|
|
|
|
|
32
|
7
|
|
|
|
|
11
|
my $median = undef; |
|
33
|
7
|
100
|
|
|
|
33
|
if ( $length % 2 ) |
|
34
|
|
|
|
|
|
|
{ |
|
35
|
6
|
|
|
|
|
14
|
my $half = int ($length/2); |
|
36
|
6
|
|
|
|
|
15
|
$median = $y[$half]; |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
else |
|
39
|
|
|
|
|
|
|
{ |
|
40
|
1
|
|
|
|
|
5
|
my $lower_half = int (($length-1)/2); |
|
41
|
1
|
|
|
|
|
4
|
my $upper_half = int ($length/2); |
|
42
|
|
|
|
|
|
|
|
|
43
|
1
|
|
|
|
|
4
|
$median = ($y[$lower_half] + $y[$upper_half])/2; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
7
|
|
|
|
|
30
|
return $median; |
|
47
|
|
|
|
|
|
|
} |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# The gamma Winsorized mean |
|
51
|
|
|
|
|
|
|
sub |
|
52
|
|
|
|
|
|
|
win |
|
53
|
|
|
|
|
|
|
{ |
|
54
|
0
|
|
|
0
|
1
|
0
|
my($x,$gamma) = @_; |
|
55
|
|
|
|
|
|
|
|
|
56
|
0
|
0
|
|
|
|
0
|
if ( not defined $gamma ) |
|
57
|
|
|
|
|
|
|
{ |
|
58
|
0
|
|
|
|
|
0
|
$gamma = 0.2; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
0
|
|
|
|
|
0
|
my @y = sort {$a <=> $b} @$x; |
|
|
0
|
|
|
|
|
0
|
|
|
62
|
0
|
|
|
|
|
0
|
my $n = scalar @y; |
|
63
|
0
|
|
|
|
|
0
|
my $ibot = floor($gamma*$n) + 1; |
|
64
|
0
|
|
|
|
|
0
|
my $itop = $n - $ibot + 1; |
|
65
|
0
|
|
|
|
|
0
|
my $xbot = $y[$ibot]; |
|
66
|
0
|
|
|
|
|
0
|
my $xtop = $y[$itop]; |
|
67
|
|
|
|
|
|
|
|
|
68
|
0
|
|
|
|
|
0
|
for(my $i=0;$i<@y;$i++) |
|
69
|
|
|
|
|
|
|
{ |
|
70
|
0
|
0
|
|
|
|
0
|
if ( $y[$i] <= $xbot ) |
|
71
|
|
|
|
|
|
|
{ |
|
72
|
0
|
|
|
|
|
0
|
$y[$i] = $xbot; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
0
|
0
|
|
|
|
0
|
if ( $y[$i] >= $xtop ) |
|
75
|
|
|
|
|
|
|
{ |
|
76
|
0
|
|
|
|
|
0
|
$y[$i] = $xtop; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
|
|
80
|
0
|
|
|
|
|
0
|
my $win = mean(\@y); |
|
81
|
0
|
|
|
|
|
0
|
return $win; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# The trimmed mean |
|
85
|
|
|
|
|
|
|
sub |
|
86
|
|
|
|
|
|
|
tmean |
|
87
|
|
|
|
|
|
|
{ |
|
88
|
1
|
|
|
1
|
1
|
15
|
my($x,$tr) = @_; |
|
89
|
|
|
|
|
|
|
|
|
90
|
1
|
50
|
|
|
|
5
|
if ( not defined $tr ) |
|
91
|
|
|
|
|
|
|
{ |
|
92
|
1
|
|
|
|
|
3
|
$tr = 0.2; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
1
|
|
|
|
|
7
|
my @y = sort {$a <=> $b} @$x; |
|
|
51
|
|
|
|
|
54
|
|
|
96
|
1
|
|
|
|
|
4
|
my $n = scalar @y; |
|
97
|
1
|
|
|
|
|
25
|
my $g = floor($tr*$n); |
|
98
|
|
|
|
|
|
|
|
|
99
|
1
|
|
|
|
|
3
|
my $tmean = 0.0; |
|
100
|
1
|
|
|
|
|
6
|
for(my $i=$g;$i<($n-$g);$i++) |
|
101
|
|
|
|
|
|
|
{ |
|
102
|
11
|
|
|
|
|
25
|
$tmean += $y[$i]; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
1
|
|
|
|
|
3
|
$tmean = $tmean/($n - 2*$g); |
|
105
|
|
|
|
|
|
|
|
|
106
|
1
|
|
|
|
|
5
|
return $tmean; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# The Harrel-Davis Estimator for quantiles |
|
110
|
|
|
|
|
|
|
sub |
|
111
|
|
|
|
|
|
|
hd |
|
112
|
|
|
|
|
|
|
{ |
|
113
|
1
|
|
|
1
|
1
|
345
|
my($x,$q) = @_; |
|
114
|
|
|
|
|
|
|
|
|
115
|
1
|
50
|
|
|
|
7
|
if ( not defined $q ) |
|
116
|
|
|
|
|
|
|
{ |
|
117
|
1
|
|
|
|
|
3
|
$q = 0.5; |
|
118
|
|
|
|
|
|
|
} |
|
119
|
1
|
|
|
|
|
2
|
my $n = scalar @$x; |
|
120
|
1
|
|
|
|
|
3
|
my $m1 = ($n+1)*$q; |
|
121
|
1
|
|
|
|
|
3
|
my $m2 = ($n+1)*(1-$q); |
|
122
|
1
|
|
|
|
|
4
|
my @y = sort {$a <=> $b} @$x; |
|
|
51
|
|
|
|
|
57
|
|
|
123
|
|
|
|
|
|
|
|
|
124
|
1
|
|
|
|
|
3
|
my @wy = (); |
|
125
|
1
|
|
|
|
|
6
|
for(my $i=1;$i<=$n;$i++) |
|
126
|
|
|
|
|
|
|
{ |
|
127
|
17
|
|
|
|
|
226
|
my $w = pbeta(($i/$n),$m1,$m2) - pbeta(($i-1)/$n, $m1,$m2); |
|
128
|
17
|
|
|
|
|
69
|
$wy[$i-1] = $w*$y[$i-1]; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
1
|
|
|
|
|
7
|
my $hd = Statistics::Robust::_sum(\@wy); |
|
132
|
|
|
|
|
|
|
|
|
133
|
1
|
|
|
|
|
5
|
return $hd; |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub |
|
137
|
|
|
|
|
|
|
mean |
|
138
|
|
|
|
|
|
|
{ |
|
139
|
1
|
|
|
1
|
1
|
302
|
my($x) = @_; |
|
140
|
|
|
|
|
|
|
|
|
141
|
1
|
|
|
|
|
3
|
my $n = scalar @$x; |
|
142
|
1
|
50
|
|
|
|
5
|
if ( $n == 0 ) { return undef;} |
|
|
0
|
|
|
|
|
0
|
|
|
143
|
1
|
|
|
|
|
4
|
my $sum = Statistics::Robust::_sum($x); |
|
144
|
1
|
|
|
|
|
4
|
my $mean = $sum/$n; |
|
145
|
|
|
|
|
|
|
|
|
146
|
1
|
|
|
|
|
4
|
return $mean; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
1; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 NAME |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Statistics::Robust::Location - Robust Location Estimators |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my @x = (1,4,5,3,7,2,4); |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
my($tmean) = tmean(\@x); |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
my($win) = win(\@x); |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head2 win |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my($win) = win(\@x,$gamma); |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Return the gamma Winsorized mean. If gamma is not specified, it defaults to 0.2. |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head2 tmean |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
my($tmean) = tmean(\@x,$tr); |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Return the trimmed mean. If $tr is not specified, it defaults to 0.2. |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 mean |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
my($mean) = mean(\@x); |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Although not a robust measure of location, it is useful for comparison as well as used internally in the computation |
|
182
|
|
|
|
|
|
|
of some robust methods. This returns the mean of the array @x. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 hd |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
my($est) = hd(\@x, $q) |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
The Harell-Davis Estimator for the quantile $q. |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 median |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
my($median) = median(\@x); |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Return the median |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head1 AUTHOR |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Walter Szeliga C<< >> |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |