File Coverage

blib/lib/Statistics/Robust/Location.pm

Criterion	Covered	Total	%
statement	62	80	77.5
branch	5	14	35.7
condition			n/a
subroutine	10	11	90.9
pod	5	5	100.0
total	82	110	74.5

line	stmt	bran	sub	pod	time	code
1						package Statistics::Robust::Location;
2
3	2		2		44811	use strict;
	2				6
	2				89
4	2		2		13	use warnings;
	2				5
	2				70
5	2		2		11	use Carp;
	2				3
	2				282
6
7	2		2		1291	use Math::CDF qw(pbeta);
	2				19142
	2				785
8	2		2		2184	use POSIX qw(floor);
	2				20154
	2				30
9
10	2		2		2026	use base 'Statistics::Robust';
	2				4
	2				1414
11
12						our @EXPORT = ();
13						our @EXPORT_OK = (qw(
14						median
15						win
16						tmean
17						mean
18						hd
19						));
20						our %EXPORT_TAGS = ( all => \@EXPORT_OK );
21
22						# Implement the median to avoid needing Statistics::Basic
23						# which has silly Number::Format requirements (Version 1.6004)
24						sub
25						median
26						{
27	7		7	1	794	my($x) = @_;
28
29	7				27	my @y = sort {$a <=> $b} @$x;
	353				549
30	7				14	my $length = scalar @y;
31
32	7				11	my $median = undef;
33	7	100			33	if ( $length % 2 )
34						{
35	6				14	my $half = int ($length/2);
36	6				15	$median = $y[$half];
37						}
38						else
39						{
40	1				5	my $lower_half = int (($length-1)/2);
41	1				4	my $upper_half = int ($length/2);
42
43	1				4	$median = ($y[$lower_half] + $y[$upper_half])/2;
44						}
45
46	7				30	return $median;
47						}
48
49
50						# The gamma Winsorized mean
51						sub
52						win
53						{
54	0		0	1	0	my($x,$gamma) = @_;
55
56	0	0			0	if ( not defined $gamma )
57						{
58	0				0	$gamma = 0.2;
59						}
60
61	0				0	my @y = sort {$a <=> $b} @$x;
	0				0
62	0				0	my $n = scalar @y;
63	0				0	my $ibot = floor($gamma*$n) + 1;
64	0				0	my $itop = $n - $ibot + 1;
65	0				0	my $xbot = $y[$ibot];
66	0				0	my $xtop = $y[$itop];
67
68	0				0	for(my $i=0;$i<@y;$i++)
69						{
70	0	0			0	if ( $y[$i] <= $xbot )
71						{
72	0				0	$y[$i] = $xbot;
73						}
74	0	0			0	if ( $y[$i] >= $xtop )
75						{
76	0				0	$y[$i] = $xtop;
77						}
78						}
79
80	0				0	my $win = mean(\@y);
81	0				0	return $win;
82						}
83
84						# The trimmed mean
85						sub
86						tmean
87						{
88	1		1	1	15	my($x,$tr) = @_;
89
90	1	50			5	if ( not defined $tr )
91						{
92	1				3	$tr = 0.2;
93						}
94
95	1				7	my @y = sort {$a <=> $b} @$x;
	51				54
96	1				4	my $n = scalar @y;
97	1				25	my $g = floor($tr*$n);
98
99	1				3	my $tmean = 0.0;
100	1				6	for(my $i=$g;$i<($n-$g);$i++)
101						{
102	11				25	$tmean += $y[$i];
103						}
104	1				3	$tmean = $tmean/($n - 2*$g);
105
106	1				5	return $tmean;
107						}
108
109						# The Harrel-Davis Estimator for quantiles
110						sub
111						hd
112						{
113	1		1	1	345	my($x,$q) = @_;
114
115	1	50			7	if ( not defined $q )
116						{
117	1				3	$q = 0.5;
118						}
119	1				2	my $n = scalar @$x;
120	1				3	my $m1 = ($n+1)*$q;
121	1				3	my $m2 = ($n+1)*(1-$q);
122	1				4	my @y = sort {$a <=> $b} @$x;
	51				57
123
124	1				3	my @wy = ();
125	1				6	for(my $i=1;$i<=$n;$i++)
126						{
127	17				226	my $w = pbeta(($i/$n),$m1,$m2) - pbeta(($i-1)/$n, $m1,$m2);
128	17				69	$wy[$i-1] = $w*$y[$i-1];
129						}
130
131	1				7	my $hd = Statistics::Robust::_sum(\@wy);
132
133	1				5	return $hd;
134						}
135
136						sub
137						mean
138						{
139	1		1	1	302	my($x) = @_;
140
141	1				3	my $n = scalar @$x;
142	1	50			5	if ( $n == 0 ) { return undef;}
	0				0
143	1				4	my $sum = Statistics::Robust::_sum($x);
144	1				4	my $mean = $sum/$n;
145
146	1				4	return $mean;
147						}
148
149						1;
150
151						=head1 NAME
152
153						Statistics::Robust::Location - Robust Location Estimators
154
155						=head1 SYNOPSIS
156
157						my @x = (1,4,5,3,7,2,4);
158
159						my($tmean) = tmean(\@x);
160
161						my($win) = win(\@x);
162
163						=head1 FUNCTIONS
164
165						=head2 win
166
167						my($win) = win(\@x,$gamma);
168
169						Return the gamma Winsorized mean. If gamma is not specified, it defaults to 0.2.
170
171						=head2 tmean
172
173						my($tmean) = tmean(\@x,$tr);
174
175						Return the trimmed mean. If $tr is not specified, it defaults to 0.2.
176
177						=head2 mean
178
179						my($mean) = mean(\@x);
180
181						Although not a robust measure of location, it is useful for comparison as well as used internally in the computation
182						of some robust methods. This returns the mean of the array @x.
183
184						=head2 hd
185
186						my($est) = hd(\@x, $q)
187
188						The Harell-Davis Estimator for the quantile $q.
189
190						=head2 median
191
192						my($median) = median(\@x);
193
194						Return the median
195
196						=head1 AUTHOR
197
198						Walter Szeliga C<< >>
199
200						=cut