File Coverage

blib/lib/Statistics/ANOVA/Friedman.pm

Criterion	Covered	Total	%
statement	49	67	73.1
branch	8	18	44.4
condition	4	15	26.6
subroutine	13	16	81.2
pod	4	4	100.0
total	78	120	65.0

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::ANOVA::Friedman;
2
3	5			5		84830	use 5.006;
	5					13
4	5			5		18	use strict;
	5					6
	5					135
5	5			5		18	use warnings FATAL => 'all';
	5					11
	5					249
6	5			5		21	use base qw(Statistics::Data);
	5					5
	5					2915
7	5			5		125239	use Carp qw(croak);
	5					6
	5					203
8	5			5		21	use List::AllUtils qw(sum0);
	5					5
	5					183
9	5			5		2887	use Math::Cephes qw(:dists);
	5					20321
	5					1171
10	5			5		2218	use Statistics::Data::Rank;
	5					12694
	5					2646
11							$Statistics::ANOVA::Friedman::VERSION = '0.02';
12
13							=head1 NAME
14
15							Statistics::ANOVA::Friedman - Nonparametric repeated measures analysis of variance for dependent factorial measures (Friedman Test)
16
17							=head1 VERSION
18
19							This is documentation for version 0.02, released February 2017.
20
21							=head1 SYNOPSIS
22
23							use Statistics::ANOVA::Friedman;
24							my $fri = Statistics::ANOVA::Friedman->new();
25							my ($chi_value, $df, $count, $p_value) = $fri->chiprob_test(data => HOA);
26							$fri->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11]}); # or pre-load with HOA
27							($chi_value, $df, $count, $p_value) = $fri->chiprob_test();
28							my ($f_value, $df_b, $df_w, $p_value2) = $fri->fprob_test();
29
30							=head1 DESCRIPTION
31
32							Performs the B<Friedman> nonparametric analysis of variance - for dependent (correlated, matched) measures of two or more discrete (nominal) variables, such as when the measures are taken from the same source (e.g., person, plot) but under different conditions. A ranking procedure is used, but, unlike the case for independent measures, the ranks are taken at each common index of each measure, i.e., within-groups.
33
34							By default, the method accounts for and corrects for ties, but if B<correct_ties> => 0, the test-statistic is uncorrected. The correction involves accounting for the number of tied variables at each index, as per Hollander & Wolfe (1999), Eq. 7.8, p. 274.
35
36							Correctness of output is tested on installation using example data from Hollander & Wolfe (1999, p. 274ff), Rice (1995, p. 470), Sarantakos (1993, p. 404-405), and Siegal (1956, p. 167ff); tests fail if the published chi-values and degrees-of-freedom are not returned by the module.
37
38							The module uses L<Statistics::Data|Statistics::Data> as a base so that data can be pre-loaded and added to per that module's methods.
39
40							=head1 SUBROUTINES/METHODS
41
42							=head2 new
43
44							$fri = Statistics::ANOVA::Friedman->new();
45
46							New object for accessing methods and storing results. This "isa" Statistics::Data object.
47
48							=head2 load, add, unload
49
50							$fri->load('a' => [1, 4], 'b' => [3, 7]);
51
52							The given data can now be used by any of the following methods. This is inherited from L<Statistics::Data|Statistics::Data>, and all its other methods are available here via the class object. Only passing of data as a hash of arrays (HOA) is supported for now. Alternatively, give each of the following methods the HOA for the optional named argument B<data>.
53
54							=head2 chiprob_test
55
56							($chi_value, $df, $count, $p_value) = $fri->chiprob_test(data => HOA, correct_ties => 1);
57
58							Performs the ANOVA and returns the chi-square value, its degrees-of-freedom, the total number of observations, and associated probability value (or only the latter if called in scalar context). Default value of optional argument B<correct_ties> is 1.
59
60							=cut
61
62							sub chiprob_test {
63	5			5	1	4509	my ( $self, %args ) = @_;
64	5	100				51	my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
65	5					262	my $n_bt = scalar keys %{$data};
	5					13
66	5					35	my $n_wt = $self->equal_n( data => $data );
67	5	50	33			178	croak
			33
68							'Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test'
69							if not $n_wt
70							or $n_wt == 1
71							or $n_bt < 2;
72							my $chi =
73	5	100				23	_definitely_no( $args{'correct_ties'} )
74							? _chi_ig_ties( $n_bt, $n_wt,
75							scalar Statistics::Data::Rank->sumsq_ranks_within( data => $data ) )
76							: _chi_by_ties( $n_bt, $n_wt,
77							Statistics::Data::Rank->sumsq_ranks_within( data => $data ) );
78	5					22	my $df = $n_bt - 1;
79	5					150	my $p_value = chdtrc( $df, $chi ); # Math::Cephes fn
80	5	50				43	return wantarray ? ( $chi, $df, ( $n_bt * $n_wt ), $p_value ) : $p_value;
81							}
82
83							=head2 chiprob_str
84
85							$str = $fri->chiprob_str(data => HOA, correct_ties => 1);
86
87							Performs the same test as for L<chiprob_test|Statistics::ANOVA::Friedman/chiprob_test> but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
88
89							=cut
90
91							sub chiprob_str {
92	0			0	1	0	my ( $self, %args ) = @_;
93	0					0	my ( $chi_value, $df, $count, $p_value ) = $self->chiprob_test(%args);
94	0					0	return "chi^2($df, N = $count) = $chi_value, p = $p_value";
95							}
96
97							=head2 fprob_test
98
99							($f_value, $df_b, $df_w, $p_value) = $fri->fprob_test(data => HOA);
100							$p_value = $fri->fprob_test(data => HOA);
101
102							Performs the same test as above but transforms the chi-value into an I<F>-distributed value, returning this I<F>-equivalent value, between and within groups degrees-of-freedom, and then the associated probability off the I<F>-distribution (or only the latter if called in scalar context). Default value of optional argument B<correct_ties> is 1. This method has not been tested against sample data as yet.
103
104							=cut
105
106							sub fprob_test {
107	0			0	1	0	my ( $self, %args ) = @_;
108	0	0				0	my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
109	0					0	my $n_bt = scalar keys %{$data};
	0					0
110	0					0	my $n_wt = $self->equal_n( data => $data );
111	0	0	0			0	croak
			0
112							'Need to have equal numbers of observations greater than 1 per two or more variables for fprob_test'
113							if not $n_wt
114							or $n_wt == 1
115							or $n_bt < 2;
116							my $chi =
117	0	0				0	_definitely_no( $args{'correct_ties'} )
118							? _chi_ig_ties( $n_bt, $n_wt,
119							scalar Statistics::Data::Rank->sumsq_ranks_within( data => $data ) )
120							: _chi_by_ties( $n_bt, $n_wt,
121							Statistics::Data::Rank->sumsq_ranks_within( data => $data ) );
122	0					0	my $f_value = ( ( $n_wt - 1 ) * $chi ) / ( $n_wt * ( $n_bt - 1 ) - $chi );
123	0					0	my $df_b = $n_bt - 1;
124	0					0	my $df_w = ( $n_wt - 1 ) * ($df_b);
125	0					0	my $p_value = fdtrc( $df_b, $df_w, $f_value ); # Math::Cephes fn
126	0	0				0	return wantarray ? ( $f_value, $df_b, $df_w, $p_value ) : $p_value;
127							}
128
129							=head2 fprob_str
130
131							$str = $fri->fprob_str(data => HOA, correct_ties => 1);
132
133							Performs the same test as for L<fprob_test|Statistics::ANOVA::Friedman/fprob_test> but returns not an array but a string of the conventional reporting form, e.g., F(df_b, df_w) = f_value, p = p_value.
134
135							=cut
136
137							sub fprob_str {
138	0			0	1	0	my ( $self, %args ) = @_;
139	0					0	my ( $f_value, $df_b, $df_w, $p_value ) = $self->fprob_test(%args);
140	0					0	return "F($df_b, $df_w) = $f_value, p = $p_value";
141							}
142
143							sub _chi_ig_ties {
144	1			1		677	my ( $c, $n, $sumsq ) = @_;
145	1					6	return ( 12 / ( $n * $c * ( $c + 1 ) ) ) * $sumsq - 3 * $n * ( $c + 1 );
146							}
147
148							sub _chi_by_ties {
149	4			4		2432	my ( $c, $n, $sumsq, $xtied ) = @_;
150	4					22	my $num = 12 * $sumsq - 3 * $n**2 * $c * ( $c + 1 )**2;
151	4					8	my $sum = sum0( map { _sumcubes($_) - $c } values %{$xtied} );
	40					47
	4					12
152	4					17	my $den = $n * $c * ( $c + 1 ) - ( 1 / ( $c - 1 ) ) * $sum;
153	4					9	my $chi = $num / $den;
154	4					24	return $chi;
155							}
156
157							sub _sumcubes {
158	40			40		39	my @v = @_;
159	40					30	return sum0( map { $_**3 } @{ shift @v } );
	158					204
	40					39
160							}
161
162							sub _definitely_no {
163	5			5		15	my @v = @_;
164	5	100	66			62	return ( defined $v[0] and $v[0] == 0 ) ? 1 : 0;
165							}
166
167							=head1 DEPENDENCIES
168
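169							L<List::AllUtils|List::AllUtils> : used for summing.
170
171							L<Math::Cephes|Math::Cephes> : used for probability functions.
172
173							L<Statistics::Data|Statistics::Data> : used as base.
174
175							L<Statistics::Data::Rank|Statistics::Data::Rank> : used to calculate the dependent sum-square of ranks. See this module for retrieving the actual arrays of ranks and sum-squares.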
176
177							=head1 DIAGNOSTICS
178
179							=over 4
180
181							=item Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test
182
183							C<croak>ed if there are not equal numbers of numerical values in each given variable, or if there are not at least two variables. Similarly for fprob_test.
184
185							=back
186
187							=head1 REFERENCES
188
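189							Hollander, M., & Wolfe, D. A. (1999). I<Nonparametric statistical methods>. New York, NY, US: Wiley.
190
191							Rice, J. A. (1995). I<Mathematical statistics and data analysis>. Belmont, CA, US: Duxbury.
192
193							Sarantakos, S. (1993). I<Social research>. Melbourne, Australia: MacMillan.
194
195							Siegal, S. (1956). I<Nonparametric statistics for the behavioral sciences>. New York, NY, US: McGraw-Hill.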
196
197							=head1 AUTHOR
198
199							Roderick Garton, C<< <rgarton at cpan.org> >>
200
201							=head1 BUGS AND LIMITATIONS
202
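203							Please report any bugs or feature requests to C<bug-statistics-anova-friedman at rt.cpan.org>, or through
204							the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Statistics-ANOVA-Friedman>. I will be notified, and then you'll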
205							automatically be notified of progress on your bug as I make changes.
206
207							=head1 SUPPORT
208
209							You can find documentation for this module with the perldoc command.
210
211							perldoc Statistics::ANOVA::Friedman
212
213							You can also look for information at:
214
215							=over 4
216
217							=item * RT: CPAN's request tracker (report bugs here)
218
219							L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Statistics-ANOVA-Friedman>
220
221							=item * AnnoCPAN: Annotated CPAN documentation
222
223							L<http://annocpan.org/dist/Statistics-ANOVA-Friedman>
224
225							=item * CPAN Ratings
226
227							L<http://cpanratings.perl.org/d/Statistics-ANOVA-Friedman>
228
229							=item * Search CPAN
230
231							L<http://search.cpan.org/dist/Statistics-ANOVA-Friedman/>
232
233							=back
234
235							=head1 LICENSE AND COPYRIGHT
236
237							Copyright 2015-2017 Roderick Garton.
238
239							This program is free software; you can redistribute it and/or modify it
240							under the terms of either: the GNU General Public License as published
241							by the Free Software Foundation; or the Artistic License.
242
243							See L<http://dev.perl.org/licenses/> for more information.
244
245							=cut
246
247							1; # End of Statistics::ANOVA::Friedman