File Coverage

blib/lib/Statistics/ANOVA/Friedman.pm
Criterion Covered Total %
statement 49 67 73.1
branch 8 18 44.4
condition 4 15 26.6
subroutine 13 16 81.2
pod 4 4 100.0
total 78 120 65.0


line stmt bran cond sub pod time code
1             package Statistics::ANOVA::Friedman;
2            
3 5     5   84830 use 5.006;
  5         13  
4 5     5   18 use strict;
  5         6  
  5         135  
5 5     5   18 use warnings FATAL => 'all';
  5         11  
  5         249  
6 5     5   21 use base qw(Statistics::Data);
  5         5  
  5         2915  
7 5     5   125239 use Carp qw(croak);
  5         6  
  5         203  
8 5     5   21 use List::AllUtils qw(sum0);
  5         5  
  5         183  
9 5     5   2887 use Math::Cephes qw(:dists);
  5         20321  
  5         1171  
10 5     5   2218 use Statistics::Data::Rank;
  5         12694  
  5         2646  
11             $Statistics::ANOVA::Friedman::VERSION = '0.02';
12            
13             =head1 NAME
14            
15             Statistics::ANOVA::Friedman - Nonparametric repeated measures analysis of variance for dependent factorial measures (Friedman Test)
16            
17             =head1 VERSION
18            
19             This is documentation for version 0.02, released February 2017.
20            
21             =head1 SYNOPSIS
22            
23             use Statistics::ANOVA::Friedman;
24             my $fri = Statistics::ANOVA::Friedman->new();
25             my ($chi_value, $df, $count, $p_value) = $fri->chiprob_test(data => HOA);
26             $fri->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11]}); # or pre-load with HOA
27             ($chi_value, $df, $count, $p_value) = $fri->chiprob_test();
28             my ($f_value, $df_b, $df_w, $p_value2) = $fri->fprob_test();
29            
30             =head1 DESCRIPTION
31            
32             Performs the B<Friedman> nonparametric analysis of variance - for dependent (correlated, matched) measures of two or more discrete (nominal) variables, such as when the measures are taken from the same source (e.g., person, plot) but under different conditions. A ranking procedure is used, but, unlike the case for independent measures, the ranks are taken at each common index of each measure, i.e., within-groups.
33            
34             By default, the method accounts for and corrects for ties, but if B<correct_ties> => 0, the test-statistic is uncorrected. The correction involves accounting for the number of tied variables at each index, as per Hollander & Wolfe (1999), Eq. 7.8, p. 274.
35            
36             Correctness of output is tested on installation using example data from Hollander & Wolfe (1999, p. 274ff), Rice (1995, p. 470), Sarantakos (1993, p. 404-405), and Siegel (1956, p. 167ff); tests fail if the published chi-values and degrees-of-freedom are not returned by the module.
37            
38             The module uses L<Statistics::Data|Statistics::Data> as a base so that data can be pre-loaded and added to per that module's methods.
39            
40             =head1 SUBROUTINES/METHODS
41            
42             =head2 new
43            
44             $fri = Statistics::ANOVA::Friedman->new();
45            
46             New object for accessing methods and storing results. This "isa" Statistics::Data object.
47            
48             =head2 load, add, unload
49            
50             $fri->load('a' => [1, 4], 'b' => [3, 7]);
51            
52             The given data can now be used by any of the following methods. This is inherited from L<Statistics::Data|Statistics::Data>, and all its other methods are available here via the class object. Only passing of data as a hash of arrays (HOA) is supported for now. Alternatively, give each of the following methods the HOA for the optional named argument B<data>.
53            
54             =head2 chiprob_test
55            
56             ($chi_value, $df, $count, $p_value) = $fri->chiprob_test(data => HOA, correct_ties => 1);
57            
58             Performs the ANOVA and returns the chi-square value, its degrees-of-freedom, the total number of observations, and associated probability value (or only the latter if called in scalar context). Default value of optional argument B<correct_ties> is 1.
59            
60             =cut
61            
62             sub chiprob_test {
               # Friedman test evaluated against the chi-square distribution.
               # Named arguments:
               #   data         - hash-ref of arrays to analyse; when not given (or false),
               #                  the data are fetched with $self->get_hoa(%args).
               #   correct_ties - only a defined value numerically equal to 0 switches the
               #                  tie correction off (decided by _definitely_no).
               # Returns ($chi, $df, $n_bt * $n_wt, $p_value) in list context, and only
               # $p_value in scalar context.
               # Croaks when equal_n() gives a false value or 1, or when there are fewer
               # than 2 variables.
63 5     5 1 4509 my ( $self, %args ) = @_;
64 5 100       51 my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
               # $n_bt: number of variables (keys of the data hash).
65 5         262 my $n_bt = scalar keys %{$data};
  5         13  
               # $n_wt: presumably the common number of observations per variable, and
               # false when the counts differ - TODO confirm against Statistics::Data.
66 5         35 my $n_wt = $self->equal_n( data => $data );
67 5 50 33     178 croak
      33        
68             'Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test'
69             if not $n_wt
70             or $n_wt == 1
71             or $n_bt < 2;
               # The uncorrected branch forces scalar context on sumsq_ranks_within();
               # the corrected branch calls it in list context, so everything it returns
               # is passed on to _chi_by_ties (which unpacks a fourth argument, $xtied).
72             my $chi =
73 5 100       23 _definitely_no( $args{'correct_ties'} )
74             ? _chi_ig_ties( $n_bt, $n_wt,
75             scalar Statistics::Data::Rank->sumsq_ranks_within( data => $data ) )
76             : _chi_by_ties( $n_bt, $n_wt,
77             Statistics::Data::Rank->sumsq_ranks_within( data => $data ) );
78 5         22 my $df = $n_bt - 1;
79 5         150 my $p_value = chdtrc( $df, $chi ); # Math::Cephes fn
               # Third list element is the total number of observations.
80 5 50       43 return wantarray ? ( $chi, $df, ( $n_bt * $n_wt ), $p_value ) : $p_value;
81             }
82            
83             =head2 chiprob_str
84            
85             $str = $fri->chiprob_str(data => HOA, correct_ties => 1);
86            
87             Performs the same test as for L<chiprob_test|/chiprob_test> but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
88            
89             =cut
90            
91             sub chiprob_str {
               # Runs chiprob_test() with the same named arguments (list context) and
               # interpolates its four return values into a one-line report string.
92 0     0 1 0 my ( $self, %args ) = @_;
93 0         0 my ( $chi_value, $df, $count, $p_value ) = $self->chiprob_test(%args);
94 0         0 return "chi^2($df, N = $count) = $chi_value, p = $p_value";
95             }
96            
97             =head2 fprob_test
98            
99             ($f_value, $df_b, $df_w, $p_value) = $fri->fprob_test(data => HOA);
100             $p_value = $fri->fprob_test(data => HOA);
101            
102             Performs the same test as above but transforms the chi-value into an I<F>-distributed value, returning this I<F>-equivalent value, between and within groups degrees-of-freedom, and then the associated probability off the I<F>-distribution (or only the latter if called in scalar context). Default value of optional argument B<correct_ties> is 1. This method has not been tested against sample data as yet.
103            
104             =cut
105            
106             sub fprob_test {
                # Friedman test with the chi value converted to an F-equivalent value.
                # Named arguments are the same as for chiprob_test: 'data' (hash-ref of
                # arrays, else $self->get_hoa(%args) is used) and 'correct_ties' (only a
                # defined numeric 0 switches the tie correction off).
                # Returns ($f_value, $df_b, $df_w, $p_value) in list context, and only
                # $p_value in scalar context.
                # Croaks when equal_n() gives a false value or 1, or when there are fewer
                # than 2 variables.
107 0     0 1 0 my ( $self, %args ) = @_;
108 0 0       0 my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
                # $n_bt: number of variables (keys of the data hash).
109 0         0 my $n_bt = scalar keys %{$data};
  0         0  
                # $n_wt: presumably the common number of observations per variable, and
                # false when the counts differ - TODO confirm against Statistics::Data.
110 0         0 my $n_wt = $self->equal_n( data => $data );
111 0 0 0     0 croak
      0        
112             'Need to have equal numbers of observations greater than 1 per two or more variables for fprob_test'
113             if not $n_wt
114             or $n_wt == 1
115             or $n_bt < 2;
                # Same statistic selection as in chiprob_test: scalar context for the
                # uncorrected form, list context (extra values passed through) otherwise.
116             my $chi =
117 0 0       0 _definitely_no( $args{'correct_ties'} )
118             ? _chi_ig_ties( $n_bt, $n_wt,
119             scalar Statistics::Data::Rank->sumsq_ranks_within( data => $data ) )
120             : _chi_by_ties( $n_bt, $n_wt,
121             Statistics::Data::Rank->sumsq_ranks_within( data => $data ) );
                # NOTE(review): the denominator below is 0 whenever $chi equals
                # $n_wt * ( $n_bt - 1 ); nothing here guards against that division by zero.
122 0         0 my $f_value = ( ( $n_wt - 1 ) * $chi ) / ( $n_wt * ( $n_bt - 1 ) - $chi );
123 0         0 my $df_b = $n_bt - 1;
124 0         0 my $df_w = ( $n_wt - 1 ) * ($df_b);
125 0         0 my $p_value = fdtrc( $df_b, $df_w, $f_value ); # Math::Cephes fn
126 0 0       0 return wantarray ? ( $f_value, $df_b, $df_w, $p_value ) : $p_value;
127             }
128            
129             =head2 fprob_str
130            
131             $str = $fri->fprob_str(data => HOA, correct_ties => 1);
132            
133             Performs the same test as for L<fprob_test|/fprob_test> but returns not an array but a string of the conventional reporting form, e.g., F(df_b, df_w) = f_value, p = p_value.
134            
135             =cut
136            
137             sub fprob_str {
                # Runs fprob_test() with the same named arguments (list context) and
                # interpolates its four return values into a one-line report string.
138 0     0 1 0 my ( $self, %args ) = @_;
139 0         0 my ( $f_value, $df_b, $df_w, $p_value ) = $self->fprob_test(%args);
140 0         0 return "F($df_b, $df_w) = $f_value, p = $p_value";
141             }
142            
143             sub _chi_ig_ties {
                # Chi value without tie correction ("ig" = ignoring ties).
                # Callers pass: $c = number of variables, $n = the equal_n() count,
                # $sumsq = scalar result of Statistics::Data::Rank->sumsq_ranks_within().
                # Computes 12 / (n * c * (c + 1)) * sumsq - 3 * n * (c + 1).
144 1     1   677 my ( $c, $n, $sumsq ) = @_;
145 1         6 return ( 12 / ( $n * $c * ( $c + 1 ) ) ) * $sumsq - 3 * $n * ( $c + 1 );
146             }
147            
148             sub _chi_by_ties {
                # Chi value with the tie correction applied.
                # Callers pass: $c = number of variables, $n = the equal_n() count, then
                # the list returned by sumsq_ranks_within(), unpacked as $sumsq and $xtied.
                # $xtied is dereferenced as a hash whose values are array-refs; presumably
                # these hold the sizes of tied groups at each index - TODO confirm against
                # Statistics::Data::Rank.
149 4     4   2432 my ( $c, $n, $sumsq, $xtied ) = @_;
150 4         22 my $num = 12 * $sumsq - 3 * $n**2 * $c * ( $c + 1 )**2;
                # For every entry of $xtied: (sum of cubed elements) - $c, all added up.
151 4         8 my $sum = sum0( map { _sumcubes($_) - $c } values %{$xtied} );
  40         47  
  4         12  
                # Callers croak when there are fewer than 2 variables, so $c - 1 is never 0
                # on those paths.
152 4         17 my $den = $n * $c * ( $c + 1 ) - ( 1 / ( $c - 1 ) ) * $sum;
                # NOTE(review): no guard here against $den being 0.
153 4         9 my $chi = $num / $den;
154 4         24 return $chi;
155             }
156            
157             sub _sumcubes {
                # Takes an array-ref as first argument and returns the sum of the cubes
                # of its elements (via sum0 from List::AllUtils).
158 40     40   39 my @v = @_;
159 40         30 return sum0( map { $_**3 } @{ shift @v } );
  158         204  
  40         39  
160             }
161            
162             sub _definitely_no {
                # Returns 1 only when the first argument is defined and numerically equal
                # to 0; returns 0 otherwise (including for undef).
                # NOTE(review): the module enables "use warnings FATAL => 'all'", so a
                # defined non-numeric string reaching the == comparison would die with a
                # fatal "isn't numeric" warning rather than return 0.
163 5     5   15 my @v = @_;
164 5 100 66     62 return ( defined $v[0] and $v[0] == 0 ) ? 1 : 0;
165             }
166            
167             =head1 DEPENDENCIES
168            
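169             L<List::AllUtils|List::AllUtils> : used for summing.
170            
171             L<Math::Cephes|Math::Cephes> : used for probability functions.
172            
173             L<Statistics::Data|Statistics::Data> : used as base.
174            
175             L<Statistics::Data::Rank|Statistics::Data::Rank> : used to calculate the dependent sum-square of ranks. See this module for retrieving the actual arrays of ranks and sum-squares.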
176            
177             =head1 DIAGNOSTICS
178            
179             =over 4
180            
181             =item Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test
182            
183             Croaked if there are not equal numbers of numerical values in each given variable, if there is only one value per variable, or if there are not at least two variables. Similarly for fprob_test.
184            
185             =back
186            
187             =head1 REFERENCES
188            
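189             Hollander, M., & Wolfe, D. A. (1999). I<Nonparametric statistical methods>. New York, NY, US: Wiley.
190            
191             Rice, J. A. (1995). I<Mathematical statistics and data analysis>. Belmont, CA, US: Duxbury.
192            
193             Sarantakos, S. (1993). I<Social research>. Melbourne, Australia: MacMillan.
194            
195             Siegel, S. (1956). I<Nonparametric statistics for the behavioral sciences>. New York, NY, US: McGraw-Hill.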
196            
197             =head1 AUTHOR
198            
199             Roderick Garton, C<< >>
200            
201             =head1 BUGS AND LIMITATIONS
202            
203             Please report any bugs or feature requests to C, or through
204             the web interface at L. I will be notified, and then you'll
205             automatically be notified of progress on your bug as I make changes.
206            
207             =head1 SUPPORT
208            
209             You can find documentation for this module with the perldoc command.
210            
211             perldoc Statistics::ANOVA::Friedman
212            
213             You can also look for information at:
214            
215             =over 4
216            
217             =item * RT: CPAN's request tracker (report bugs here)
218            
219             L
220            
221             =item * AnnoCPAN: Annotated CPAN documentation
222            
223             L
224            
225             =item * CPAN Ratings
226            
227             L
228            
229             =item * Search CPAN
230            
231             L
232            
233             =back
234            
235             =head1 LICENSE AND COPYRIGHT
236            
237             Copyright 2015-2017 Roderick Garton.
238            
239             This program is free software; you can redistribute it and/or modify it
240             under the terms of either: the GNU General Public License as published
241             by the Free Software Foundation; or the Artistic License.
242            
243             See L for more information.
244            
245             =cut
246            
247             1; # End of Statistics::ANOVA::Friedman