File Coverage

blib/lib/Statistics/Sequences/Runs.pm

Criterion	Covered	Total	%
statement	16	18	88.8
branch			n/a
condition			n/a
subroutine	6	6	100.0
pod			n/a
total	22	24	91.6

line	stmt	sub	time	code
1				package Statistics::Sequences::Runs;
2	1	1	17398	use 5.008008;
	1		3
	1		31
3	1	1	4	use strict;
	1		1
	1		30
4	1	1	3	use warnings FATAL => 'all';
	1		6
	1		44
5	1	1	4	use Carp qw(carp croak);
	1		1
	1		70
6	1	1	4	use base qw(Statistics::Sequences);
	1		1
	1		367
7	1	1	277	use List::AllUtils qw(mesh sum uniq);
	0
	0
8				use Number::Misc qw(is_even);
9				use Statistics::Zed 0.08;
10				$Statistics::Sequences::Runs::VERSION = '0.21';
11
12				=pod
13
14				=head1 NAME
15
16				Statistics::Sequences::Runs - descriptives, deviation and combinatorial tests of Wald-Wolfowitz runs
17
18				=head1 VERSION
19
20				This is documentation for Version 0.21 of Statistics::Sequences::Runs.
21
22				=head1 SYNOPSIS
23
24				use strict;
25				use Statistics::Sequences::Runs 0.21; # not compatible with versions < .10
26				my $runs = Statistics::Sequences::Runs->new();
27
28				# Data make up a dichotomous sequence:
29				my @data = (qw/1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 0 1/);
30				my $val;
31
32				# - Pre-load data to use for all methods:
33				$runs->load(\@data);
34				$val = $runs->observed();
35				$val = $runs->expected();
36
37				# - or send data as "data => $aref" to each method:
38				$val = $runs->observed(data => \@data);
39
40				# - or send frequencies of each of the 2 elements:
41				$val = $runs->expected(freqs => [11, 9]); # works with other methods except observed()
42
43				# Deviation ratio:
44				$val = $runs->z_value(ccorr => 1);
45
46				# Probability:
47				my ($z, $p) = $runs->z_value(ccorr => 1, tails => 1); # dev. ratio with p-value
48				$val = $runs->p_value(tails => 1); # normal dist. p-value itself
49				$val = $runs->p_value(exact => 1, tails => 1); # by combinatorics
50
51				# Keyed list of descriptives etc.:
52				my $href = $runs->stats_hash(values => {observed => 1, p_value => 1}, exact => 1);
53
54				# Print descriptives etc. in the same way:
55				$runs->dump(
56				values => {observed => 1, expected => 1, p_value => 1},
57				exact => 1,
58				flag => 1,
59				precision_s => 3,
60				precision_p => 7
61				);
62				# prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
63
64				=head1 DESCRIPTION
65
66				The module returns statistical information re Wald-type runs: a sequence of events on 1 or more consecutive trials. For example, in a signal-detection test composed of match (H) and miss (M) events over time like H-H-M-H-M-M-M-M-H, there are 5 runs: 3 Hs, 2 Ms. This number of runs between two events can be compared with the number expected to occur by chance over the number of trials, relative to the expected variance (see REFERENCES). More runs than expected ("negative serial dependence") can denote irregularity, instability, mixing up of alternatives. Fewer runs than expected ("positive serial dependence") can denote cohesion, insulation, isolation of alternatives. Both can indicate sequential dependency: either negative (a bias to produce too many alternations), or positive (a bias to produce too many repetitions).
67
68				The distribution of runs is asymptotically normal, and a deviation-based test of extra-chance occurrence when at least one alternative has more than 20 occurrences (Siegal rule), or both event occurrences exceed 10 (Kelly, 1982), is conventionally considered reliable; otherwise, the module provides an "exact test" option, based on combinatorics.
69
70				Have non-dichotomous, continuous or multinomial data? See L for how to prepare them for test of runs.
71
72				=head1 SUBROUTINES/METHODS
73
74				=head2 Data-handling
75
76				=head3 new
77
78				$runs = Statistics::Sequences::Runs->new();
79
80				Returns a new Runs object. Expects/accepts no arguments but the classname.
81
82				=head3 load
83
84				$runs->load(@data);
85				$runs->load(\@data);
86				$runs->load(foodat => \@data); # labelled whatever
87
88				Loads data anonymously or by name - see L in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here).
89
90				After the load, the data are L to ensure that they contain only two unique elements - if not, carp occurs and 0 rather than 1 is returned.
91
92				Alternatively, skip this action; data don't always have to be loaded to use the stats methods here. To get the observed number of runs, data of course have to be loaded, but other stats can be got if given the observed count - otherwise, they too depend on data having been loaded.
93
94				Every load unloads all previous loads and any additions to them.
95
96				=head3 add, access, unload
97
98				See L for these additional operations on data that have been loaded.
99
100				=head2 Descriptives
101
102				=head3 observed
103
104				$v = $runs->observed(); # use the first data loaded anonymously
105				$v = $runs->observed(index => 1); # ... or give the required "index" for the loaded data
106				$v = $runs->observed(label => 'foodat'); # ... or its "label" value
107				$v = $runs->observed(data => [1, 0, 1, 1]); # ... or just give the data now
108
109				Returns the total observed number of runs in the loaded or given data. For example,
110
111				$v = $runs->observed_per_state(data => [qw/H H H T T H H/]);
112
113				returns 3.
114
115				I: runcount_observed, rco
116
117				=cut
118
119				sub observed {
120				my ( $self, @args ) = @_;
121				my $args = ref $args[0] ? $args[0] : {@args};
122				return $args->{'observed'} if defined $args->{'observed'};
123				my $data = _get_data( $self, $args );
124				my $rco = 1;
125				for ( 1 .. scalar @{$data} - 1 ) {
126				$rco++ if $data->[$_] ne $data->[ $_ - 1 ];
127				}
128				return $rco;
129				}
130				*runcount_observed = \&observed;
131				*rco = \&observed;
132
133				=head3 observed_per_state
134
135				@freq = $runs->observed_per_state(data => \@data);
136				$href = $runs->observed_per_state(data => \@data);
137
138				Returns the number of runs per state - as an array where the first element gives the count for the first state in the data, and so for the second. A keyed hashref is returned if not called in array context. For example:
139
140				@ari = $runs->observed_per_state(data => [qw/H H H T T H H/]); # returns (2, 1)
141				$ref = $runs->observed_per_state(data => [qw/H H H T T H H/]); # returns { H => 2, T => 1}
142
143				=cut
144
145				sub observed_per_state {
146				my ( $self, @args ) = @_;
147				my $args = ref $args[0] ? $args[0] : {@args};
148				my $data = _get_data( $self, $args );
149				my @states = uniq @{$data};
150				my @freqs = $data->[0] eq $states[0] ? ( 1, 0 ) : ( 0, 1 );
151				for ( 1 .. scalar @{$data} - 1 ) {
152				if ( $data->[$_] ne $data->[ $_ - 1 ] ) {
153				if ( $data->[$_] eq $states[0] ) {
154				$freqs[0]++;
155				}
156				else {
157				$freqs[1]++;
158				}
159				}
160				}
161				return wantarray ? @freqs : { mesh @states, @freqs };
162				}
163
164				=head3 expected
165
166				$v = $runs->expected(); # or specify loaded data by "index" or "label", or give it as "data" - see observed()
167				$v = $runs->expected(data => [qw/blah bing blah blah blah/]); # use these data
168				$v = $runs->expected(freqs => [12, 7]); # don't use actual data; calculate from these two Ns
169
170				Returns the expected number of runs across the loaded data. Expectation is given as follows:
171
172				=for html E[R] = ( (2n₁n₂) / (n₁ + n₂) ) + 1
173
174				where I(I is the number of observations of each element in the data.
175
176				I: runcount_expected, rce
177
178				=cut
179
180				sub expected {
181				my ( $self, @args ) = @_;
182				my $args = ref $args[0] ? $args[0] : {@args};
183				my ( $n1, $n2 ) = $self->bi_frequency($args);
184				my $sum = $n1 + $n2;
185				return $sum ? ( ( 2 * $n1 * $n2 ) / $sum ) + 1 : undef;
186				}
187				*rce = \&expected;
188				*runcount_expected = \&expected;
189
190				=head3 variance
191
192				$v = $runs->variance(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
193				$v = $runs->variance(data => [qw/blah bing blah blah blah/]); # use these data
194				$v = $runs->variance(freqs => [5, 12]); # use these trial numbers - not any particular sequence of data
195
196				Returns the variance in the number of runs for the given data.
197
198				=for html V[R] = ( (2n₁n₂)([2n₁n₂] – [n₁ + n₂]) ) / ( ((n₁ + n₂)²)((n₁ + n₂) – 1) )
199
200				defined as above for L.
201
202				The data to test can already have been Led, or you send it directly as a flat referenced array keyed as B.
203
204				I: runcount_variance, rcv
205
206				=cut
207
208				sub variance {
209				my ( $self, @args ) = @_;
210				my $args = ref $args[0] ? $args[0] : {@args};
211				my ( $n1, $n2 ) = $self->bi_frequency($args);
212				my $sum = $n1 + $n2;
213				return $sum < 2
214				? 1
215				: ( ( 2 * $n1 * $n2 * ( ( 2 * $n1 * $n2 ) - $sum ) ) /
216				( ( $sum*2 ) ( $sum - 1 ) ) );
217				}
218				*rcv = \&variance;
219				*runcount_variance = \&variance;
220
221				=head3 observed_deviation
222
223				$v = $runs->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
224				$v = $runs->obsdev(data => [qw/blah bing blah blah blah/]); # use these data
225
226				Returns the deviation of (difference between) observed and expected runs for the loaded/given sequence (I - I).
227
228				I: obsdev
229
230				=cut
231
232				sub observed_deviation {
233				return observed(@_) - expected(@_);
234				}
235				*obsdev = \&observed_deviation;
236
237				=head3 standard_deviation
238
239				$v = $runs->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
240				$v = $runs->stdev(data => [qw/blah bing blah blah blah/]);
241
242				Returns square-root of the variance.
243
244				I: stdev
245
246				=cut
247
248				sub standard_deviation {
249				return sqrt variance(@_);
250				}
251				*stdev = \&standard_deviation;
252
253				=head3 skewness
254
255				Returns run skewness as given by Barton & David (1958) based on the frequencies of the two different elements in the sequence.
256
257				=cut
258
259				sub skewness {
260				my ( $self, @args ) = @_;
261				my $args = ref $args[0] ? $args[0] : {@args};
262				my ( $n1, $n2 ) = $self->bi_frequency($args);
263				my $k3 = 0;
264				if ( $n1 != $n2 ) {
265				my $sum = $n1 + $n2;
266				$k3 =
267				( ( 2 * $n1 * $n2 ) / $sum*3 )
268				( ( ( 16 * $n1*2 $n22 ) / $sum2 ) -
269				( ( 4 * $n1 * $n2 * ( $sum + 3 ) ) / $sum ) +
270				3 * $sum );
271				}
272				return $k3;
273				}
274
275				=head3 kurtosis
276
277				Returns run kurtosis as given by Barton & David (1958) based on the frequencies of the two different elements in the sequence.
278
279				=cut
280
281				sub kurtosis {
282				my ( $self, @args ) = @_;
283				my $args = ref $args[0] ? $args[0] : {@args};
284				my ( $n1, $n2 ) = $self->bi_frequency($args);
285				my $sum = $n1 + $n2;
286				my $k4 = ( ( 2 * $n1 * $n2 ) / $sum*4 ) (
287				( ( 48 * ( 5 * $sum - 6 ) * $n1*3 $n23 ) / ( $sum2 * $sum**2 ) )
288				- (
289				( 48 * ( 2 * $sum*2 + 3 $sum - 6 ) * $n1*2 $n2**2 ) /
290				( $sum*2 $sum )
291				) + (
292				( 2 * ( 4 * $sum*3 + 45 $sum*2 - 37 $sum - 18 ) * $n1 * $n2 )
293				/ $sum**2
294				) - ( 7 * $sum*2 + 13 $sum - 6 )
295				);
296				return $k4;
297				}
298
299				=head2 Distribution and tests
300
301				=head3 pmf
302
303				$p = $runs->pmf(data => \@data); # or no args to use last pre-loaded data
304				$p = $runs->pmf(observed => 5, freqs => [5, 20]);
305
306				Implements the runs probability mass function, returning the probability for a particular number of runs given so many dichotomous events (e.g., as in Swed & Eisenhart, 1943, p. 66); i.e., for I' the observed number of runs, I {I = I'}. The required function parameters are the observed number of runs, and the frequencies (counts) of each state in the sequence, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded). For derivation, see its public internal methods L and L.
307
308				=cut
309
310				sub pmf {
311				my ( $self, @args ) = @_;
312				my $args = ref $args[0] ? $args[0] : {@args};
313				my ( $n1, $n2 ) = $self->bi_frequency($args);
314				my $u = $self->observed($args);
315				return _pmf_num( $u, $n1, $n2 ) / _pmf_denom( $n1, $n2 );
316				}
317
318				=head3 cdf
319
320				$p = $runs->cdf(data => \@data); # or no args to use last pre-loaded data
321				$p = $runs->cdf(observed => 5, freqs => [5, 20]);
322
323				Implements the cumulative distribution function for runs, returning the probability of obtaining the observed number of runs or less down to the expected number of 2 (assuming that the two possible events are actually represented in the data), as per Swed & Eisenhart (1943), p. 66; i.e., for I' the observed number of runs, I {I <= I'}. The summation is over the probability mass function L. The function parameters are the observed number of runs, and the frequencies (counts) of the two events, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded).
324
325				=cut
326
327				sub cdf {
328				my ( $self, @args ) = @_;
329				my $args = ref $args[0] ? $args[0] : {@args};
330				my ( $n1, $n2 ) = $self->bi_frequency($args);
331				my $u = $self->observed($args);
332				my $sum = 0;
333				for ( 2 .. $u ) {
334				$sum += _pmf_num( $_, $n1, $n2 );
335				}
336				return $sum / _pmf_denom( $n1, $n2 );
337				}
338
339				=head3 cdfi
340
341				$p = $runs->cdfi(data => \@data); # or no args for last pre-loaded data
342				$p = $runs->cdfi(observed => 11, freqs => [5, 11]);
343
344				Implements the (inverse) cumulative distribution function for runs, returning the probability of obtaining more than the observed number of runs up from the expected number of 2 (assuming that the two possible events are actually represented in the data), as per Swed & Eisenhart (1943), p. 66; ; i.e., for I' the observed number of runs, I = 1 - I {I <= I' - 1}. The summation is over the probability mass function L. The function parameters are the observed number of runs, and the frequencies (counts) of the two events, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded).
345
346				=cut
347
348				sub cdfi {
349				my ( $self, @args ) = @_;
350				my $args = ref $args[0] ? $args[0] : {@args};
351				my ( $n1, $n2 ) = $self->bi_frequency($args);
352				my $u = $self->observed($args);
353				my $sum = 0;
354				for ( 2 .. $u - 1 ) {
355				$sum += _pmf_num( $_, $n1, $n2 );
356				}
357				return 1 - $sum / _pmf_denom( $n1, $n2 );
358				}
359
360				=head3 z_value
361
362				$v = $runs->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
363				$v = $runs->z_value(data => $aref, ccorr => 1);
364				($zvalue, $pvalue) = $runs->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
365
366				Returns the zscore from a test of runcount deviation, taking the runcount expected away from that observed and dividing by the root expected runcount variance, by default with a continuity correction to expectation. Called wanting an array, returns the z-value with its I -value for the tails (1 or 2) given.
367
368				The data to test can already have been Led, or sent directly as an aref keyed as B.
369
370				Other options are B (for the z_value) and B (for the p_value).
371
372				I: runcount_zscore, rzs, zscore
373
374				=cut
375
376				sub z_value {
377				my ( $self, @args ) = @_;
378				my $args = ref $args[0] ? $args[0] : {@args};
379				my $zed = Statistics::Zed->new();
380				my ( $zval, $pval ) = $zed->zscore(
381				observed => $self->rco($args),
382				expected => $self->rce($args),
383				variance => $self->rcv($args),
384				ccorr => ( defined $args->{'ccorr'} ? $args->{'ccorr'} : 1 ),
385				tails => ( $args->{'tails'} \|\| 2 ),
386				precision_s => $args->{'precision_s'},
387				precision_p => $args->{'precision_p'},
388				);
389				return wantarray ? ( $zval, $pval ) : $zval;
390				}
391				*rzs = \&z_value;
392				*runcount_zscore = \&z_value;
393				*zscore = \&z_value;
394
395				=head3 p_value
396
397				$p = $runs->p_value(); # using loaded data and default args
398				$p = $runs->p_value(ccorr => 0\|1, tails => 1\|2); # normal-approx. for last-loaded data
399				$p = $runs->p_value(exact => 1); # calc combinatorially for observed >= or < than expectation
400				$p = $runs->p_value(data => [1, 0, 1, 1, 0], exact => 1); # given data
401				$p = $runs->p_value(freqs => [12, 12], observed => 8); # no data sequence, specify known params
402
403				Returns the probability of getting the observed number of runs or a smaller number given the number of each of the two events. By default, a large sample is assumed, and the probability is obtained from the normalized deviation, as given by the L method.
404
405				If the option B is defined and not zero, then the probability is worked out combinatorially, as per Swed & Eisenhart (1943), Eq. 1, p. 66 (see also Siegal, 1956, Eqs. 6.12a and 6.12b, p. 138). By default, this is a one-tailed test, testing the hypotheses that there are either too many or too few runs relative to chance expectation; the "correct" hypothesis is tested based on the expected value returned by the L method. Setting B => 2 simply doubles the one-tailed I -value from any of these tests. Output from these tests has been checked against the tables and examples in Swed & Eisenhart (given to 7 decimal places), and found to agree.
406
407				The option B gives the returned I -value to so many decimal places.
408
409				I: test, runs_test, rct
410
411				=cut
412
413				sub p_value {
414				my ( $self, @args ) = @_;
415				my $args = ref $args[0] ? $args[0] : {@args};
416				my $pval;
417				if ( $args->{'exact'} ) {
418				$pval =
419				( $self->observed($args) - $self->rce($args) >= 0 )
420				? $self->cdfi($args)
421				: $self->cdf($args);
422				$pval *= 2 if $args->{'tails'} and $args->{'tails'} == 2;
423				$pval = sprintf( q{%.} . $args->{'precision_p'} . 'f', $pval )
424				if $args->{'precision_p'};
425				}
426				else {
427				$pval = ( $self->zscore($args) )[1];
428				}
429				return $pval;
430				}
431				*test = \&p_value;
432				runs_test = \p_value;
433				rct = \p_value;
434
435				=head3 lrx2
436
437				Likelihood ratio chi-square test for runs by length.
438
439				=cut
440
441				sub lrx2 {
442				my ( $self, @args ) = @_;
443				my $args = ref $args[0] ? $args[0] : {@args};
444				my ( $n1, $n2 ) = $self->bi_frequency($args);
445				return;
446				}
447
448				=head3 ztest_ok
449
450				Returns true for the loaded sequence if its constituent sample numbers are sufficient for their expected runs to be normally approximated - using Siegal's (1956, p. 140) rule - ok if I of the two Is are greater than 20.
451
452				=cut
453
454				sub ztest_ok {
455				my ( $self, @args ) = @_;
456				my $args = ref $args[0] ? $args[0] : {@args};
457				my ( $n1, $n2 ) = $self->bi_frequency($args);
458				my $retval =
459				$n1 > 20 \|\| $n2 > 20
460				? 1
461				: 0
462				; # Siegal's rule (p. 140) - ok if either of the two Ns are greater than 20
463				return $retval;
464				}
465
466				=head2 Utils
467
468				Methods used internally, or for returning/printing descriptives, etc., in a bunch.
469
470				=head3 bi_frequency
471
472				@freq = $runs->bi_frequency(data => \@data); # or no args if using last pre-loaded data
473
474				Returns frequency of the two elements - or croaks if there are more than 2, and gives zero for any absent.
475
476				=cut
477
478				sub bi_frequency {
479				my ( $self, @args ) = @_;
480				my $args = ref $args[0] ? $args[0] : {@args};
481				carp
482				'Argument named \'trials\' is deprecated; use \'freqs\' to give aref of frequencies per state'
483				if $args->{'trials'};
484				return @{ $args->{'freqs'} } if ref $args->{'freqs'};
485				my $data = _get_data( $self, $args );
486				my %states = ();
487				$states{$_}++ for @{$data}; # hash keying each element with its frequency
488				croak 'Cannot compute runs: More than two states were found in the data: '
489				. join( q{, }, keys %states )
490				if scalar keys %states > 2;
491				my @vals = values %states;
492				$vals[1] = 0 if scalar @vals < 2;
493				return @vals;
494				}
495
496				=head3 n_max_seq
497
498				$n = $runs->n_max_seq(); # loaded data
499				$n = $runs->n_max_seq(data => \@data); # this sequence
500				$n = $runs->n_max_seq(observed => int, freqs => [int, int]); # these specs
501
502				Returns the number of possible sequences for the two given state frequencies. So the proverbial urn contains I1 black balls and I2 white balls, well mixed, and take I1 + I2 drawings from it without replacement, so any sequence has the same probability of occurring; how many different sequences of black and white balls are possible? For the two counts, this is "sum of I1 + I2 I I1", or:
503
504				=for html N_max = ( N₁ + N₂ )! / N₁!N₂!
505
506				With the usual definition of a probability as M / N, this is the denominator term in the runs L. This does not take into account the probability of obtaining so many of each event, of the proportion of black and white balls in the urn. (That's work and play for another day.)
507
508				=cut
509
510				sub n_max_seq {
511				my ( $self, @args ) = @_;
512				my $args = ref $args[0] ? $args[0] : {@args};
513				my @freqs = $self->bi_frequency($args);
514				return _pmf_denom(@freqs);
515				}
516
517				=head3 m_seq_k
518
519				$n = $runs->m_seq_k(); # loaded data
520				$n = $runs->m_seq_k(data => \@data); # this sequence
521				$n = $runs->m_seq_k(observed => int, freqs => [int, int]); # these specs
522
523				Returns the number of sequences that can produce I runs from I elements of a single kind, with all other kinds of elements in the sequence assumed to be of a single kind, under the conditions of L. See Swed and Eisenhart (1943), or barton and David (1958, p. 253). With the usual definition of a probability as M / N, this is the numerator term in the runs L.
524
525				=cut
526
527				sub m_seq_k {
528				my ( $self, @args ) = @_;
529				my $args = ref $args[0] ? $args[0] : {@args};
530				my $u = $self->observed($args);
531				my @freqs = $self->bi_frequency($args);
532				return _pmf_num( $u, @freqs );
533				}
534
535				=head3 stats_hash
536
537				$href = $runs->stats_hash(values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1}, exact => 0, ccorr => 1);
538
539				Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them (e.g., exact for p_value). See L for details. If calling via a "runs" object, the option "stat => 'runs'" is not needed (unlike when using the parent "sequences" object).
540
541				=head3 dump
542
543				$runs->dump(values => { observed => 1, variance => 1, p_value => 1}, exact => 1, flag => 1, precision_s => 3); # among other options
544
545				Print Runs-test results to STDOUT. See L for details of what stats to dump (default is observed() and p_value()). Optionally also give the data directly.
546
547				=cut
548
549				sub dump {
550				my ( $self, @args ) = @_;
551				my $args = ref $args[0] ? $args[0] : {@args};
552				$args->{'stat'} = 'runs';
553				$self->SUPER::dump($args);
554				return;
555				}
556
557				=head3 dump_data
558
559				$runs->dump_data(delim => "\n"); # print whatevers loaded (or specify by label, index, or as "data")
560
561				See L for details.
562
563				=cut
564
565				# Private methods:
566
567				sub _pmf_num {
568				my ( $u, $m, $n ) = @_;
569				my $f;
570				if ( is_even($u) ) {
571				my $k = $u / 2 - 1;
572				$f = 2 * _choose( $m - 1, $k ) * _choose( $n - 1, $k );
573				}
574				else {
575				my $k = ( $u + 1 ) / 2;
576				$f =
577				_choose( $m - 1, $k - 1 ) * _choose( $n - 1, $k - 2 ) +
578				_choose( $m - 1, $k - 2 ) * _choose( $n - 1, $k - 1 );
579				}
580				return $f;
581				}
582
583				sub _pmf_denom {
584				return _choose( sum(@_), $_[0] );
585
586				#return _factorial(sum(@_)) / ( _factorial($_[0]) * _factorial($_[1]) );
587				}
588
589				sub _choose { # from Orwant et al., p. 573
590				my ( $n, $k ) = @_;
591				my ( $res, $j ) = ( 1, 1 );
592				return 0 if $k > $n \|\| $k < 0;
593				$k = ( $n - $k ) if ( $n - $k ) < $k;
594				while ( $j <= $k ) {
595				$res *= $n--;
596				$res /= $j++;
597				}
598				return $res;
599				}
600
601				sub _factorial {
602				my ( $n, $res ) = ( shift, 1 );
603				return undef unless $n >= 0 and $n == int($n);
604				$res *= $n-- while $n > 1;
605				return $res;
606				}
607
608				sub _get_data {
609				my ( $self, $args ) = @_;
610				return ref $args->{'data'} ? $args->{'data'} : $self->read($args);
611				}
612
613				1;
614
615				__END__