File Coverage

blib/lib/Statistics/Reproducibility.pm

Criterion	Covered	Total	%
statement	21	287	7.3
branch	0	74	0.0
condition	0	51	0.0
subroutine	7	27	25.9
pod	20	20	100.0
total	48	459	10.4

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::Reproducibility;
2
3	1			1		40041	use 5.006;
	1					4
	1					245
4	1			1		6	use strict;
	1					3
	1					43
5	1			1		6	use warnings FATAL => 'all';
	1					14
	1					56
6	1			1		5	use Carp;
	1					3
	1					1697
7
8							#use Math::Geometry::Multidimensional qw/distanceToLineN diagonalComponentsN diagonalDistancesFromOriginN/;
9	1			1		1637	use Statistics::TheilSenEstimator qw/theilsen/;
	1					3869
	1					67
10	1			1		8	use Statistics::QuickMedian qw/qmedian/;
	1					3
	1					36
11	1			1		1122	use Statistics::Distributions;
	1					5815
	1					7738
12
13							=head1 NAME
14
15							Statistics::Reproducibility - Reproducibility measurement between multiple replicate experiments
16
17							=head1 VERSION
18
19							Version 0.09
20
21							=cut
22
23							our $VERSION = '0.09';
24
25
26							=head1 SYNOPSIS
27
28							This module facilitates investigation of reproducbility between multiple replicates of
29							quantitative experiments e.g. SILAC or microarray. Scatter plots are great, but
30							only 2d. Some people use correlation as a proxy for reproducibility, but it's not right.
31							This module helps you through the following items...
32
33							1) Summarize reproducibility across the replicates
34							2) Pick out replicates that agree more or less
35							3) Summarize reproducibility for individual proteins/genes/whatever
36							4) Set a cutoff for what you can call significant, based on precision
37							5) Deal with missing values (common in SILAC)
38
39							This works by going through the following steps:
40
41							(0) Choose a dataset to compare everything else to (the middlemost)
42							1) Put the middle of the data at (0,0,0,0...) by subtracting the median ... report the median
43							2) Rotate the data so the line x=y=z=... lies on a single axis. The data should be spread along this axis.
44							3) Do regression on the data and work out "wrongness" of each replicate (!)
45							4) Calculate and report ratio variance and imprecision variance
46							5) Report combined ratio and error for each protein/gene/whatever
47
48							Perhaps a little code snippet.
49
50							use Statistics::Reproducibility;
51
52							my $r = Statistics::Reproducibility->new();
53
54							=head1 SUBROUTINES/METHODS
55
56							=head2 new
57
58							=cut
59
60							sub new {
61	0			0	1		my $p = shift;
62	0		0				my $c = ref $p \|\| $p;
63	0						my $o = {
64							# scalars:
65							comparatorIndex => 0, # index of column used to compare
66							k => '', # number of columns
67							n => '', # number of rows
68							vE => '', # variance of "error" (imprecision)
69							vS => '', # variable of experimental spread
70							sdE => '', # s.d. error
71							sdS => '', # s.d. spread
72							derivedFrom => '', # the object derived from
73							derivedReason => '', # the reason the object was derived (e.g. deDiagonalize)
74
75							# arrays (foreach column)
76							'm' => [], # regression denominator
77							'c' => [], # regression constant
78							# arrays (foreach row)
79							pee => '', # p-value of error
80							pss => '', # p-value of spread
81							pes => '', # p-value of error over spread (??)
82							pse => '', # p-value of spread over error
83
84							# 2D array (LoL)
85							data => [],
86
87							#meta info
88							copyOnDerive => [qw/comparatorIndex k n vE vS sdE sdS m c pee pss pes pse copyOnDerive obs/]
89							};
90	0						bless $o, $c;
91	0						return $o;
92							}
93
94							=head2 derive
95
96							derives a new object from an old one... some fields are conserved.
97							Warning: references are copied, so m and c point to the same arrays!
98							However, if you run regression() again, they will point to new arrays.
99							Data is set up with k empty columns.
100
101							=cut
102
103							sub derive {
104	0			0	1		my ($o,$reason) = @_;
105	0						my $r = $o->new;
106	0						foreach (@{$o->{copyOnDerive}}){
	0
107	0						$r->{$_} = $o->{$_};
108							}
109	0						$r->{derivedFrom} = $o;
110	0						$r->{derivedReason} = $reason;
111	0						$r->{data} = [map {[]} (0..$o->{k}-1)];
	0
112	0						return $r;
113							}
114
115							=head2 data
116
117							Set the data. Should be rectangular, i.e. all columns the same length, and
118							we'll check it is and croak if not...
119							but can contain "empty" cells (empty string), which represent missing values
120							in the data.
121
122							returns the object for chaining.
123
124							=cut
125
126							sub data {
127	0			0	1		my ($o,@columns) = @_;
128	0						$o->{data} = [@columns];
129	0						$o->{k} = @columns;
130	0						$o->{n} = @{$columns[0]};
	0
131	0						foreach (1 .. $o->{k}-1){
132	0						croak "columns different lengths!"
133	0	0					unless @{$columns[$_]} == $o->{n};
134							}
135	0						return $o;
136							}
137
138							=head2 run
139
140							runs a recommended workflow. it's a shortcut for:
141
142
143							my $m = $r->subtractMedian();
144							$m->middlemostColumn();
145							my $d = $m->deDiagonalize();
146							$d -> regression();
147							my $e = $d->rotateToRegressionLine();
148							$e->variances();
149
150							It returns the last object. So you could do:
151
152							my $results = Statistics::Reproducibility
153							->new()
154							->data($mydata)
155							->run()
156							->printableTable($depth);
157
158							=cut
159
160							sub run {
161	0			0	1		my $r = shift;
162							# $r->data([qw/1 2 3 4 5 6 7 8/],[qw/0 1 2 3 4 5 6 7/],[qw/2.1 3.2 4.3 5.4 6.5 7.6 8.7 9.8/]);
163	0						$r->countObservations();
164	0						$r->regression();
165	0						my $m = $r->subtractMedian();
166	0						$m->applyMinimumObservations(2);
167	0						$m->middlemostColumn();
168	0						my $d = $m->deDiagonalize();
169	0						$d->applyMinimumObservations(2);
170	0						$d->regression();
171	0						my $e = $d->rotateToRegressionLine();
172	0						$e->applyMinimumObservations(2);
173	0						$e->variances();
174	0						return $e;
175							}
176
177							=head2 subtractMedian
178
179							calculates the median for each column, substracts from each column and
180							returns the new object.
181
182							=cut
183
184							sub subtractMedian {
185	0			0	1		my $o = shift;
186	0						my $r = $o->derive('subtractMedian');
187	0	0					my @medians = map {qmedian([map {$_ eq '' ? () : $_} @$_])} @{$o->{data}};
	0
	0
	0
188	0						foreach my $i(0..$#medians){
189	0	0					$r->{data}->[$i] = [map {$_ eq '' ? '' : $_ - $medians[$i]} @{$o->{data}->[$i]}];
	0
	0
190							}
191	0						$r->{medians} = \@medians;
192	0						return $r;
193							}
194
195							=head2 middlemostColumn
196
197							Calculates which of the columns is middlemost and remembers it so all
198							others are compared to it. This can be done instead of using a constructed
199							median dataset as the comparator so that the constructed one does not add to
200							the spread, and does not contribute to the observation count.
201
202							Note: the method of scoring the columns involves counting which has
203							the most middlemost values. For two columns only, the result will always
204							be the one with the least missing values. I don't think there's anything
205							wrong with that, but just so you know!
206
207							=cut
208
209							sub middlemostColumn {
210	0			0	1		my $o = shift;
211							# which is the middle most column? i.e. who has the most medians?
212
213	0						my @medianCounts = map {0} (1..$o->{k}); # stash counts
	0
214
215	0						foreach my $i(0..$o->{n}-1){ # each row
216	0						my @row = ();
217	0						foreach my $j(0..$o->{k}-1){
218	0	0	0				if(defined $o->{data}->[$j]->[$i]
219							&& $o->{data}->[$j]->[$i] ne ''){
220	0						push @row, $o->{data}->[$j]->[$i];
221							}
222							}
223							# who's in the middle?
224	0						foreach(medianI(@row)){
225	0						$medianCounts[$_] ++;
226							}
227							}
228
229							# which has the most middlemost values?
230	0						my $imax = 0;
231	0						my $max = 0;
232	0						foreach my $i(0..$o->{k}-1){
233	0	0					if($medianCounts[$i] > $max){
234	0						$imax = $i;
235	0						$max = $medianCounts[$i];
236							}
237							}
238
239							# so now we want to put this column on the left? Or should we just
240							# store that we're going to use this one as the comparator?
241	0						$o->{comparatorIndex} = $imax;
242	0						return $imax;
243							}
244
245							=head2 constructMedianLeft
246
247							Make a median column and pop it on the left. Note that the
248							regular median is used here, not the Quick Median estimator. This means
249							that for an even number of observations, the mean of the two middlemost is
250							used, which is not the case for Quick Median.
251
252							=cut
253
254							sub constructMedianLeft {
255	0			0	1		my $o = shift;
256	0						my @newcol = ();
257	0						foreach my $i(0..$o->{n}-1){
258	0						my @row = ();
259	0						foreach my $j(0..$o->{k}-1){
260	0	0	0				if(defined $o->{data}->[$i]->[$j]
261							&& $o->{data}->[$i]->[$j] ne ''){
262	0						push @row, $o->{data}->[$i]->[$j];
263							}
264							}
265	0						push @newcol, median(@row);
266							}
267	0						unshift @{$o->{data}}, \@newcol;
	0
268	0						return \@newcol;
269							}
270
271							=head2 deDiagonalize
272
273							Replicated data with some spread will naturally lie along the diagonal line,
274							y=x (in 2 dimensions, or z=y=x... in more). This function aligns the data
275							along one axis by rotation. This is done so that (a) errors are measured
276							approximately perpendicular to the spread of data and (b) unspread data
277							(a ball of points) gives a gradient of zero in Theil Sen estimator, which is
278							correct because if there's no experimental spread then there can be no
279							evidence that the replicates disagree.
280
281							Note: at this point, any missing values are REPLACED BY ZEROS! This means
282							that these data point will not disagree with any "unchanging" data, but they
283							will not support the reproducibility of "changed" data (data for proteins/genes)
284							that are regulated). The effect of this is that those points will not appear as
285							extreme in the output and will also have a larger error associated with them.
286
287							A NEW object is returned! comparatorIndex is honoured and conserved,
288							meaning that if you ran middlemostColumn, the result is the column used
289							as the Y axis in all comparisons, and the column itself will contain the
290							experimental variance.
291
292							=cut
293
294							sub deDiagonalize {
295	0			0	1		my $o = shift;
296	0						my $r = $o->derive('deDiagonalize');
297
298	0						my $ic = $o->{comparatorIndex};
299
300	0						my $a = atan2(1,1);
301
302	0						foreach my $i(0..$o->{k}-1){
303	0	0					next if $i == $ic;
304	0						foreach my $j(0..$o->{n}-1){
305	0		0				my $y = $o->{data}->[$i]->[$j] \|\| 0;
306	0		0				my $x = $o->{data}->[$ic]->[$j] \|\| 0;
307
308	0	0	0				if($y \|\| $x){
309	0						my $t = atan2($y,$x) - $a;
310	0						my $r = sqrt($x2 + $y2);
311	0						($x,$y) = ($rcos($t), $rsin($t));
312							}
313
314	0						$r->{data}->[$i]->[$j] = $y;
315	0						$r->{data}->[$ic]->[$j] = $x;
316							}
317
318							# $r->{data}->[$i] = diagonalComponentsN(
319							# $o->{data}->[$i], $o->{data}->[$ic]
320							# )
321							}
322
323							#$r->{data}->[$ic] = diagonalDistancesFromOriginN(
324							# $o->{k}, $o->{n}, @{$o->{data}}
325							#);
326	0						return $r;
327							}
328
329							=head2 countObservations
330
331							Counts the number of observations present in each point and stores in obs.
332							The result is used by applyMinimumObservations to check for unwanted data
333							before a processing event which turns empties into zeros (like deDiagonalize).
334
335							=cut
336
337							sub countObservations {
338	0			0	1		my ($o) = @_;
339	0						my @obs = ();
340	0						foreach my $j(0..$o->{n}-1){
341	0						my $c = 0;
342	0						foreach my $i(0..$o->{k}-1){
343	0	0	0				$c++ if defined $o->{data}->[$i]->[$j] && $o->{data}->[$i]->[$j] ne '';
344							}
345	0						push @obs, $c;
346							}
347	0						$o->{obs} = \@obs;
348							}
349
350							=head2 applyMinimumObservations
351
352							A method that blanks any data that does not have a minimum number of
353							values, e.g. if the minimum were 2, the point [2,3,undef] would be fine
354							but [2,undef,undef] would become [undef,undef,undef]
355
356							=cut
357
358							sub applyMinimumObservations {
359	0			0	1		my ($o,$min) = @_;
360	0						foreach my $j(0..$o->{n}-1){
361	0	0					if($o->{obs}->[$j] < $min){
362	0						foreach my $i(0..$o->{k}-1){
363	0						$o->{data}->[$i]->[$j] = '';
364							}
365	0	0					$o->{d}->[$j] = '' if exists $o->{d};
366							}
367							}
368							}
369
370							=head2 regression
371
372							Perform Theil Sen Estimator regression on the data. The regression is
373							done with the comparator on the x axis, but the symmetric parameters
374							are returned for the comparator on the y-axis.
375
376							=cut
377
378							sub regression {
379	0			0	1		my $o = shift;
380	0						my @m = map {1} (0..$o->{k}-1);
	0
381	0						my @c = map {0} (0..$o->{k}-1);
	0
382	0						foreach my $i(0..$o->{k}-1){
383	0	0					next if $i == $o->{comparatorIndex};
384	0						my ($m,$c) = theilsen(
385							$o->{data}->[$i],
386							$o->{data}->[$o->{comparatorIndex}]
387							);
388	0						$m[$i] = $m;
389	0						$c[$i] = -$c; # - because we're converting to the inverse symmetric
390							}
391	0						$o->{m} = \@m;
392	0						$o->{c} = \@c;
393	0						return ($o->{m}, $o->{c});
394							}
395
396							=head2 rotateToRegressionLine
397
398							do we need this?
399
400							=cut
401
402							sub rotateToRegressionLine {
403	0			0	1		my $o = shift;
404	0	0					croak "looks like regression() has not been called on this object"
405							unless defined $o->{c};
406
407							# use distanceToLineN([0,0,0],...) to get middle point of line for distance :-)
408							#my $O = [map {0} (1..$o->{k})]; # the origin
409							#my @MC = ($o->{m},$o->{c}); # we'll be using this a lot, maybe
410
411							#my ($dO,$X) = distanceToLineN($O,@MC); # $X is the "centre" of the line
412
413							####
414	0						my $r = $o->derive('rotateToRegressionLine');
415
416	0						my $ic = $o->{comparatorIndex};
417
418	0						$r->{d} = [];
419
420	0						foreach my $j(0..$o->{n}-1){
421	0						foreach my $i(0..$o->{k}-1){
422	0	0					next if $i == $ic;
423
424	0						my $m = $o->{m}->[$i];
425	0						my $c = $o->{c}->[$i];
426	0		0				my $y = $o->{data}->[$i]->[$j] \|\| $c;
427	0		0				my $x = $o->{data}->[$ic]->[$j] \|\| 0;
428	0						$y -= $c;
429	0	0	0				if($y \|\| $x){
430	0						my $a = atan2($m,1);
431	0						my $t = atan2($y,$x) - $a;
432	0						my $r = sqrt($x2 + $y2);
433	0						($x,$y) = ($rcos($t), $rsin($t));
434							}
435
436	0						$r->{data}->[$i]->[$j] = $y;
437	0						$r->{data}->[$ic]->[$j] = $x;
438
439							#my ($d,$x) = distanceToLineN(
440							# [$o->{data}->[$i]->[$j],$o->{data}->[$ic]->[$j]],
441							# [$o->{m}->[$i], $o->{m}->[$ic]],
442							# [$o->{c}->[$i], $o->{c}->[$ic]]
443							#);
444							#my $icv = $o->{data}->[$ic]->[$j] \|\| 0;
445							#my $iv = $o->{data}->[$i]->[$j] \|\| 0;
446							#if($icv < $iv){
447							# $d *= -1;
448							#}
449							#$r->{data}->[$i]->[$j] = $d;
450							}
451
452	0						my $sumOfSquares = 0;
453	0						my $sumOfValues = 0;
454	0	0					$sumOfSquares += $_ foreach map {$_ == $ic ? () : $r->{data}->[$_]->[$j] ** 2} (0..$o->{k}-1);
	0
455	0	0					$sumOfValues += $_ foreach map {$_ == $ic ? () : $r->{data}->[$_]->[$j]} (0..$o->{k}-1);
	0
456	0						my $rootsumsquares = sqrt($sumOfSquares);
457							#my ($d,$x) = distanceToLineN(
458							# [@coords], @MC
459							#);
460							# give the distance a sign too!
461							#my $ss = 0;
462							#foreach my $i(0..$r->{k}-1){
463							# $ss += $r->{data}->[$i]->[$j] * $r->{m}->[$i];
464							#}
465
466	0	0					$rootsumsquares *= -1 if $sumOfValues < 0;
467
468	0						push @{$r->{d}}, $rootsumsquares; # distance to line
	0
469
470							#my $ss = 0; # sum of squares
471							#foreach my $i(0..$o->{k}-1){
472							# my $xi = $X->[$i] \|\| 0;
473							# my $di = $o->{data}->[$i]->[$j] \|\| 0;
474							# $ss += ($xi - $di)**2
475							#}
476							#
477							#$r->{data}->[$ic]->[$j] = sqrt($ss); # distance to center of line
478
479							}
480
481
482	0						return $r;
483							}
484
485							=head2 variances
486
487							Calculate variances... i.e. distances from the origin along the line of
488							regression, and distances from the line of regression. This is just like
489							deDiagonalise, except that only two columns are returned.
490
491							=cut
492
493							sub variances {
494	0			0	1		my $o = shift;
495	0						my $S; # experimental spread
496							my $E; # imprecision
497	0						my $df = 0;
498	0						my $ic = $o->{comparatorIndex};
499							# we can give a value for how likely a point is to be there by imprecision alone
500	0						foreach my $j(0..$o->{n}-1){
501	0	0	0				if($o->{d}->[$j] ne '' && $o->{data}->[$ic]->[$j] ne ''){
502	0						$E += $o->{d}->[$j] ** 2;
503	0						$S += $o->{data}->[$ic]->[$j] ** 2;
504	0						$df ++;
505							}
506							}
507	0						$E /= $df;
508	0						$S /= $df;
509	0						my $sdE = sqrt($E);
510	0						my $sdS = sqrt($S);
511	0						$o->{vE} = $E;
512	0						$o->{vS} = $S;
513	0						$o->{sdE} = $sdE;
514	0						$o->{sdS} = $sdS;
515
516	0						$o->{pee} = [];
517	0						$o->{pss} = [];
518	0						$o->{pes} = [];
519	0						$o->{pse} = [];
520	0						foreach my $j(0..$o->{n}-1){
521	0						my ($pee,$pes,$pss,$pse) = ('','','','');
522	0	0	0				if($o->{d}->[$j] ne '' && $o->{data}->[$ic]->[$j] ne ''){
523	0	0					$pee = $sdE ?
524							Statistics::Distributions::tprob ($df,$o->{d}->[$j] / $sdE)
525							: 1;
526	0	0					$pes = $sdS ?
527							Statistics::Distributions::tprob ($df,$o->{d}->[$j] / $sdS)
528							: 1;
529	0	0					$pss = $sdS ?
530							Statistics::Distributions::tprob ($df,$o->{data}->[$ic]->[$j] / $sdS)
531							: 1;
532	0	0					$pse = $sdE ?
533							Statistics::Distributions::tprob ($df,$o->{data}->[$ic]->[$j] / $sdE)
534							: 1;
535							}
536	0						push @{$o->{pee}}, $pee;
	0
537	0						push @{$o->{pes}}, $pes;
	0
538	0						push @{$o->{pss}}, $pss;
	0
539	0						push @{$o->{pse}}, $pse;
	0
540							}
541	0						return ($S,$E);
542							}
543
544							=head2 printableTable, printTable
545
546							printableTable returns all available relevant info in a table
547							printTable prints all available relevant info in a table
548
549							the firts element returned is a list of columns. the rest are the columns.
550
551							data stored are:
552
553							# scalars:
554							comparatorIndex # index of column used to compare
555							k
556							n
557							vE # variance of "error" (imprecision)
558							vS # variable of experimental spread
559							sdE # s.d. error
560							sdS # s.d. spread
561
562							# arrays (foreach column)
563							m # regression denominator
564							c # regression constant
565							# arrays (foreach row)
566							d # distance from regression line
567							pee # p-value of error
568							pss # p-value of spread
569							pes # p-value of error over spread (??)
570							pse # p-value of spread over error
571
572							# 2D array (LoL)
573							data
574
575							note that the distance from the center of the distribution
576							is given by the values in data[comparatorIndex]
577
578							These methods take a single argumen: depth. Every time an object is
579							derived from another (subtractMedian, deDiagonalize and
580							rotateToRegressionLine all do this) the old object is referenced, and
581							you can include the last $depth objects in the output. Set depth to -1
582							to include all objects.
583
584							=cut
585
586							sub printableTable {
587	0			0	1		my ($o,$deep) = @_;
588	0						my @header = (qw/Statistic Value/, map {"Column $_"} (1..$o->{k}));
	0
589
590	0						my @statistics = ();
591	0						my @values = ();
592
593	0						my @statkeys = qw/comparatorIndex k n vE vS sdE sdS/;
594	0						my @statnames = qw/CompareColumn Columns Rows ErrorVariance SpreadVariance ErrorSD SpreadSD/;
595	0						foreach (0..$#statkeys){
596	0						my $statkey = $statkeys[$_];
597	0						my $statname = $statnames[$_];
598	0	0	0				if(defined $o->{$statkey} && $o->{$statkey} ne ''){
599	0						push @statistics, $statname;
600	0						push @values, $o->{$statkey};
601	0	0					if($statkey eq 'comparatorIndex'){
602	0						$values[$#values] ++; # 1-based!
603							}
604							}
605							}
606
607	0						my @printData = (\@statistics, \@values, @{$o->{data}});
	0
608
609	0	0	0				if(ref $o->{m} && @{$o->{m}} ){
	0
610	0						push @header, 'Regression','M','C';
611	0						push @printData, [map {"Column $_"} (1..$o->{k})];
	0
612	0						push @printData, $o->{m}, $o->{c};
613							}
614
615	0	0					if(ref $o->{d}){
616	0						push @header, 'DistanceToRegressionLine';
617	0						push @printData, $o->{d};
618							}
619
620	0	0					if(ref $o->{pee}){
621	0						push @header, 'ErrorPvalue';
622	0						push @printData, $o->{pee};
623
624	0						push @header, 'SpreadPvalue';
625	0						push @printData, $o->{pss};
626
627	0						push @header, 'ErrorOverSpreadPvalue';
628	0						push @printData, $o->{pes};
629
630	0						push @header, 'SpreadOverErrorPvalue';
631	0						push @printData, $o->{pse};
632							}
633
634	0	0	0				if($deep && ref($o->{derivedFrom})){
635	0						push @header, 'DerivedFrom';
636	0						push @printData, [$o->{derivedReason}];
637	0						my $d = $o->{derivedFrom}->printableTable($deep-1);
638	0						my ($dh,@dcols) = @$d;
639	0						push @header, @$dh;
640	0						push @printData, @dcols;
641							}
642
643	0						return [\@header, @printData];
644							}
645
646							sub printTable {
647	0			0	1		my ($o,$deep) = @_;
648	0						my $pt = $o->printableTable($deep);
649							# lets assume that n > number of statistics!
650	0						my $n = $o->{n};
651	0						my $w = @$pt;
652	0						print join("\t", @{$pt->[0]})."\n";
	0
653	0						foreach my $j(0..$n-1){
654	0						my @row = ();
655	0						foreach my $i(1..$w-1){
656	0	0					my $val = defined $pt->[$i]->[$j] ? $pt->[$i]->[$j] : '';
657	0						push @row, $val;
658							}
659	0						print join("\t", @row)."\n";
660							}
661							}
662
663							=head1 just some wee helper functions...
664
665							=head2 median
666
667							yes this probably exists in other modules, but I didn't want to pull in a whole
668							module for just one funciton. Anyway, this is an inefficient version for small
669							numbers of data. It sorts the list and then uses middle() to find the middle of
670							the sorted list.
671
672							=cut
673
674							sub median {
675	0	0	0	0	1		my @r = sort {$a<=>$b} map {defined && /\d/ ? $_ : ()} @_;
	0
	0
676	0						return middle(@r);
677							}
678
679							=head2 medianN
680
681							Like median, but for an even list is returns the two middlemost values.
682							This version is used in medianI.
683
684							=cut
685
686							sub medianN {
687	0	0	0	0	1		my @r = sort {$a<=>$b} map {defined && /\d/ ? $_ : ()} @_;
	0
	0
688	0						return middleN(@r);
689							}
690
691							=head2 medianI
692
693							This uses medianN to get the middlemost value(s) and then returns a list
694							of column indices indicating which columns had a middlemost value.
695							This is used in the medianLeft method when deciding which
696							column is middlemost.
697
698							=cut
699
700							sub medianI {
701	0			0	1		my @N = medianN(@_);
702	0						my @I = ();
703	0						foreach my $i(0..$#_){
704	0	0	0				if(defined $_[$i] && $_[$i] ne ''){
705	0						foreach my $n(@N){
706	0	0					if($n == $_[$i]){
707	0						push @I, $i;
708							}
709							}
710							}
711							}
712	0						return @I;
713							}
714
715							=head2 middle
716
717							middle returns the middlemost item in a list, or the mean average of the two
718							middlemost items. It doesn't sort the list first.
719
720							=cut
721
722							sub middle {
723	0	0		0	1		if(@_ % 2){
724	0						return $_[$#_/2];
725							}
726							else {
727	0						return $_[($#_+1)/2]/2 + $_[($#_-1)/2]/2;
728							}
729							}
730
731							=head2 middleN
732
733							middleN does like middle, but for even lists, it returns the two middlemost
734							items as a list. This is used by medianN.
735
736							=cut
737
738							sub middleN {
739	0	0		0	1		if(@_ % 2){
740	0						return $_[$#_/2];
741							}
742							else {
743	0						return ($_[($#_+1)/2], $_[($#_-1)/2]);
744							}
745							}
746
747
748							=head1 AUTHOR
749
750							Jimi Wills, C<< >>
751
752							=head1 BUGS
753
754							Please report any bugs or feature requests to C, or through
755							the web interface at L. I will be notified, and then you'll
756							automatically be notified of progress on your bug as I make changes.
757
758
759
760
761							=head1 SUPPORT
762
763							You can find documentation for this module with the perldoc command.
764
765							perldoc Statistics::Reproducibility
766
767
768							You can also look for information at:
769
770							=over 4
771
772							=item * RT: CPAN's request tracker (report bugs here)
773
774							L
775
776							=item * AnnoCPAN: Annotated CPAN documentation
777
778							L
779
780							=item * CPAN Ratings
781
782							L
783
784							=item * Search CPAN
785
786							L
787
788							=back
789
790
791							=head1 ACKNOWLEDGEMENTS
792
793
794							=head1 LICENSE AND COPYRIGHT
795
796							Copyright 2013 Jimi Wills.
797
798							This program is free software; you can redistribute it and/or modify it
799							under the terms of the the Artistic License (2.0). You may obtain a
800							copy of the full license at:
801
802							L
803
804							Any use, modification, and distribution of the Standard or Modified
805							Versions is governed by this Artistic License. By using, modifying or
806							distributing the Package, you accept this license. Do not use, modify,
807							or distribute the Package, if you do not accept this license.
808
809							If your Modified Version has been derived from a Modified Version made
810							by someone other than you, you are nevertheless required to ensure that
811							your Modified Version complies with the requirements of this license.
812
813							This license does not grant you the right to use any trademark, service
814							mark, tradename, or logo of the Copyright Holder.
815
816							This license includes the non-exclusive, worldwide, free-of-charge
817							patent license to make, have made, use, offer to sell, sell, import and
818							otherwise transfer the Package with respect to any patent claims
819							licensable by the Copyright Holder that are necessarily infringed by the
820							Package. If you institute patent litigation (including a cross-claim or
821							counterclaim) against any party alleging that the Package constitutes
822							direct or contributory patent infringement, then this Artistic License
823							to you shall terminate on the date that such litigation is filed.
824
825							Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
826							AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
827							THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
828							PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
829							YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
830							CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
831							CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
832							EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
833
834
835							=cut
836
837							1; # End of Statistics::Reproducibility