|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
25
  
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
1456323
 | 
 use 5.006;  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
337
 | 
    | 
| 
2
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
146
 | 
 use strict;  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
592
 | 
    | 
| 
3
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
122
 | 
 use warnings;  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1766
 | 
    | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Statistics::Descriptive::LogScale;  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Statistics::Descriptive::LogScale - Memory-efficient approximate univariate  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 descriptive statistics class.  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Version 0.10  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = 0.11;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Basic usage  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The basic usage is roughly the same as that of L.  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Statistics::Descriptive::LogScale;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $stat = Statistics::Descriptive::LogScale->new ();  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     while(<>) {  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         chomp;  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $stat->add_data($_);  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # This can also be done in O(1) memory, precisely  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     printf "Mean: %f +- %f\n", $stat->mean, $stat->standard_deviation;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # This requires storing actual data, or approximating  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     printf "25%%  : %f\n", $stat->percentile(25);  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     printf "Median: %f\n", $stat->median;  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     printf "75%%  : %f\n", $stat->percentile(75);  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Save/load  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not present in L.  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The save/load interface is designed compatible with JSON::XS.  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, any other serializer can be used.  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method is I to return unblessed hashref  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with enough information to restore the original object.  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Statistics::Descriptive::LogScale;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $stat = Statistics::Descriptive::LogScale->new ();  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ..... much later  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Save  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $fd encoder_of_choice( $stat->TO_JSON )  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or die "Failed to save: $!";  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ..... and even later  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Load  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $plain_hash = decoder_of_choice( $raw_data );  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $copy_of_stat = Statistics::Descriptive::LogScale->new( %$plain_hash );  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Import into existing LogScale instance  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $plain_hash = decoder_of_choice( $more_raw_data );  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $copy_of_stat->add_data_hash( $plain_hash->{data} );  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Histograms  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Both L and L  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 offer C method for querying data point counts.  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, there's also C method for making pretty pictures.  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Here's a simple text-based histogram.  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A proper GD example was too long to fit into this margin.  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use strict;  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use warnings;  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Statistics::Descriptive::LogScale;  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $stat = Statistics::Descriptive::LogScale->new ();  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # collect/load data ...  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $re_float = qr([-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?);  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     while (<>) {  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $stat->add_data($_) for m/($re_float)/g;  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     die "Empty set"  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $stat->count;  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # get data in [ count, lower_bound, upper_bound ] format as arrayref  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $hist = $stat->histogram( count => 20 );  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # find maximum value to use as a scale factor  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $scale = $hist->[0][0];  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $scale < $_->[0] and $scale = $_->[0] for @$hist;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     foreach (@$hist) {  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         printf "%10f %s\n", $_->[1], '#' x ($_->[0] * 68 / $scale);  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     printf "%10f\n", $hist->[-1][2];  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module aims at providing some advanced statistical functions without  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 storing all data in memory, at the cost of certain (predictable) precision loss.  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data is represented by a set of bins that only store counts of fitting  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data points.  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Most bins are logarithmic, i.e. lower end / upper end ratio is constant.  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, around zero linear approximation may be user instead  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (see "linear_width" and "linear_thresh" parameters in new()).  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All operations are then performed on the bins, introducing relative error  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which does not, however, exceed the bins' relative width ("base").  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ########################################################################  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  == HOW IT WORKS ==  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Buckets are stored in a hash: { $value => $count, ... }  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  {base} is bin width, {logbase} == log {base} (cache)  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  {linear_thresh} is where we switch to equal bin approximation  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  {linear_width} is width of bin around zero (==linear_thresh if not given)  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  {floor} is lower bound of bin whose center is 1. {logfloor} = log {floor}  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Nearly all meaningful subs have to scan all the bins, which is bad,  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     but anyway better than scanning full sample.  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
175
 | 
 use Carp;  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1538
 | 
    | 
| 
129
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
8320
 | 
 use POSIX qw(floor ceil);  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141281
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
170
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Fields are NOT used internally for now, so this is just a declaration  | 
| 
132
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 use fields qw(  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	data count  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	linear_width base logbase floor linear_thresh only_linear logfloor  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	cache  | 
| 
136
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
42260
 | 
 );  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36235
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Some internal constants  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This is for infinite portability^W^W portable infinity  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $INF = 9**9**9;  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $re_num = qr/(?:[-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?)/;  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new( %options )  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %options may include:  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * base - ratio of adjacent bins. Default is 10^(1/232), which gives  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1% precision and exact decimal powers.  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This value represents acceptable relative error in analysis results.  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Actual value may be slightly less than requested one.  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is done so to avoid troubles with future rounding in (de)serialization.  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * linear_width - width of linear bins around zero.  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This value represents precision of incoming data.  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Default is zero, i.e. we assume that the measurement is precise.  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Actual value may be less (by no more than a factor of C)  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 so that borders of linear and logarithmic bins fit nicely.  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * linear_thresh - where to switch to linear approximation.  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If only one of C and C is given,  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the other will be calculated.  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, user may want to specify both in some cases.  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Actual value may be less (by no more than a factor of C)  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 so that borders of linear and logarithmic bins fit nicely.  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * only_linear = 1 (B) -  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 throw away log approximation and become a discrete statistics  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 class with fixed precision.  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C must be given in this case.  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B This obviously kills memory efficiency, unless one knows beforehand  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that all values come from a finite pool.  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * data - hashref with C<{ value => weight }> for initializing data.  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Used for cloning.  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See C.  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * zero_thresh - absolute value threshold below which everything is  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 considered zero.  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 DEPRECATED, C and C override this if given.  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my @new_keys = qw( base linear_width linear_thresh data zero_thresh only_linear );  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# TODO Throw if extra options given?  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# TODO Check args better  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
195
 | 
46
 | 
 
 | 
 
 | 
  
46
  
 | 
  
1
  
 | 
23519
 | 
 	my $class = shift;  | 
| 
196
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
196
 | 
 	my %opt = @_;  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# First, check for only_linear option  | 
| 
199
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
179
 | 
 	if ($opt{only_linear}) {  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$opt{linear_width}  | 
| 
201
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 			or croak "only_linear option given, but no linear width";  | 
| 
202
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		$opt{only_linear} = $opt{linear_width};  | 
| 
203
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 		delete $opt{$_} for qw(linear_width base linear_thresh zero_thresh);  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# base for logarithmic bins, sane default: +-1%, exact decimal powers  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# UGLY HACK number->string->number to avoid  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#     future serialization inconsistencies  | 
| 
209
 | 
46
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
236
 | 
 	$opt{base} ||= 10**(1/232);  | 
| 
210
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
392
 | 
 	$opt{base} = 0 + "$opt{base}";  | 
| 
211
 | 
46
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
184
 | 
 	$opt{base} > 1 or croak __PACKAGE__.": new(): base must be >1";  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# calculate where to switch to linear approximation  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# the condition is: linear bin( thresh ) ~~ log bin( thresh )  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# i.e. thresh * base - thresh ~~ absolute error * 2  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# i.e. thresh ~~ absolute_error * 2 / (base - 1)  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# also support legacy API (zero_thresh)  | 
| 
218
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
164
 | 
 	if (defined $opt{linear_thresh} ) {  | 
| 
219
 | 
13
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
45
 | 
 		$opt{linear_width} ||= $opt{linear_thresh} * ($opt{base}-1);  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
221
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
 		$opt{linear_thresh}  = $opt{zero_thresh};  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$opt{linear_thresh} = abs($opt{linear_width}) / ($opt{base} - 1)  | 
| 
224
 | 
46
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
212
 | 
 		if $opt{linear_width} and !$opt{linear_thresh};  | 
| 
225
 | 
46
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
244
 | 
 	$opt{linear_thresh} ||= 0;  | 
| 
226
 | 
46
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
141
 | 
 	$opt{linear_thresh} >= 0  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		or croak __PACKAGE__.": new(): linear_thresh must be >= 0";  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Can't use fields::new anymore  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#    due to JSON::XS incompatibility with restricted hashes  | 
| 
231
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
 	my $self = bless {}, $class;  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
233
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
191
 | 
 	$self->{base} = $opt{base};  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# cache values to ease calculations  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# floor = (lower end of bin) / (center of bin)  | 
| 
236
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
152
 | 
 	$self->{floor} = 2/(1+$opt{base});  | 
| 
237
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
 	$self->{logbase} = log $opt{base};  | 
| 
238
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
 	$self->{logfloor} = log $self->{floor};  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# bootstrap linear_thresh - make it fit bin edge  | 
| 
241
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
116
 | 
 	$self->{linear_width} = $self->{linear_thresh} = 0;  | 
| 
242
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
 	$self->{linear_thresh} = $self->_lower( $opt{linear_thresh} );  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# divide anything below linear_thresh into odd number of bins  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#      not exceeding requested linear_width  | 
| 
246
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
184
 | 
 	if ($self->{linear_thresh}) {  | 
| 
247
 | 
15
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
53
 | 
 		my $linear_width = $opt{linear_width} || 2 * $self->{linear_thresh};  | 
| 
248
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 		my $n_linear = ceil(2 * $self->{linear_thresh} / abs($linear_width));  | 
| 
249
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
 		$n_linear++ unless $n_linear % 2;  | 
| 
250
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
 		$self->{linear_width} = (2 * $self->{linear_thresh} / $n_linear);  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
153
 | 
 	if ($opt{only_linear}) {  | 
| 
254
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		$self->{linear_width} = $opt{only_linear};  | 
| 
255
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		$self->{linear_thresh} = $INF;  | 
| 
256
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		$self->{only_linear} = 1;  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
178
 | 
 	$self->clear;  | 
| 
260
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
130
 | 
 	if ($opt{data}) {  | 
| 
261
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 		$self->add_data_hash($opt{data});  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
263
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
 	return $self;  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 General statistical methods  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods are used to query the distribution properties. They generally  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 follow the interface of L and co,  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with minor additions.  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All methods return C on empty data set, except for  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C, C, C, C and C  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which all return 0.  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B This module caches whatever it calculates very agressively.  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Don't hesitate to use statistical functions (except for sum_of/mean_of)  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 more than once. The cache is deleted upon data entry.  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 clear  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Destroy all stored data.  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub clear {  | 
| 
287
 | 
60
 | 
 
 | 
 
 | 
  
60
  
 | 
  
1
  
 | 
11725
 | 
 	my $self = shift;  | 
| 
288
 | 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2405
 | 
 	$self->{data} = {};  | 
| 
289
 | 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
181
 | 
 	$self->{count} = 0;  | 
| 
290
 | 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
 	delete $self->{cache};  | 
| 
291
 | 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
 	return $self;  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 add_data( @data )  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Add numbers to the data pool.  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns self, so that methods can be chained.  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If incorrect data is given (i.e. non-numeric, undef),  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an exception is thrown and only partial data gets inserted.  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The state of object is guaranteed to remain consistent in such case.  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Cache is reset, even if no data was actually inserted.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B It is possible to add infinite values to data pool.  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module will try and calculate whatever can still be calculated.  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, no portable way of serializing such values is done yet.  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_data {  | 
| 
313
 | 
51217
 | 
 
 | 
 
 | 
  
51217
  
 | 
  
1
  
 | 
150318
 | 
 	my $self = shift;  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
51217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64994
 | 
 	delete $self->{cache};  | 
| 
316
 | 
51217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73172
 | 
 	foreach (@_) {  | 
| 
317
 | 
90167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
143648
 | 
 		$self->{data}{ $self->_round($_) }++;  | 
| 
318
 | 
90164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
156371
 | 
 		$self->{count}++;  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
320
 | 
51214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85781
 | 
 	$self;  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 count  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns number of data points.  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub count {  | 
| 
330
 | 
69
 | 
 
 | 
 
 | 
  
69
  
 | 
  
1
  
 | 
9123
 | 
 	my $self = shift;  | 
| 
331
 | 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
291
 | 
 	return $self->{count};  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 min  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 max  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Values of minimal and maximal bins.  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 NOTE: Due to rounding, some of the actual inserted values may fall outside  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the min..max range. This may change in the future.  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub min {  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->_sort->[0];  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub max {  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->_sort->[-1];  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 sample_range  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return sample range of the dataset, i.e. max() - min().  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sample_range {  | 
| 
362
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
934
 | 
 	my $self = shift;  | 
| 
363
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	return $self->count ? $self->max - $self->min : undef;  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 sum  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return sum of all data points.  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sum {  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->sum_of(sub { $_[0] });  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 sumsq  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return sum of squares of all datapoints.  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sumsq {  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->sum_of(sub { $_[0] * $_[0] });  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mean  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return mean, or average value, i.e. sum()/count().  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mean {  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->{count} ? $self->sum / $self->{count} : undef;  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 variance  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 variance( $correction )  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return data variance, i.e. E((x - E(x)) ** 2).  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bessel's correction (division by n-1 instead of n) is used by default.  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This may be changed by specifying $correction explicitly.  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B The binning strategy used here should also introduce variance bias.  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not yet accounted for.  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub variance {  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $correction = shift;  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# in fact we'll receive correction='' because of how cache works  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$correction = 1 unless defined $correction and length $correction;  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return 0 if ($self->{count} < 1 + $correction);  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $var = $self->sumsq - $self->sum**2 / $self->{count};  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $var <= 0 ? 0 : $var / ( $self->{count} - $correction );  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 standard_deviation  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 standard_deviation( $correction )  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 std_dev  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stdev  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return standard deviation, i.e. square root of variance.  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bessel's correction (division by n-1 instead of n) is used by default.  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This may be changed by specifying $correction explicitly.  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B The binning strategy used here should also introduce variance bias.  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not yet accounted for.  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub standard_deviation {  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return sqrt($self->variance(@_));  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cdf ($x)  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cumulative distribution function. Returns estimated probability of  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 random data point from the sample being less than C<$x>.  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As a special case, C accounts for I of zeroth bin count (if any).  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Not present in Statistics::Descriptive::Full, but appears in  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cdf ($x, $y)  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns probability of a value being between C<$x> and C<$y> ($x <= $y).  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is essentially C.  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cdf {  | 
| 
468
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
8
 | 
 	my $self = shift;  | 
| 
469
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	return unless $self->{count};  | 
| 
470
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return $self->_count(@_) / $self->{count};  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 percentile( $n )  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Find $n-th percentile, i.e. a value below which lies $n % of the data.  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 0-th percentile is by definition -inf and is returned as undef  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (see Statistics::Descriptive).  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $n is a real number, not necessarily integer.  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub percentile {  | 
| 
485
 | 
48
 | 
 
 | 
 
 | 
  
48
  
 | 
  
1
  
 | 
4949
 | 
 	my $self = shift;  | 
| 
486
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
99
 | 
 	my $x = shift;  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# assert 0<=$x<=100  | 
| 
489
 | 
48
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
270
 | 
 	croak __PACKAGE__.": percentile() argument must be between 0 and 100"  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unless 0<= $x and $x <= 100;  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
492
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
158
 | 
 	my $need = $x * $self->{count} / 100;  | 
| 
493
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
191
 | 
 	return if $need < 1;  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# dichotomize  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# $i is lowest value >= needed  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# $need doesnt exceed last bin!  | 
| 
498
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
154
 | 
 	my $i = _bin_search_ge( $self->_probability, $need );  | 
| 
499
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
123
 | 
 	return $self->_sort->[ $i ];  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 quantile( 0..4 )  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 From Statistics::Descriptive manual:  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   0 => zero quartile (Q0) : minimal value  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   1 => first quartile (Q1) : lower quartile = lowest cut off (25%) of data = 25th percentile  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   2 => second quartile (Q2) : median = it cuts data set in half = 50th percentile  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   3 => third quartile (Q3) : upper quartile = highest cut off (25%) of data, or lowest 75% = 75th percentile  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   4 => fourth quartile (Q4) : maximal value  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub quantile {  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $t = shift;  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	croak (__PACKAGE__.": quantile() argument must be one of 0..4")  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unless $t =~ /^[0-4]$/;  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$t or return $self->min;  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->percentile($t * 100 / 4);  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 median  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return median of data, a value that divides the sample in half.  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Same as percentile(50).  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub median {  | 
| 
533
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
  
1
  
 | 
1389
 | 
 	my $self = shift;  | 
| 
534
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	return $self->percentile(50);  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 trimmed_mean( $ltrim, [ $utrim ] )  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return mean of sample with $ltrim and $utrim fraction of data points  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 remover from lower and upper ends respectively.  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ltrim defaults to 0, and rtrim to ltrim.  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub trimmed_mean {  | 
| 
547
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
 	my $self = shift;  | 
| 
548
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my ($lower, $upper) = @_;  | 
| 
549
 | 
1
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
3
 | 
 	$lower ||= 0;  | 
| 
550
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$upper = $lower unless defined $upper;  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
552
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my $min = $self->percentile($lower * 100);  | 
| 
553
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $max = $self->percentile(100 - $upper * 100);  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
555
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return unless $min < $max;  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
557
 | 
1
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
9
 | 
 	return $self->mean_of(sub{$_[0]}, $min, $max);  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 harmonic_mean  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return harmonic mean of the data, i.e. 1/E(1/x).  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return undef if division by zero occurs (see Statistics::Descriptive).  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub harmonic_mean {  | 
| 
569
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
988
 | 
 	my $self = shift;  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
571
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my $ret;  | 
| 
572
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	eval {  | 
| 
573
 | 
2
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
6
 | 
 		$ret = $self->count / $self->sum_of(sub { 1/$_[0] });  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
575
 | 
2
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
33
 | 
 	if ($@ and $@ !~ /division.*zero/) {  | 
| 
576
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		die $@; # rethrow ALL BUT 1/0 which yields undef  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
578
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	return $ret;  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 geometric_mean  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return geometric mean of the data, that is, exp(E(log x)).  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Dies unless all data points are of the same sign.  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub geometric_mean {  | 
| 
590
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
1050
 | 
 	my $self = shift;  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	return unless $self->count;  | 
| 
593
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	croak __PACKAGE__.": geometric_mean() called on mixed sign sample"  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if $self->min * $self->max < 0;  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
596
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return 0 if $self->{data}{0};  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# this must be dog slow, but we already log() too much at this point.  | 
| 
598
 | 
1
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
6
 | 
 	my $ret = exp( $self->sum_of( sub { log abs $_[0] } ) / $self->{count} );  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
599
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	return $self->min < 0 ? -$ret : $ret;  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 skewness  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return skewness of the distribution, calculated as  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 n/(n-1)(n-2) * E((x-E(x))**3)/std_dev**3 (this is consistent with Excel).  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub skewness {  | 
| 
610
 | 
20
 | 
 
 | 
 
 | 
  
20
  
 | 
  
1
  
 | 
2017
 | 
 	my $self = shift;  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
612
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	my $n = $self->{count};  | 
| 
613
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
63
 | 
 	return unless $n > 2;  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# code stolen from Statistics::Descriptive  | 
| 
616
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 	my $skew = $n * $self->std_moment(3);  | 
| 
617
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
 	my $correction = $n / ( ($n-1) * ($n-2) );  | 
| 
618
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
96
 | 
 	return $correction * $skew;  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 kurtosis  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return kurtosis of the distribution, that is 4-th standardized moment - 3.  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The exact formula used here is consistent with that of Excel and  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Statistics::Descriptive.  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub kurtosis {  | 
| 
630
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
  
1
  
 | 
1036
 | 
 	my $self = shift;  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
632
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 	my $n = $self->{count};  | 
| 
633
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
67
 | 
 	return unless $n > 3;  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# code stolen from Statistics::Descriptive  | 
| 
636
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
 	my $kurt = $n * $self->std_moment(4);  | 
| 
637
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
 	my $correction1 = ( $n * ($n+1) ) / ( ($n-1) * ($n-2) * ($n-3) );  | 
| 
638
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
 	my $correction2 = ( 3  * ($n-1) ** 2) / ( ($n-2) * ($n-3) );  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
640
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
99
 | 
 	return $correction1 * $kurt - $correction2;  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 central_moment( $n )  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return $n-th central moment, that is, E((x - E(x))^$n).  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Not present in Statistics::Descriptive::Full.  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub central_moment {  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $n = shift;  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $mean = $self->mean;  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->sum_of(sub{ ($_[0] - $mean) ** $n }) / $self->{count};  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 std_moment( $n )  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return $n-th standardized moment, that is,  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 E((x - E(x))**$n) / std_dev(x)**$n.  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Not present in Statistics::Descriptive::Full.  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub std_moment {  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $n = shift;  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $mean = $self->mean;  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $dev = $self->std_dev;  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->sum_of(sub{ ($_[0] - $mean) ** $n })  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		/ ( $dev**$n * $self->{count} );  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 abs_moment( $power, [$offset] )  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return $n-th moment of absolute value, that is, C.  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Default value for offset if E(x).  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Power may be fractional.  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Experimental. Not present in Statistics::Descriptive::Full.  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub abs_moment {  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($self, $power, $offset) = @_;  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$offset = $self->mean unless defined $offset;  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->sum_of(sub{ return abs($_[0] - $offset) ** $power })  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 / $self->{count};  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 std_abs_moment( $power, [$offset] )  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns standardized absolute moment - like above, but scaled  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 down by a factor of to standard deviation to n-th power.  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 That is, C  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Default value for offset if E(x).  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Power may be fractional.  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Experimental. Not present in Statistics::Descriptive::Full.  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub std_abs_moment {  | 
| 
712
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
  
1
  
 | 
51
 | 
     my ($self, $power, $offset) = @_;  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
714
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     return  $self->abs_moment($power, $offset)  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 /  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ($self->abs_moment(2, $offset) ** ($power/2));  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mode  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mode of a distribution is the most common value for a discrete distribution,  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or maximum of probability density for continuous one.  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For now we assume that the distribution IS discrete, and return the bin with  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the biggest hit count.  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 NOTE A better algorithm is still wanted. Experimental.  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Behavior may change in the future.  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Naive implementation  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Find bin w/greatest count and return it  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mode {  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return if !$self->count;  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $index = $self->_sort;  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $index->[0] if @$index == 1;  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @count = map { $self->{data}{$_} } @$index;  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $max_index;  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $max_growth = 0;  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for (my $i = 0; $i<@count; $i++) {  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$count[$i] > $max_growth or next;  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$max_index = $i;  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$max_growth = $count[$i];  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $index->[$max_index];  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 frequency_distribution_ref( \@index )  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 frequency_distribution_ref( $n )  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 frequency_distribution_ref  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return numbers of data point counts below each number in @index as hashref.  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If a number is given instead of arrayref, @index is created  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by dividing [min, max] into $n intervals.  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no parameters are given, return previous result, if any.  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub frequency_distribution_ref {  | 
| 
771
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
989
 | 
 	my $self = shift;  | 
| 
772
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my $index = shift;  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
774
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return unless $self->count;  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# ah, compatibility - return last value  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->{cache}{frequency_distribution_ref}  | 
| 
777
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		unless defined $index;  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# make index if number given  | 
| 
779
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	if (!ref $index) {  | 
| 
780
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		croak __PACKAGE__.": frequency_distribution_ref(): ".  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			"argument must be array, of number > 2, not $index"  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			unless $index > 2;  | 
| 
783
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		my $min = $self->_lower($self->min);  | 
| 
784
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		my $max = $self->_upper($self->max);  | 
| 
785
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $step = ($max - $min) / $index;  | 
| 
786
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$index = [ map { $min + $_ * $step } 1..$index ];  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
789
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	@$index = (-$INF, sort { $a <=> $b } @$index);  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
791
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	my @count;  | 
| 
792
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	for (my $i = 0; $i<@$index-1; $i++) {  | 
| 
793
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 		push @count, $self->_count( $index->[$i], $index->[$i+1] );  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
795
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	shift @$index; # remove -inf  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
797
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my %hash;  | 
| 
798
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	@hash{@$index} = @count;  | 
| 
799
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$self->{cache}{frequency_distribution_ref} = \%hash;  | 
| 
800
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	return \%hash;  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Specific methods  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The folowing methods only apply to this module, or are experimental.  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 bucket_width  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Get bin width (relative to center of bin). Percentiles are off  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by no more than half of this. DEPRECATED.  | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub bucket_width {  | 
| 
817
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
10
 | 
 	my $self = shift;  | 
| 
818
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 	return $self->{base} - 1;  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 log_base  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Get upper/lower bound ratio for logarithmic bins.  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This represents relative precision of sample.  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 linear_width  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Get width of linear buckets.  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This represents absolute precision of sample.  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 linear_threshold  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Get absolute value threshold below which interpolation is switched to linear.  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub log_base {  | 
| 
838
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
6
 | 
 	my $self = shift;  | 
| 
839
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	return $self->{base};  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub linear_width {  | 
| 
843
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
7
 | 
 	my $self = shift;  | 
| 
844
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	return $self->{linear_width};  | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub linear_threshold {  | 
| 
848
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
27
 | 
 	my $self = shift;  | 
| 
849
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	return $self->{linear_thresh};  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 add_data_hash ( { value => weight, ... } )  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Add values with counts/weights.  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This can be used to import data from other  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Statistics::Descriptive::LogScale object.  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns self, so that methods can be chained.  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Negative counts are allowed and treated as "forgetting" data.  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If a bin count goes below zero, such bin is simply discarded.  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Minus infinity weight is allowed and has the same effect.  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data is guaranteed to remain consistent.  | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If incorrect data is given (i.e. non-numeric, undef, or +infinity),  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an exception is thrown and nothing changes.  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B Cache may be reset, even if no data was actually inserted.  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B It is possible to add infinite values to data pool.  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module will try and calculate whatever can still be calculated.  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, no portable way of serializing such values is done yet.  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_data_hash {  | 
| 
877
 | 
20
 | 
 
 | 
 
 | 
  
20
  
 | 
  
1
  
 | 
56
 | 
 	my $self = shift;  | 
| 
878
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 	my $hash = shift;  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check incoming data for being numeric, and no +inf values  | 
| 
881
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 	eval {  | 
| 
882
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
57971
 | 
 		use warnings FATAL => qw(numeric);  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45637
 | 
    | 
| 
883
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
100
 | 
 		while (my ($k, $v) = each %$hash) {  | 
| 
884
 | 
294
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
1437
 | 
 			$k == 0+$k and $v == 0+$v and $v < $INF  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				or die "Infinite count for $k\n";  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
888
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
282
 | 
 	croak __PACKAGE__.": add_data_hash failed: $@"  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if $@;  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
891
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 	delete $self->{cache};  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# update our counters  | 
| 
894
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
 	foreach (keys %$hash) {  | 
| 
895
 | 
291
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
453
 | 
 		next unless $hash->{$_};  | 
| 
896
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
454
 | 
 		my $key = $self->_round($_);  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Insert data. Make sure -Inf doesn't corrupt our counter.  | 
| 
899
 | 
291
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
950
 | 
 		my $newcount = ($self->{data}{$key} || 0) + $hash->{$_};  | 
| 
900
 | 
291
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
444
 | 
 		if ($newcount > 0) {  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# normal insert  | 
| 
902
 | 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
650
 | 
 			$self->{data}{$key} = $newcount;  | 
| 
903
 | 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
469
 | 
 			$self->{count} += $hash->{$_};  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} else {  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# We're "forgetting" data, AND the bin got empty  | 
| 
906
 | 
12
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
50
 | 
 			$self->{count} -= delete $self->{data}{$key} || 0;  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
909
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
 	$self;  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 get_data_hash( %options )  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return distribution hashref {value => number of occurances}.  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is inverse of add_data_hash.  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Options may include:  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * min - ignore values below this. (See find_boundaries)  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * max - ignore values above this. (See find_boundaries)  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * ltrim - ignore this % of values on lower end. (See find_boundaries)  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rtrim - ignore this % of values on upper end. (See find_boundaries)  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * noise_thresh - strip bins with count below this.  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_data_hash {  | 
| 
937
 | 
65
 | 
 
 | 
 
 | 
  
65
  
 | 
  
1
  
 | 
11750
 | 
 	my ($self, %opt) = @_;  | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# shallow copy of data if no options given  | 
| 
940
 | 
65
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
168
 | 
 	return {%{ $self->{data} }} unless %opt;  | 
| 
 
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
682
 | 
    | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
942
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	my ($min, $max) = $self->find_boundaries( %opt );  | 
| 
943
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
6
 | 
 	my $noize = $opt{noize_thresh} || 0;  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
945
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my $data = $self->{data};  | 
| 
946
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my %hash;  | 
| 
947
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	foreach (keys %$data ) {  | 
| 
948
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 		$_ < $min and next;  | 
| 
949
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		$_ > $max and next;  | 
| 
950
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		$data->{$_} < $noize and next;  | 
| 
951
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		$hash{$_} = $data->{$_};  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
954
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return \%hash;  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 TO_JSON()  | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return enough data to recreate the whole object as an unblessed hashref.  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This routine conforms with C, hence the name.  | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Can be called as  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $str = JSON::XS->new->allow_blessed->convert_blessed->encode( $this );  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B This module DOES NOT require JSON::XS or serialize to JSON.  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It just deals with data.  | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Use C, C, C or any serializer of choice.  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $raw_data = $stat->TO_JSON;  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Statistics::Descriptive::LogScale->new( %$raw_data );  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Would generate an exact copy of C<$stat> object  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (provided it's S::D::L and not a subclass).  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 clone( [ %options ] )  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copy constructor - returns copy of an existing object.  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cache is not preserved.  | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Constructor options may be given to override existing data. See new().  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Trim options may be given to get partial data. See get_data_hash().  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub clone {  | 
| 
988
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
981
 | 
 	my ($self, %opt) = @_;  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
990
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 	my $raw = $self->TO_JSON;  | 
| 
991
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 	if (%opt) {  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$raw->{data} = $self->get_data_hash( %opt )  | 
| 
993
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 			unless exists $opt{data};  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		exists $opt{$_} and $raw->{$_} = $opt{$_}  | 
| 
995
 | 
4
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
30
 | 
 			for @new_keys;  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
998
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	return (ref $self)->new( %$raw );  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub TO_JSON {  | 
| 
1002
 | 
32
 | 
 
 | 
 
 | 
  
32
  
 | 
  
1
  
 | 
173
 | 
 	my $self = shift;  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# UGLY HACK Increase linear_thresh by a factor of base ** 1/10  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# so that it's rounded down to present value  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return {  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		CLASS => ref $self,  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		VERSION => $VERSION,  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		base => $self->{base},  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		linear_width => $self->{linear_width},  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		linear_thresh => $self->{linear_thresh} * ($self->{base}+9)/10,  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		only_linear => $self->{only_linear},  | 
| 
1012
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
132
 | 
 		data => $self->get_data_hash,  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 scale_sample( $scale )  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Multiply all bins' counts by given value. This can be used to adjust  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 significance of previous data before adding new data (e.g. gradually  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "forgetting" past data in a long-running application).  | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub scale_sample {  | 
| 
1025
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
9
 | 
 	my $self = shift;  | 
| 
1026
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $factor = shift;  | 
| 
1027
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	$factor > 0 or croak (__PACKAGE__.": scale_sample: factor must be positive");  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1029
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	delete $self->{cache};  | 
| 
1030
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	foreach (@{ $self->_sort }) {  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
1031
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
 		$self->{data}{$_} *= $factor;  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1033
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	$self->{count} *= $factor;  | 
| 
1034
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return $self;  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mean_of( $code, [$min, $max] )  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return expectation of $code over sample within given range.  | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $code is expected to be a pure function (i.e. depending only on its input  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value, and having no side effects).  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The underlying integration mechanism only calculates $code once per bin,  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 so $code should be stable as in not vary wildly over small intervals.  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mean_of {  | 
| 
1050
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
  
1
  
 | 
97
 | 
 	my $self = shift;  | 
| 
1051
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
82
 | 
 	my ($code, $min, $max) = @_;  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1053
 | 
24
 | 
 
 | 
 
 | 
  
429
  
 | 
 
 | 
131
 | 
 	my $weight = $self->sum_of( sub {1}, $min, $max );  | 
| 
 
 | 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1091
 | 
    | 
| 
1054
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
125
 | 
 	return unless $weight;  | 
| 
1055
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
106
 | 
 	return $self->sum_of($code, $min, $max) / $weight;  | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Experimental methods  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods may be subject to change in the future, or stay, if they  | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are good.  | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 sum_of ( $code, [ $min, $max ] )  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Integrate arbitrary function over the sample within the [ $min, $max ] interval.  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Default values for both limits are infinities of appropriate sign.  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Values in the edge bins are cut using interpolation if needed.  | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 NOTE: sum_of(sub{1}, $a, $b) would return rough nubmer of data points  | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  between $a and $b.  | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 EXPERIMENTAL. The method name may change in the future.  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sum_of {  | 
| 
1078
 | 
189
 | 
 
 | 
 
 | 
  
189
  
 | 
  
1
  
 | 
1081
 | 
 	my $self = shift;  | 
| 
1079
 | 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
487
 | 
 	my ($code, $realmin, $realmax) = @_;  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Just app up stuff  | 
| 
1082
 | 
189
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
853
 | 
 	if (!defined $realmin and !defined $realmax) {  | 
| 
1083
 | 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
338
 | 
 		my $sum = 0;  | 
| 
1084
 | 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
298
 | 
 		while (my ($val, $count) = each %{ $self->{data} }) {  | 
| 
 
 | 
63889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149935
 | 
    | 
| 
1085
 | 
63723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90845
 | 
 			$sum += $count * $code->( $val );  | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
1087
 | 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1035
 | 
 		return $sum;  | 
| 
1088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1090
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
58
 | 
 	$realmin = -$INF unless defined $realmin;  | 
| 
1091
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
58
 | 
 	$realmax =  $INF unless defined $realmax;  | 
| 
1092
 | 
23
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
70
 | 
 	return 0 if( $realmin >= $realmax );  | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# correct limits. $min, $max are indices; $left, $right are limits  | 
| 
1095
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
 	my $min   = $self->_round($realmin);  | 
| 
1096
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
 	my $max   = $self->_round($realmax);  | 
| 
1097
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
 	my $left  = $self->_lower($realmin);  | 
| 
1098
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
 	my $right = $self->_upper($realmax);  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# find first bin that's above $left  | 
| 
1101
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
 	my $keys = $self->_sort;  | 
| 
1102
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
 	my $i = _bin_search_ge($keys, $left);  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# warn "sum_of [$min, $max]";  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# add up bins  | 
| 
1106
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
 	my $sum = 0;  | 
| 
1107
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 	for (; $i < @$keys; $i++) {  | 
| 
1108
 | 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
921
 | 
 		my $val = $keys->[$i];  | 
| 
1109
 | 
284
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
598
 | 
 		last if $val > $right;  | 
| 
1110
 | 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
564
 | 
 		$sum += $self->{data}{$val} * $code->( $val );  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# cut edges: the hard part  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# min and max are now used as indices  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# if min or max hits 0, we cut it in half (i.e. into equal 0+ and 0-)  | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# warn "Add up, sum_of = $sum";  | 
| 
1117
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
132
 | 
 	if ($self->{data}{$max}) {  | 
| 
1118
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 		my $width = $self->_upper($max) - $self->_lower($max);  | 
| 
1119
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
67
 | 
 		my $part = $width  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? ($self->_upper($max) - $realmax) / $width  | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			: 0.5;  | 
| 
1122
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 		$sum -= $self->{data}{$max} * $code->($max) * $part;  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# warn "Cut R,  sum_of = $sum";  | 
| 
1125
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
120
 | 
 	if ($self->{data}{$min}) {  | 
| 
1126
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 		my $width = $self->_upper($min) - $self->_lower($min);  | 
| 
1127
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
60
 | 
 		my $part = $width  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? ($realmin - $self->_lower($min)) / $width  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			: 0.5;  | 
| 
1130
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
 		$sum -= $self->{data}{$min} * $code->($min) * $part;  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# warn "Cut L,  sum_of = $sum";  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1134
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
150
 | 
 	return $sum;  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }; # end sum_of  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 histogram ( %options )  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns array of form [ [ count0_1, x0, x1 ], [count1_2, x1, x2 ], ... ]  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 where countX_Y is number of data points between X and Y.  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Options may include:  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * count (+) - number of intervals to divide sample into.  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * index (+) - interval borders as array. Will be sorted before processing.  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * min - ignore values below this. Default = $self->min - epsilon.  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * max - ignore values above this. Default = $self->max + epsilon.  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * ltrim - ignore this % of values on lower end.  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rtrim - ignore this % of values on upper end.  | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * normalize_to  - adjust counts so that max number becomes nnn.  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This may be useful if one intends to draw pictures.  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Either count or index must be present.  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 NOTE: this is equivalent to frequency_distribution_ref but better suited  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for omitting sample tails and outputting pretty pictures.  | 
| 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub histogram {  | 
| 
1171
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
2338
 | 
 	my ($self, %opt) = @_;  | 
| 
1172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1173
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 	return unless $self->count;  | 
| 
1174
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	my ($min, $max) = $self->find_boundaries( %opt );  | 
| 
1175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# build/check index  | 
| 
1176
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	my @index = @{ $opt{index} || [] };  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
1177
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	if (!@index) {  | 
| 
1178
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		my $n = $opt{count};  | 
| 
1179
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		$n > 0 or croak (__PACKAGE__.": histogram: insufficient options (count < 1 )");  | 
| 
1180
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		my $step = ($max - $min) / $n;  | 
| 
1181
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		for (my $x = $min; $x <= $max; $x += $step) {  | 
| 
1182
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 			push @index, $x;  | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# sort & uniq raw index  | 
| 
1186
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my %known;  | 
| 
1187
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		@index = grep { !$known{$_}++ } @index;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
1188
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		@index = sort { $a <=> $b } @index;  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
1189
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		@index > 1 or croak (__PACKAGE__.": histogram: insufficient options (index < 2)");  | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# finally: estimated counts between indices!  | 
| 
1193
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	my @ret;  | 
| 
1194
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	for ( my $i = 0; $i<@index-1; $i++) {  | 
| 
1195
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 		my $count = $self->_count( $index[$i], $index[$i+1] );  | 
| 
1196
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
 		push @ret, [ $count, $index[$i], $index[$i+1] ];  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# if normalize - find maximum & divide by it  | 
| 
1200
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	if (my $norm = $opt{normalize_to}) {  | 
| 
1201
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		my $max = 0;  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$max < $_->[0] and $max = $_->[0]  | 
| 
1203
 | 
1
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
7
 | 
 			for @ret;  | 
| 
1204
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		$norm /= $max;  | 
| 
1205
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$_->[0] *= $norm for @ret;  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1208
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	return \@ret;  | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 find_boundaries( %opt )  | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return ($min, $max) of part of sample denoted by options.  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Options may include:  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * min - ignore values below this. default = min() - epsilon.  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * max - ignore values above this. default = max() + epsilon.  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * ltrim - ignore this % of values on lower end.  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rtrim - ignore this % of values on upper end.  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no options are given, the whole sample is guaranteed to reside between  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returned values.  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub find_boundaries {  | 
| 
1235
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
1600
 | 
 	my $self = shift;  | 
| 
1236
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	my %opt = @_;  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1238
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	return unless $self->count;  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# preprocess boundaries  | 
| 
1241
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	my $min = defined $opt{min} ? $opt{min} : $self->_lower( $self->min );  | 
| 
1242
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	my $max = defined $opt{max} ? $opt{max} : $self->_upper( $self->max );  | 
| 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1244
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	if ($opt{ltrim}) {  | 
| 
1245
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		my $newmin = $self->percentile( $opt{ltrim} );  | 
| 
1246
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
7
 | 
 		defined $newmin and $newmin > $min and $min = $newmin;  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1248
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	if ($opt{utrim}) {  | 
| 
1249
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		my $newmax = $self->percentile( 100-$opt{utrim} );  | 
| 
1250
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
6
 | 
 		defined $newmax and $newmax < $max and $max = $newmax;  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1253
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	return ($min, $max);  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 format( "printf-like expression", ... )  | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns a summary as requested by format string.  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Just as with printf and sprintf, a placeholder starts with a C<%>,  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 followed by formatting options and a  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following placeholders are supported:  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * % - a literal %  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * s, f, g - a normal printf acting on an extra argument.  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The number of extra arguments MUST match the number of such placeholders,  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or this function dies.  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * n - count;  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * m - min;  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * M - max,  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * a - mean,  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * d - standard deviation,  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * S - skewness,  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * K - kurtosis,  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * q(x) - x-th quantile (requires argument),  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * p(x) - x-th percentile (requires argument),  | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * P(x) - cdf - the inferred cumulative distribution function (x)  | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (requires argument),  | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * e(n) - central_moment - central moment of n-th power  | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (requires argument),  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * E(n) - std_moment - standard moment of n-th power (requires argument),  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * A(n) - abs_moment - absolute moment of n-th power (requires argument).  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For example,  | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $stat->format( "99%% results lie between %p(0.5) and %p(99.5)" );  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Or  | 
| 
1307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for( my $i = 0; $i < @stats; $i++ ) {  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $stats[$i]->format( "%s-th average value is %a +- %d", $i );  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %format = (  | 
| 
1315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # percent literal  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     '%' => '%',  | 
| 
1317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # placeholders without parameters  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     n => 'count',  | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     m => 'min',  | 
| 
1320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     M => 'max',  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     a => 'mean',  | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     d => 'std_dev',  | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     S => 'skewness',  | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     K => 'kurtosis',  | 
| 
1325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # placeholders with 1 parameter  | 
| 
1326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     q => 'quantile?',  | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     p => 'percentile?',  | 
| 
1328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     P => 'cdf?',  | 
| 
1329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     e => 'central_moment?',  | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     E => 'std_moment?',  | 
| 
1331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     A => 'abs_moment?',  | 
| 
1332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
1333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %printf = (  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     s => 1,  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     f => 1,  | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     g => 1,  | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
1339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $re_format = join "|", keys %format, keys %printf;  | 
| 
1341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $re_format = qr((?:$re_format));  | 
| 
1342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub format {  | 
| 
1344
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
18
 | 
 	my ($self, $format, @extra) = @_;  | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# FIXME this accepts %m(5), then dies - UGLY  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # TODO rewrite this as a giant sprintf... one day...  | 
| 
1348
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
 	$format =~ s <%([0-9.\-+ #]*)($re_format)(?:\(($re_num)?\)){0,1}>  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		< _format_dispatch($self, $2, $1, $3, \@extra) >ge;  | 
| 
1350
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
1351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak __PACKAGE__.": Extra arguments in format()"  | 
| 
1352
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
         if @extra;  | 
| 
1353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $format;  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1356
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
30
 | 
 sub _format_dispatch {  | 
| 
1357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($obj, $method, $float, $arg, $extra) = @_;  | 
| 
1358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1359
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     # Handle % escapes  | 
| 
1360
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if ($method !~ /^[a-zA-Z]/) {  | 
| 
1361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return $method;  | 
| 
1362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1363
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     # Handle printf built-in formats  | 
| 
1364
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     if (!$format{$method}) {  | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak __PACKAGE__.": Not enough arguments in format()"  | 
| 
1366
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             unless @$extra;  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return sprintf "%${float}${method}", shift @$extra;  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1370
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     # Now we know it's LogScale's own method  | 
| 
1371
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     $method = $format{$method};  | 
| 
1372
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	if ($method =~ s/\?$//) {  | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		die "Missing argument in method $method" if !defined $arg;  | 
| 
1374
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	} else {  | 
| 
1375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		die "Extra argument in method $method" if defined $arg;  | 
| 
1376
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	};  | 
| 
1377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $result = $obj->$method($arg);  | 
| 
1378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1379
 | 
5
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
21
 | 
 	# work around S::D::Full's convention that "-inf == undef"  | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$result = -9**9**9  | 
| 
1381
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 		if ($method eq 'percentile' and !defined $result);  | 
| 
1382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return sprintf "%${float}f", $result;  | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ################################################################  | 
| 
1386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  No more public methods please  | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # MEMOIZE  | 
| 
1389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # We'll keep methods' returned values under {cache}.  | 
| 
1390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # All setters destroy said cache altogether.  | 
| 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # PLEASE replace this with a ready-made module if there's one.  | 
| 
1392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Sorry for this black magic, but it's too hard to write //= in EVERY method  | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Long story short  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The next sub replaces $self->foo() with  | 
| 
1396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # sub { $self->{cache}{foo} //= $self->originnal_foo }  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # All setter methods are EXPECTED to destroy {cache} altogether.  | 
| 
1398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # NOTE if you plan subclassing the method, re-memoize methods you change.  | 
| 
1400
 | 
300
 | 
 
 | 
 
 | 
  
300
  
 | 
 
 | 
636
 | 
 sub _memoize_method {  | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($class, $name, $arg) = @_;  | 
| 
1402
 | 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1083
 | 
    | 
| 
1403
 | 
300
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
781
 | 
 	my $orig_code = $class->can($name);  | 
| 
1404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	die "Error in memoizer section ($name)"  | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unless ref $orig_code eq 'CODE';  | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# begin long conditional  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $cached_code = !$arg  | 
| 
1409
 | 
253
 | 
  
100
  
 | 
 
 | 
  
253
  
 | 
 
 | 
8570
 | 
 	? sub {  | 
| 
1410
 | 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
323
 | 
 		if (!exists $_[0]->{cache}{$name}) {  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$_[0]->{cache}{$name} = $orig_code->($_[0]);  | 
| 
1412
 | 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1142
 | 
 		};  | 
| 
1413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return $_[0]->{cache}{$name};  | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1415
 | 
242
 | 
 
 | 
 
 | 
  
242
  
 | 
 
 | 
2660
 | 
 	: sub {  | 
| 
1416
 | 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
420
 | 
 		my $self = shift;  | 
| 
1417
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
240
 | 
 		my $arg = do {  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3424
 | 
    | 
| 
1418
 | 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
697
 | 
 			no warnings 'uninitialized'; ## no critic  | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			join ':', @_;  | 
| 
1420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
1421
 | 
242
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
852
 | 
    | 
| 
1422
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
411
 | 
 		if (!exists $self->{cache}{"$name:$arg"}) {  | 
| 
1423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$self->{cache}{"$name:$arg"} = $orig_code->($self, @_);  | 
| 
1424
 | 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1390
 | 
 		};  | 
| 
1425
 | 
300
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1250
 | 
 		return $self->{cache}{"$name:$arg"};  | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# conditional ends here  | 
| 
1428
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
181
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
902
 | 
    | 
| 
1429
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
179
 | 
 	no strict 'refs'; ## no critic  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2587
 | 
    | 
| 
1430
 | 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
519
 | 
 	no warnings 'redefine'; ## no critic  | 
| 
 
 | 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1137
 | 
    | 
| 
1431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	*{$class."::".$name} = $cached_code;  | 
| 
1432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }; # end of _memoize_method  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Memoize all the methods w/o arguments  | 
| 
1435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 foreach ( qw(sum sumsq mean min max mode) ) {  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	__PACKAGE__->_memoize_method($_);  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Memoize methods with 1 argument  | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 foreach (qw(quantile central_moment std_moment abs_moment standard_deviation  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	variance)) {  | 
| 
1442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	__PACKAGE__->_memoize_method($_, 1);  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # add shorter alias of standard_deviation (this must happen AFTER memoization)  | 
| 
1446
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
177
 | 
 {  | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8698
 | 
    | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	no warnings 'once'; ## no critic  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	*std_dev = \&standard_deviation;  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	*stdev = \&standard_deviation;  | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Get number of values below $x  | 
| 
1453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Like sum_of(sub{1}, undef, $x), but faster.  | 
| 
1454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Used by cdf()  | 
| 
1455
 | 
105
 | 
 
 | 
 
 | 
  
105
  
 | 
 
 | 
144
 | 
 sub _count {  | 
| 
1456
 | 
105
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
214
 | 
 	my $self = shift;  | 
| 
1457
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
 	@_>1 and return $self->_count($_[1]) - $self->_count($_[0]);  | 
| 
1458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $x = shift;  | 
| 
1459
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
    | 
| 
1460
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
 	my $upper = $self->_upper($x);  | 
| 
1461
 | 
74
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
148
 | 
 	my $i = _bin_search_gt( $self->_sort, $upper );  | 
| 
1462
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
110
 | 
 	!$i-- and return 0;  | 
| 
1463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $count = $self->_probability->[$i];  | 
| 
1464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1465
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
 	# interpolate  | 
| 
1466
 | 
65
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
273
 | 
 	my $bin = $self->_round( $x );  | 
| 
1467
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 	if (my $val = $self->{data}{$bin}) {  | 
| 
1468
 | 
34
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
71
 | 
 		my $width = ($upper - $bin) * 2;  | 
| 
1469
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 		my $part = $width ? ( ($upper - $x) / $width) : 1/2;  | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$count -= $part * $val;  | 
| 
1471
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
178
 | 
 	};  | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $count;  | 
| 
1473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # BINARY SEARCH  | 
| 
1476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Not a method, just a function  | 
| 
1477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Takes sorted \@array and a $num  | 
| 
1478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Return lowest $i such that $array[$i] >= $num  | 
| 
1479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Return (scalar @array) if no such $i exists  | 
| 
1480
 | 
147
 | 
 
 | 
 
 | 
  
147
  
 | 
 
 | 
865
 | 
 sub _bin_search_ge {  | 
| 
1481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($array, $x) = @_;  | 
| 
1482
 | 
147
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
640
 | 
    | 
| 
1483
 | 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
226
 | 
 	return 0 unless @$array and $array->[0] < $x;  | 
| 
1484
 | 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
200
 | 
 	my $l = 0;  | 
| 
1485
 | 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
286
 | 
 	my $r = @$array;  | 
| 
1486
 | 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
774
 | 
 	while ($l+1 < $r) {  | 
| 
1487
 | 
405
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1045
 | 
 		my $m = int( ($l + $r) /2);  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$array->[$m] < $x ? $l = $m : $r = $m;  | 
| 
1489
 | 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
280
 | 
 	};  | 
| 
1490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $l+1;  | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1492
 | 
74
 | 
 
 | 
 
 | 
  
74
  
 | 
 
 | 
130
 | 
 sub _bin_search_gt {  | 
| 
1493
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 	my ($array, $x) = @_;  | 
| 
1494
 | 
74
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
216
 | 
 	my $i = _bin_search_ge(@_);  | 
| 
1495
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
108
 | 
 	$i++ if defined $array->[$i] and $array->[$i] == $x;  | 
| 
1496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $i;  | 
| 
1497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # THE CORE  | 
| 
1500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Here come the number=>bin functions  | 
| 
1501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # round() generates bin center  | 
| 
1502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # upper() and lower() are respective boundaries.  | 
| 
1503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Here's the algorithm:  | 
| 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 1) determine whether bin is linear or logarithmic  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 2) for linear bins, return bin# * bucket_width (==2*absolute error)  | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #         add/subtract 0.5 to get edges.  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 3) for logarithmic bins, return base ** bin# with appropriate sign  | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #         multiply by precalculated constant to get edges  | 
| 
1509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #         note that +0.5 doesn't work here, since sqrt(a*b) != (a+b)/2  | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This part is fragile and can be written better  | 
| 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # center of bin containing x  | 
| 
1513
 | 
90924
 | 
 
 | 
 
 | 
  
90924
  
 | 
 
 | 
121697
 | 
 sub _round {  | 
| 
1514
 | 
90924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114147
 | 
 	my $self = shift;  | 
| 
1515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $x = shift;  | 
| 
1516
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
189
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3763
 | 
    | 
| 
1517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	use warnings FATAL => qw(numeric uninitialized);  | 
| 
1518
 | 
90924
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
168333
 | 
    | 
| 
1519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (abs($x) <= $self->{linear_thresh}) {  | 
| 
1520
 | 
1792
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
5680
 | 
 		return $self->{linear_width}  | 
| 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			&& $self->{linear_width} * floor( $x / $self->{linear_width} + 0.5 );  | 
| 
1522
 | 
89129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
178971
 | 
 	};  | 
| 
1523
 | 
89129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149352
 | 
 	my $i = floor (((log abs $x) - $self->{logfloor})/ $self->{logbase});  | 
| 
1524
 | 
89129
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
316496
 | 
 	my $value = $self->{base} ** $i;  | 
| 
1525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $x < 0 ? -$value : $value;  | 
| 
1526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # lower, upper limits of bin containing x  | 
| 
1529
 | 
887
 | 
 
 | 
 
 | 
  
887
  
 | 
 
 | 
52542
 | 
 sub _lower {  | 
| 
1530
 | 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1353
 | 
 	my $self = shift;  | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $x = shift;  | 
| 
1532
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
199
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
    | 
| 
 
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8028
 | 
    | 
| 
1533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	use warnings FATAL => qw(numeric uninitialized);  | 
| 
1534
 | 
887
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2322
 | 
    | 
| 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (abs($x) <= $self->{linear_thresh}) {  | 
| 
1536
 | 
188
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
1784
 | 
 		return $self->{linear_width}  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			&& $self->{linear_width} * (floor( $x / $self->{linear_width} + 0.5) - 0.5);  | 
| 
1538
 | 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2178
 | 
 	};  | 
| 
1539
 | 
699
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1490
 | 
 	my $i = floor (((log abs $x) - $self->{logfloor} )/ $self->{logbase});  | 
| 
1540
 | 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1294
 | 
 	if ($x > 0) {  | 
| 
1541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return  $self->{floor} * $self->{base}**($i);  | 
| 
1542
 | 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1624
 | 
 	} else {  | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return -$self->{floor} * $self->{base}**($i+1);  | 
| 
1544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1547
 | 
462
 | 
 
 | 
 
 | 
  
462
  
 | 
 
 | 
5227
 | 
 sub _upper {  | 
| 
1548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return -$_[0]->_lower(-$_[1]);  | 
| 
1549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # build bin index  | 
| 
1552
 | 
191
 | 
 
 | 
 
 | 
  
191
  
 | 
 
 | 
308
 | 
 sub _sort {  | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
1554
 | 
191
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
758
 | 
 	return $self->{cache}{sorted}  | 
| 
 
 | 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2257
 | 
    | 
| 
 
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
292
 | 
    | 
| 
1555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		||= [ sort { $a <=> $b } keys %{ $self->{data} } ];  | 
| 
1556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # build cumulative bin counts index  | 
| 
1559
 | 
107
 | 
 
 | 
 
 | 
  
107
  
 | 
 
 | 
170
 | 
 sub _probability {  | 
| 
1560
 | 
107
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
330
 | 
 	my $self = shift;  | 
| 
1561
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
 	return $self->{cache}{probability} ||= do {  | 
| 
1562
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 		my @array;  | 
| 
1563
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 		my $sum = 0;  | 
| 
 
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
89
 | 
    | 
| 
1564
 | 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
324
 | 
 		foreach (@{ $self->_sort }) {  | 
| 
1565
 | 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
391
 | 
 			$sum += $self->{data}{$_};  | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			push @array, $sum;  | 
| 
1567
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
 		};  | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		\@array;  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
1573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Konstantin S. Uvarin, C<<  >>  | 
| 
1575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 BUGS  | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module is currently under development. There may be bugs.  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C only works for discrete distributions, and simply returns  | 
| 
1581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the first bin with largest bin count.  | 
| 
1582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A better algorithm is wanted.  | 
| 
1583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C should have been made a private method.  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Its signature and/or name may change in the future.  | 
| 
1586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See the TODO file in the distribution package.  | 
| 
1588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please feel free to post bugs and/or feature requests to github:  | 
| 
1590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Alternatively, you can use CPAN RT  | 
| 
1593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 via e-mail C,  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or through the web interface at  | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Your contribution is appreciated.  | 
| 
1598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SUPPORT  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can find documentation for this module with the perldoc command.  | 
| 
1602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perldoc Statistics::Descriptive::LogScale  | 
| 
1604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can also look for information at:  | 
| 
1606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
1608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * GitHub:  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * RT: CPAN's request tracker (report bugs here)  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * AnnoCPAN: Annotated CPAN documentation  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * CPAN Ratings  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * Search CPAN  | 
| 
1626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
1628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 ACKNOWLEDGEMENTS  | 
| 
1632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module was inspired by a talk that Andrew Aksyonoff, author of  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L,  | 
| 
1635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has given at HighLoad++ conference in Moscow, 2012.  | 
| 
1636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L was and is used as reference when in doubt.  | 
| 
1638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Several code snippets were shamelessly stolen from there.  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C and C parameter names were suggested by  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 CountZero from http://perlmonks.org  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE AND COPYRIGHT  | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright 2013-2015 Konstantin S. Uvarin.  | 
| 
1646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This program is free software; you can redistribute it and/or modify it  | 
| 
1648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 under the terms of either: the GNU General Public License as published  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by the Free Software Foundation; or the Artistic License.  | 
| 
1650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See http://dev.perl.org/licenses/ for more information.  | 
| 
1652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1; # End of Statistics::Descriptive::LogScale  |