line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Histogram; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
32985
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
4
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
49
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
6
|
use base qw(Exporter); |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
1290
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(histogram); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my @scales = (1, 2, 5, 10, 25, 50, 100, 250, 500); |
13
|
|
|
|
|
|
|
push @scales, map { ( 1 * $_, 2.5 * $_, 5 * $_) } ( |
14
|
|
|
|
|
|
|
1000, 10_000, 100_000 |
15
|
|
|
|
|
|
|
); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my @binsizes = (1, 2, 5, 10, 25, 50, 100, 250, 500); |
18
|
|
|
|
|
|
|
push @binsizes, map { ( 1 * $_, 2.5 * $_, 5 * $_ ) } ( |
19
|
|
|
|
|
|
|
1000, 10_000, 100_000 |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub histogram { |
23
|
0
|
|
|
0
|
1
|
|
my ($data, $opts) = @_; |
24
|
|
|
|
|
|
|
|
25
|
0
|
0
|
|
|
|
|
unless (ref $data) { |
26
|
0
|
|
|
|
|
|
$data = [@_]; |
27
|
0
|
|
|
|
|
|
$opts = {}; |
28
|
|
|
|
|
|
|
} |
29
|
0
|
|
|
|
|
|
my $pts = scalar @$data; |
30
|
0
|
|
0
|
|
|
|
$opts->{bins} ||= 8; |
31
|
0
|
0
|
|
|
|
|
$opts->{bins} = $pts if $pts < $opts->{bins}; |
32
|
0
|
|
0
|
|
|
|
$opts->{histogram_size} ||= 50; |
33
|
|
|
|
|
|
|
|
34
|
0
|
|
|
|
|
|
my $vcnt = scalar @$data; |
35
|
0
|
|
|
|
|
|
my @data = sort { $a <=> $b } @$data; |
|
0
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
my ($min, $max, $rmin, $rmax, $pmin, $pmax) |
38
|
|
|
|
|
|
|
= _check_outliers($vcnt, $opts, @data); |
39
|
|
|
|
|
|
|
|
40
|
0
|
|
|
|
|
|
my ($scale, $binsize, %bins) |
41
|
|
|
|
|
|
|
= _get_frequency($min,$max,$rmin,$rmax, $opts, \@data); |
42
|
|
|
|
|
|
|
|
43
|
0
|
|
|
|
|
|
my $hist = ""; |
44
|
0
|
|
|
|
|
|
my $hsize = $opts->{histogram_size}; |
45
|
0
|
0
|
|
|
|
|
if ($min != $rmin) { |
46
|
0
|
|
0
|
|
|
|
my $freq = _ceil(($bins{'min'}||0)/$scale); |
47
|
0
|
|
0
|
|
|
|
$hist.= sprintf "%8d %-${hsize}s - %6d\n", |
48
|
|
|
|
|
|
|
$min, |
49
|
|
|
|
|
|
|
"#" x $freq, |
50
|
|
|
|
|
|
|
($bins{'min'}||0); |
51
|
|
|
|
|
|
|
} |
52
|
0
|
|
|
|
|
|
for (my $i = _ceil(($rmin+1)/$binsize)-1; |
53
|
|
|
|
|
|
|
$i <= _ceil(($rmax+1)/$binsize)-1; $i++) { |
54
|
0
|
|
0
|
|
|
|
my $freq = _ceil(($bins{$i}||0)/$scale); |
55
|
0
|
|
|
|
|
|
my $val = $i*$binsize; |
56
|
0
|
0
|
|
|
|
|
$val = $rmin if $val < $rmin; |
57
|
0
|
|
|
|
|
|
$freq = "#" x $freq; |
58
|
0
|
|
0
|
|
|
|
$hist .= sprintf "%8d %-${hsize}s - %6d\n", |
59
|
|
|
|
|
|
|
$val, |
60
|
|
|
|
|
|
|
$freq, |
61
|
|
|
|
|
|
|
($bins{$i}||0) |
62
|
|
|
|
|
|
|
} |
63
|
0
|
0
|
|
|
|
|
if ($max != $rmax) { |
64
|
0
|
|
0
|
|
|
|
my $freq = _ceil(($bins{'max'}||0)/$scale); |
65
|
0
|
|
0
|
|
|
|
$hist.= sprintf "%8d %-${hsize}s - %6d\n", |
66
|
|
|
|
|
|
|
$pmax, |
67
|
|
|
|
|
|
|
"#" x $freq, |
68
|
|
|
|
|
|
|
($bins{'max'}||0); |
69
|
|
|
|
|
|
|
} |
70
|
0
|
|
|
|
|
|
return $hist; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub _get_frequency { |
74
|
0
|
|
|
0
|
|
|
my ($min, $max, $rmin, $rmax, $opts, $data) = @_; |
75
|
0
|
|
|
|
|
|
my %bins = (); |
76
|
0
|
|
|
|
|
|
my $bins = $opts->{bins}; |
77
|
0
|
0
|
|
|
|
|
$bins-- if $rmin != $min; |
78
|
0
|
0
|
|
|
|
|
$bins-- if $rmax != $max; |
79
|
0
|
|
|
|
|
|
my $hsize = $opts->{histogram_size}; |
80
|
|
|
|
|
|
|
|
81
|
0
|
|
|
|
|
|
my $binsize = _best_scale( ($rmax - $rmin) / $bins, @binsizes ); |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
for my $v (@$data) { |
84
|
0
|
0
|
|
|
|
|
if ( $v < $rmin ) { |
|
|
0
|
|
|
|
|
|
85
|
0
|
|
|
|
|
|
$bins{'min'}++ ; |
86
|
|
|
|
|
|
|
} elsif ( $v > $rmax ) { |
87
|
0
|
|
|
|
|
|
$bins{'max'}++ ; |
88
|
|
|
|
|
|
|
} else { |
89
|
0
|
|
|
|
|
|
$bins{_ceil(($v+1)/$binsize) - 1}++ ; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
my ($minf, $maxf, $scale, $maxval) = (undef, undef, 1, 0); |
94
|
0
|
|
|
|
|
|
while ( my ($key, $value) = each (%bins) ) { |
95
|
0
|
0
|
0
|
|
|
|
next if $key eq 'min' or $key eq 'max'; |
96
|
0
|
0
|
0
|
|
|
|
$minf = $key if !defined($minf) || $key < $minf; |
97
|
0
|
0
|
0
|
|
|
|
$maxf = $key if !defined($maxf) || $key > $maxf; |
98
|
|
|
|
|
|
|
|
99
|
0
|
0
|
|
|
|
|
$maxval = $value if $value > $maxval; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
|
$scale = _best_scale($maxval/$hsize, @scales) |
103
|
|
|
|
|
|
|
if $maxval>$hsize; |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
|
return $scale, $binsize, %bins; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub _ceil { |
109
|
0
|
|
|
0
|
|
|
my ($number) = shift; |
110
|
0
|
0
|
|
|
|
|
if ($number != int($number)) { |
111
|
0
|
|
|
|
|
|
$number = int($number) + 1; |
112
|
|
|
|
|
|
|
} |
113
|
0
|
|
|
|
|
|
return $number; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub _check_outliers { |
117
|
0
|
|
|
0
|
|
|
my ($vcnt, $opts, @data) = @_; |
118
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my ($min,$max) = my ($tmin, $tmax) = @data[0,-1]; |
120
|
0
|
|
|
|
|
|
my $bins = $opts->{bins}; |
121
|
|
|
|
|
|
|
|
122
|
0
|
|
|
|
|
|
my $cnt = int($vcnt/50); #max 2+2% of outlier points |
123
|
0
|
|
|
|
|
|
my $val = $data[0]; |
124
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $c = 0; |
126
|
0
|
0
|
|
|
|
|
my $bn = $bins > 2 ? $bins - 2 : 2; |
127
|
0
|
|
|
|
|
|
my $bs = ($tmax - $tmin) / $bn; |
128
|
0
|
|
|
|
|
|
my $binsize = _best_scale($bs, @binsizes); |
129
|
|
|
|
|
|
|
; |
130
|
0
|
|
|
|
|
|
my ($rmin, $rmax) = (0, 0); |
131
|
0
|
|
|
|
|
|
my ($pmin, $pmax) = (0, 0); |
132
|
0
|
|
0
|
|
|
|
while ( ($tmin != $rmin) or ($tmax != $rmax) ) { |
133
|
0
|
|
|
|
|
|
$rmin = $tmin; |
134
|
0
|
|
|
|
|
|
$rmax = $tmax; |
135
|
0
|
|
|
|
|
|
$val = $data[0]; |
136
|
0
|
|
|
|
|
|
for my $i (1..$cnt) { |
137
|
|
|
|
|
|
|
# point with more than half the size of a bin are grouped |
138
|
|
|
|
|
|
|
# in a big bin, in the beginning. |
139
|
0
|
|
|
|
|
|
$c = $data[$i] - $val; |
140
|
0
|
0
|
|
|
|
|
if ( $c > $binsize ) { |
141
|
0
|
|
|
|
|
|
$tmin = $data[$i]; |
142
|
0
|
|
|
|
|
|
$val = $data[$i]; |
143
|
0
|
|
|
|
|
|
$binsize = ($tmax - $tmin) / $bn; |
144
|
|
|
|
|
|
|
} |
145
|
0
|
0
|
|
|
|
|
last if $i >= $cnt; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
$val = $data[-1]; |
149
|
0
|
|
|
|
|
|
for my $i (1..$cnt) { |
150
|
0
|
|
|
|
|
|
my $v1 = $data[-1-$i]; |
151
|
0
|
|
|
|
|
|
$c = $val - $v1; |
152
|
0
|
0
|
|
|
|
|
if ($c > $binsize) { |
153
|
0
|
|
|
|
|
|
$tmax = $v1; |
154
|
0
|
|
|
|
|
|
$val = $v1; |
155
|
0
|
|
|
|
|
|
$binsize = _best_scale(($tmax - $tmin) / $bn, @binsizes);; |
156
|
|
|
|
|
|
|
} |
157
|
0
|
|
|
|
|
|
$val = $v1; |
158
|
0
|
0
|
|
|
|
|
last if $i > $cnt; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
|
return ($min, $max, $rmin, $rmax, $pmin, $pmax); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _best_scale { |
166
|
0
|
|
|
0
|
|
|
my ($val, @opts) = @_; |
167
|
|
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
for my $opt (@opts) { |
169
|
0
|
0
|
|
|
|
|
return $opt if $opt > $val; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
|
return 99_999_999_999; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; # End of Text::Histogram |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
__END__ |