line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Normalize; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
100118
|
use warnings; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
164
|
|
4
|
5
|
|
|
5
|
|
30
|
use strict; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
166
|
|
5
|
5
|
|
|
5
|
|
3805
|
use Math::Round::Var; |
|
5
|
|
|
|
|
5023
|
|
|
5
|
|
|
|
|
5320
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Normalize - normalize scores between 0 and 1. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 VERSION |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Version 0.31 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $VERSION = '0.31'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Normalize; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my %iq_rate = ('Professor' => 125.12, 'Bender' => 64, 'Dr. Zoidberg' => 28.6, 'Fray' => 13); |
24
|
|
|
|
|
|
|
my %weight_rate = ('Professor' => 70.2, 'Bender' => 600, 'Dr. Zoidberg' => 200, 'Fray' => 120); |
25
|
|
|
|
|
|
|
my $norm = Normalize->new('round_to' => 0.001); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#larger score is better: |
28
|
|
|
|
|
|
|
$norm->normalize_to_max(\%iq_rate); |
29
|
|
|
|
|
|
|
print "\n#iq rate: larger iq is better:\n"; |
30
|
|
|
|
|
|
|
foreach my $key (keys %iq_rate) |
31
|
|
|
|
|
|
|
{ |
32
|
|
|
|
|
|
|
print "$key = $iq_rate{$key}\n"; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#iq rate: larger iq is better: |
36
|
|
|
|
|
|
|
#1.000 Professor |
37
|
|
|
|
|
|
|
#0.512 Bender |
38
|
|
|
|
|
|
|
#0.229 Dr. Zoidberg |
39
|
|
|
|
|
|
|
#0.104 Fray |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
#smaller score is better |
42
|
|
|
|
|
|
|
$norm->normalize_to_min(\%weight_rate, {min_default => 0.001}); |
43
|
|
|
|
|
|
|
print "\n#skinny rate: smaller weight is better:\n"; |
44
|
|
|
|
|
|
|
foreach my $key (sort {$weight_rate{$b} <=> $weight_rate{$a}} keys %weight_rate) |
45
|
|
|
|
|
|
|
{ |
46
|
|
|
|
|
|
|
print "#$weight_rate{$key}\t$key\n"; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
##skinny rate: smaller weight is better: |
49
|
|
|
|
|
|
|
#1.000 Professor |
50
|
|
|
|
|
|
|
#0.585 Fray |
51
|
|
|
|
|
|
|
#0.351 Dr. Zoidberg |
52
|
|
|
|
|
|
|
#0.117 Bender |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
#SUMMARY RATE |
55
|
|
|
|
|
|
|
my %summary_score = map { $_ => $weight_rate{$_} + $iq_rate{$_} } keys %iq_rate; |
56
|
|
|
|
|
|
|
$norm->normalize_to_max( \%summary_score ); |
57
|
|
|
|
|
|
|
print "\n#summary score:\n"; |
58
|
|
|
|
|
|
|
foreach my $key (sort {$summary_score{$b} <=> $summary_score{$a}} keys %summary_score) |
59
|
|
|
|
|
|
|
{ |
60
|
|
|
|
|
|
|
print "#$summary_score{$key}\t$key\n"; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
#summary score: |
63
|
|
|
|
|
|
|
#1.000 Professor |
64
|
|
|
|
|
|
|
#0.344 Fray |
65
|
|
|
|
|
|
|
#0.315 Bender |
66
|
|
|
|
|
|
|
#0.290 Dr. Zoidberg |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
#Dr. Zoidberg - looser lobster! Quod erat demonstrandum |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 DESCRIPTION |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
This module gives you the ability to normalize score result sets. |
74
|
|
|
|
|
|
|
Sometimes a larger score is better and sometimes a smaller score is better. |
75
|
|
|
|
|
|
|
In order to compare the results from different methods? You need a way to |
76
|
|
|
|
|
|
|
normalize them: that is, to get them all within the same range and direction. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
The normalization functions will take a hash ref {key => score} or array ref [score 1, score 2, ...scaore 3] and return the same ref, but whith scores between 0 and 1. |
79
|
|
|
|
|
|
|
Each score is scaled according to how close it to the best result, wich will always have a score of 1. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 METHODS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new(%opts) |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Normalize->new(%opts) - constructor |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head3 %opts |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
round_to - default value 0.01. Rounding precision. For more info see L |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
min_default - by default eq round_to value. Need for prevent delete on zero in normalize_to_min() |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub new { |
97
|
3
|
|
|
3
|
1
|
37
|
my $caller = shift; |
98
|
3
|
|
33
|
|
|
26
|
my $class = ref($caller) || $caller; |
99
|
3
|
|
|
|
|
10
|
my $self = {}; |
100
|
3
|
|
|
|
|
11
|
bless( $self, $class ); |
101
|
3
|
|
|
|
|
17
|
$self->_init(@_); |
102
|
3
|
|
|
|
|
12
|
return $self; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub _init { |
106
|
3
|
|
|
3
|
|
8
|
my $self = shift; |
107
|
3
|
|
|
|
|
17
|
$self->set(@_); |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
#set default precision |
110
|
3
|
|
|
|
|
14
|
my $round_to = $self->get('round_to'); |
111
|
3
|
100
|
|
|
|
17
|
unless ($round_to) { |
112
|
1
|
|
|
|
|
2
|
$round_to = 0.01; |
113
|
1
|
|
|
|
|
4
|
$self->set( round_to => $round_to ); |
114
|
|
|
|
|
|
|
} |
115
|
3
|
|
|
|
|
36
|
$self->set( 'round_obj' => Math::Round::Var->new($round_to) ); |
116
|
3
|
|
|
|
|
9
|
return $self; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head2 set(%params) |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
set object params |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub set { |
126
|
8
|
|
|
8
|
1
|
254
|
my $self = shift; |
127
|
8
|
|
|
|
|
28
|
my %op = @_; |
128
|
8
|
|
|
|
|
29
|
foreach my $k ( keys %op ) { |
129
|
7
|
|
|
|
|
42
|
$self->{$k} = $op{$k}; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
8
|
|
|
|
|
27
|
return $self; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 get(param_name) |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
get object param |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub get { |
142
|
24
|
|
|
24
|
1
|
976
|
my $self = shift; |
143
|
24
|
|
|
|
|
41
|
my $key = shift; |
144
|
24
|
|
|
|
|
113
|
return $self->{$key}; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 normalize_to_min($score_set_data, %opts) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Each score is scaled according to how close it to the smaller result, wich will always have a score of 1. |
150
|
|
|
|
|
|
|
$score_set_data - hashref {key1 => score1, key2 => score2,..} or arrayref [score1, score2, ...] |
151
|
|
|
|
|
|
|
options: |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
%opts = ( |
154
|
|
|
|
|
|
|
min_default => 0.01#by default = round_to value. Need for prevent delete on zero in normalize_to_min() |
155
|
|
|
|
|
|
|
) |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
return same data structure (hashref or arrayref) |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub normalize_to_min { |
162
|
5
|
|
|
5
|
1
|
3940
|
my $self = shift; |
163
|
5
|
|
|
|
|
11
|
my ($data) = @_; |
164
|
5
|
100
|
|
|
|
26
|
if ( ref($data) eq 'HASH' ) { |
|
|
50
|
|
|
|
|
|
165
|
3
|
|
|
|
|
14
|
return $self->_hash_small_is_better(@_); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
elsif ( ref($data) eq 'ARRAY' ) { |
168
|
2
|
|
|
|
|
8
|
return $self->_array_small_is_better(@_); |
169
|
|
|
|
|
|
|
} |
170
|
0
|
|
|
|
|
0
|
return undef; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 normalize_to_max($score_set_data) |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Each score is scaled according to how close it to the larger result, wich will always have a score of 1. |
176
|
|
|
|
|
|
|
$score_set_data - hashref {key1 => score1, key2 => score2,..} or arrayref [score1, score2, ...] |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
return same data structure (hashref or arrayref) |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub normalize_to_max { |
183
|
3
|
|
|
3
|
1
|
3105
|
my $self = shift; |
184
|
3
|
|
|
|
|
6
|
my ($data) = @_; |
185
|
3
|
100
|
|
|
|
20
|
if ( ref($data) eq 'HASH' ) { |
|
|
50
|
|
|
|
|
|
186
|
2
|
|
|
|
|
11
|
return $self->_hash_max_is_better(@_); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
elsif ( ref($data) eq 'ARRAY' ) { |
189
|
1
|
|
|
|
|
6
|
return $self->_array_max_is_better(@_); |
190
|
|
|
|
|
|
|
} |
191
|
0
|
|
|
|
|
0
|
return undef; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub _hash_small_is_better { |
195
|
3
|
|
|
3
|
|
7
|
my $self = shift; |
196
|
3
|
|
|
|
|
6
|
my $data = shift; |
197
|
3
|
|
50
|
|
|
15
|
my $opt = shift || {}; |
198
|
|
|
|
|
|
|
|
199
|
3
|
|
|
|
|
5
|
my $min = undef; |
200
|
3
|
|
33
|
|
|
21
|
my $min_default = $opt->{min} |
201
|
|
|
|
|
|
|
|| $self->get('min_default') |
202
|
|
|
|
|
|
|
|| $self->get('round_to'); |
203
|
3
|
|
|
|
|
13
|
my $rnd = $self->get('round_obj'); |
204
|
3
|
|
|
|
|
16
|
foreach my $d ( keys %$data ) { |
205
|
12
|
100
|
|
|
|
33
|
unless ( defined $min ) { |
206
|
3
|
|
|
|
|
5
|
$min = $data->{$d}; |
207
|
3
|
|
|
|
|
8
|
next; |
208
|
|
|
|
|
|
|
} |
209
|
9
|
100
|
|
|
|
32
|
$min = $data->{$d} if ( $data->{$d} < $min ); |
210
|
9
|
50
|
66
|
|
|
118
|
$min_default = $data->{$d} |
211
|
|
|
|
|
|
|
if ( $data->{$d} && $data->{$d} < $min_default ); |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
} |
214
|
3
|
|
66
|
|
|
14
|
$min ||= $min_default; |
215
|
|
|
|
|
|
|
|
216
|
3
|
|
|
|
|
12
|
foreach my $d ( keys %$data ) { |
217
|
12
|
|
66
|
|
|
770
|
$data->{$d} = $rnd->round( $min / ( $data->{$d} || $min_default ) ); |
218
|
|
|
|
|
|
|
} |
219
|
3
|
|
|
|
|
44
|
return $data; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _array_small_is_better { |
223
|
2
|
|
|
2
|
|
11
|
my $self = shift; |
224
|
2
|
|
|
|
|
2
|
my $data = shift; |
225
|
2
|
|
100
|
|
|
17
|
my $opt = shift || {}; |
226
|
|
|
|
|
|
|
|
227
|
2
|
|
33
|
|
|
10
|
my $min_default = $opt->{min} |
228
|
|
|
|
|
|
|
|| $self->get('min_default') |
229
|
|
|
|
|
|
|
|| $self->get('round_to'); |
230
|
2
|
|
|
|
|
5
|
my $rnd = $self->get('round_obj'); |
231
|
2
|
|
|
|
|
4
|
my $min = $data->[0]; |
232
|
2
|
|
|
|
|
25
|
foreach my $d (@$data) { |
233
|
7
|
100
|
|
|
|
18
|
$min = $d if ( $d < $min ); |
234
|
7
|
50
|
66
|
|
|
33
|
$min_default = $d if ( $d && $d < $min_default ); |
235
|
|
|
|
|
|
|
} |
236
|
2
|
|
66
|
|
|
13
|
$min ||= $min_default; |
237
|
|
|
|
|
|
|
|
238
|
2
|
|
|
|
|
7
|
foreach my $i ( 0 .. $#$data ) { |
239
|
7
|
|
66
|
|
|
94
|
$data->[$i] = $rnd->round( $min / ( $data->[$i] || $min_default ) ); |
240
|
|
|
|
|
|
|
} |
241
|
2
|
|
|
|
|
27
|
return $data; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub _hash_max_is_better { |
245
|
2
|
|
|
2
|
|
5
|
my $self = shift; |
246
|
2
|
|
|
|
|
4
|
my $data = shift; |
247
|
|
|
|
|
|
|
|
248
|
2
|
|
|
|
|
4
|
my $max = undef; |
249
|
2
|
|
|
|
|
9
|
my $rnd = $self->get('round_obj'); |
250
|
2
|
|
|
|
|
10
|
foreach my $d ( keys %$data ) { |
251
|
8
|
100
|
|
|
|
21
|
unless ($max) { |
252
|
2
|
|
|
|
|
5
|
$max = $data->{$d}; |
253
|
2
|
|
|
|
|
6
|
next; |
254
|
|
|
|
|
|
|
} |
255
|
6
|
100
|
|
|
|
27
|
$max = $data->{$d} if ( $data->{$d} > $max ); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
2
|
|
|
|
|
8
|
foreach my $d ( keys %$data ) { |
260
|
8
|
|
|
|
|
107
|
$data->{$d} = $rnd->round( $data->{$d} / $max ); |
261
|
|
|
|
|
|
|
} |
262
|
2
|
|
|
|
|
28
|
return $data; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub _array_max_is_better { |
267
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
268
|
1
|
|
|
|
|
4
|
my $data = shift; |
269
|
|
|
|
|
|
|
|
270
|
1
|
|
|
|
|
3
|
my $max = undef; |
271
|
1
|
|
|
|
|
5
|
my $rnd = $self->get('round_obj'); |
272
|
1
|
|
|
|
|
3
|
foreach my $d (@$data) { |
273
|
3
|
100
|
|
|
|
8
|
unless ($max) { |
274
|
1
|
|
|
|
|
4
|
$max = $d; |
275
|
1
|
|
|
|
|
3
|
next; |
276
|
|
|
|
|
|
|
} |
277
|
2
|
100
|
|
|
|
10
|
$max = $d if ( $d > $max ); |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
1
|
|
|
|
|
4
|
foreach my $i ( 0 .. $#$data ) { |
282
|
3
|
|
|
|
|
63
|
$data->[$i] = $rnd->round( $data->[$i] / $max ); |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
1
|
|
|
|
|
16
|
return $data; |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=head1 SEE ALSO |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
L - Variations on rounding. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Idea for this module and normalization Algoritm from book "Programming Collective Intelligence: Building Smart Web 2.0 Applications By Toby Segaran)" L |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head1 AUTHOR |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Konstantin Kapitanov aka Green Kakadu, C |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
L |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=head1 BUGS |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
304
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
305
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head1 SUPPORT |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
perldoc Normalize |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
You can also look for information at: |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=over 4 |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
L |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
L |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item * CPAN Ratings |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
L |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=item * Search CPAN |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
L |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=back |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Copyright 2009 Konstantin Kapitanov, all rights reserved. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
346
|
|
|
|
|
|
|
under the same terms as Perl itself. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=cut |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
1; # End of Normalize |