line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Data::Favorites - tally a data stream to find recently dominant items |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright (C) 1998-2003 Ed Halley |
6
|
|
|
|
|
|
|
# http://www.halley.cc/ed/ |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Data::Favorites; |
11
|
1
|
|
|
1
|
|
28951
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
189
|
|
12
|
|
|
|
|
|
|
$VERSION = 1.00; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Data::Favorites - tally a data stream to find recently dominant items |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Data::Favorites; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $faves = new Data::Favorites(); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$faves->tally($_) |
25
|
|
|
|
|
|
|
foreach (@history); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$faves->decay( 2 ); # everyone loses two points |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$faves->clamp( time() - 24*60*60 ); # cull everyone older than a day |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
print join("\n", $faves->favorites( 5 )), "\n"; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 ABSTRACT |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
A Favorites structure tracks the disposition of various keys. A key's |
36
|
|
|
|
|
|
|
disposition is a measurement of its relative predominance and freshness |
37
|
|
|
|
|
|
|
when tallied. This is a good way to infer favorites or other |
38
|
|
|
|
|
|
|
leadership-oriented facts from a historical data stream. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
More specifically, this structure measures how often and when various |
41
|
|
|
|
|
|
|
keys are triggered by application-defined events. Those keys that are |
42
|
|
|
|
|
|
|
mentioned often will accumulate a higher number of tally points. Those |
43
|
|
|
|
|
|
|
keys that have been mentioned recently will have newer "freshness" |
44
|
|
|
|
|
|
|
stamps. Both of these factors are metered and will affect their |
45
|
|
|
|
|
|
|
positioning in a ranking of the keys. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
At any time, keys can be culled by freshness or by their current ranking, |
48
|
|
|
|
|
|
|
or both. With these approaches, dispositions can be weighed over the |
49
|
|
|
|
|
|
|
whole historical record, rather than providing a simplistic "top events |
50
|
|
|
|
|
|
|
in the last N events" rolling count. Thus, highly popular event keys may |
51
|
|
|
|
|
|
|
remain in the set of favorites for some time, even when the key hasn't |
52
|
|
|
|
|
|
|
been seen very often recently. Popular items can be decayed gradually |
53
|
|
|
|
|
|
|
rather than cut out of a simple census window. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
58
|
|
|
|
|
|
|
|
59
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
30
|
|
60
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
39
|
|
61
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1382
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 METHODS |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 new() |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
$faves = new Data::Favorites( ); |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
$faves = new Data::Favorites( \&stamper ); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Create a new favorites counter object. The counter object can tally |
74
|
|
|
|
|
|
|
given elements, and also stamp the "freshness" of each element with the |
75
|
|
|
|
|
|
|
numerical return from the given stamper sub. If no sub code reference is |
76
|
|
|
|
|
|
|
given, then the C |
77
|
|
|
|
|
|
|
assumed that the sub returns a number which generally increases in value |
78
|
|
|
|
|
|
|
for fresher stamps. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=cut |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# $faves = new Data::Favorites( ); |
83
|
|
|
|
|
|
|
# |
84
|
|
|
|
|
|
|
sub new |
85
|
|
|
|
|
|
|
{ |
86
|
1
|
|
|
1
|
1
|
563
|
my $proto = shift; |
87
|
1
|
|
33
|
|
|
9
|
my $class = ref($proto) || $proto; |
88
|
1
|
|
|
|
|
4
|
my $self = |
89
|
|
|
|
|
|
|
{ |
90
|
|
|
|
|
|
|
# key => { tally => 1, stamp => 1 } |
91
|
|
|
|
|
|
|
dispositions => { }, |
92
|
|
|
|
|
|
|
}; |
93
|
1
|
50
|
|
|
|
6
|
$self->{stamper} = shift if ref $_[0]; |
94
|
1
|
|
|
|
|
3
|
bless($self, $class); |
95
|
1
|
|
|
|
|
4
|
return $self; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 tally() |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$times = $faves->tally( $scalar ); |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$times = $faves->tally( $scalar, $times ); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Return the current number of times the given C<$scalar> has been seen, or |
106
|
|
|
|
|
|
|
increment that count by a given number of times. The first form returns |
107
|
|
|
|
|
|
|
C if the C<$scalar> has never been tallied. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Items are tracked by their string form, so if the scalars are perl |
110
|
|
|
|
|
|
|
references, take note that the whole favorites counter will not persist |
111
|
|
|
|
|
|
|
well. A future version may use C to allow for persistable |
112
|
|
|
|
|
|
|
tracking of object data. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Each key in the favorites counter is marked with a timestamp via the |
115
|
|
|
|
|
|
|
C |
116
|
|
|
|
|
|
|
the favorites counter object. In the case of an application-supplied |
117
|
|
|
|
|
|
|
stamper function, it will receive two arguments: this favorites counter |
118
|
|
|
|
|
|
|
itself, and the given scalar being tallied. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# $count = $faves->tally( $scalar ); |
123
|
|
|
|
|
|
|
# $count = $faves->tally( $scalar, $times ); |
124
|
|
|
|
|
|
|
# |
125
|
|
|
|
|
|
|
sub tally |
126
|
|
|
|
|
|
|
{ |
127
|
6
|
|
|
6
|
1
|
12
|
my $self = shift; |
128
|
6
|
|
|
|
|
8
|
my $item = shift; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# $faves->tally() or $faves->tally(undef) are calling errors. |
131
|
|
|
|
|
|
|
# |
132
|
6
|
50
|
|
|
|
15
|
carp "Called tally() with an undef key" if not defined $item; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# $faves->tally(key) returns current tally for given key. |
135
|
|
|
|
|
|
|
# Returns undef if the key doesn't exist. |
136
|
|
|
|
|
|
|
# |
137
|
6
|
|
100
|
|
|
36
|
my $disp = $self->{dispositions}{$item} || { }; |
138
|
6
|
100
|
66
|
|
|
28
|
return if (not exists $disp->{tally}) and (not @_); |
139
|
5
|
100
|
|
|
|
16
|
return $disp->{tally} if not @_; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# $faves->tally(key, amount) boosts current tally for given key. |
142
|
|
|
|
|
|
|
# The key is fully created with the given amount if it did not exist. |
143
|
|
|
|
|
|
|
# The key's freshness stamp is also updated. |
144
|
|
|
|
|
|
|
# |
145
|
4
|
|
|
|
|
5
|
my $tally = shift; |
146
|
4
|
|
|
|
|
9
|
$self->{dispositions}{$item} = $disp; |
147
|
|
|
|
|
|
|
$disp->{stamp} = |
148
|
|
|
|
|
|
|
$self->{stamper}? |
149
|
4
|
50
|
|
|
|
13
|
eval { &{$self->{stamper}}($self, $item) } : |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
13
|
|
150
|
|
|
|
|
|
|
time(); |
151
|
4
|
|
|
|
|
19
|
$disp->{tally} += $tally; |
152
|
4
|
|
|
|
|
24
|
return $disp->{tally}; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 fresh() |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
$stamp = $faves->fresh( $scalar ); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Return the current freshness stamp for the given C<$scalar>. Returns |
161
|
|
|
|
|
|
|
C if the C<$scalar> has never been tallied. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Each key in the favorites counter is marked with a timestamp via the |
164
|
|
|
|
|
|
|
C |
165
|
|
|
|
|
|
|
the favorites counter object. In the case of an application-supplied |
166
|
|
|
|
|
|
|
stamper callback, it will receive two arguments: this favorites counter |
167
|
|
|
|
|
|
|
itself, and the given scalar being tallied. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=cut |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# $stamp = $faves->fresh( $scalar ); |
172
|
|
|
|
|
|
|
# |
173
|
|
|
|
|
|
|
sub fresh |
174
|
|
|
|
|
|
|
{ |
175
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
176
|
1
|
|
|
|
|
2
|
my $item = shift; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# $faves->fresh() or $faves->fresh(undef) are calling errors. |
179
|
|
|
|
|
|
|
# |
180
|
1
|
50
|
|
|
|
4
|
carp "Called fresh() with an undef key" if not defined $item; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# $faves->fresh(key) returns current tally for given key. |
183
|
|
|
|
|
|
|
# Returns undef if the key doesn't exist. |
184
|
|
|
|
|
|
|
# |
185
|
1
|
|
50
|
|
|
5
|
my $disp = $self->{dispositions}{$item} || { }; |
186
|
1
|
|
50
|
|
|
6
|
return $disp->{stamp} || undef; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 decay() |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
$count = $faves->decay( ); |
193
|
|
|
|
|
|
|
$count = $faves->decay( $times ); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
$times = $faves->decay( $scalar ); |
196
|
|
|
|
|
|
|
$times = $faves->decay( $scalar, $times ); |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
In the first pair of forms, all present keys have their tally counts |
199
|
|
|
|
|
|
|
reduced by one, or by the given number of times. In these forms, the |
200
|
|
|
|
|
|
|
returned value is the remaining count of tracked favorite keys. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
In the latter pair of forms, an individual key C<$scalar> has its tally |
203
|
|
|
|
|
|
|
reduced by one, or by the given number of times. These forms return the |
204
|
|
|
|
|
|
|
remaining tally count for the given C<$scalar> key. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
The favorites counter will automatically remove any key in which the |
207
|
|
|
|
|
|
|
tally count drops to zero or below. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# $count = $faves->decay( ); |
212
|
|
|
|
|
|
|
# $count = $faves->decay( $times ); |
213
|
|
|
|
|
|
|
# $times = $faves->decay( $scalar ); |
214
|
|
|
|
|
|
|
# $times = $faves->decay( $scalar, $times ); |
215
|
|
|
|
|
|
|
# |
216
|
|
|
|
|
|
|
sub decay |
217
|
|
|
|
|
|
|
{ |
218
|
4
|
|
|
4
|
1
|
762
|
my $self = shift; |
219
|
4
|
|
|
|
|
8
|
my $disp = $self->{dispositions}; |
220
|
4
|
|
|
|
|
13
|
my $tally = 1; |
221
|
4
|
|
|
|
|
6
|
my $item; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# $faves->decay(key) decays one key by 1 tally |
224
|
|
|
|
|
|
|
# $faves->decay(key, amount) decays one key by given amount |
225
|
|
|
|
|
|
|
# |
226
|
4
|
100
|
66
|
|
|
26
|
if (@_ && exists $disp->{$_[0]}) |
227
|
|
|
|
|
|
|
{ |
228
|
3
|
|
|
|
|
4
|
$item = shift; |
229
|
3
|
50
|
|
|
|
8
|
$tally = shift if @_; |
230
|
3
|
100
|
|
|
|
11
|
if (($disp->{$item}{tally} -= $tally) <= 0) |
231
|
|
|
|
|
|
|
{ |
232
|
1
|
|
|
|
|
3
|
delete $disp->{$item}; |
233
|
1
|
|
|
|
|
3
|
return; |
234
|
|
|
|
|
|
|
} |
235
|
2
|
|
|
|
|
5
|
return $disp->{$item}{tally}; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# $faves->decay() decays all keys by 1 tally |
239
|
|
|
|
|
|
|
# $faves->decay(amount) decays all keys by given amount |
240
|
|
|
|
|
|
|
# Recurses to perform actual decay on each key. |
241
|
|
|
|
|
|
|
# |
242
|
1
|
50
|
|
|
|
621
|
$tally = shift if @_; |
243
|
1
|
|
|
|
|
5
|
foreach $item (keys %$disp) |
244
|
|
|
|
|
|
|
{ |
245
|
3
|
|
|
|
|
11
|
$self->decay($item, $tally); |
246
|
|
|
|
|
|
|
} |
247
|
1
|
|
|
|
|
4
|
return scalar keys %$disp; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head2 clamp() |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
$count = $faves->clamp( $stamp ); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
Clamps the set of favorites to only the freshest tallied elements. This |
256
|
|
|
|
|
|
|
method automatically removes any key in which the most recent tally is |
257
|
|
|
|
|
|
|
more stale than the given timestamp value. Timestamps are assumed to be |
258
|
|
|
|
|
|
|
numerical; lesser values represent stamps which are more stale, while |
259
|
|
|
|
|
|
|
higher values are considered more fresh. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# $count = $faves->clamp( $stamp ); |
264
|
|
|
|
|
|
|
# |
265
|
|
|
|
|
|
|
sub clamp |
266
|
|
|
|
|
|
|
{ |
267
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
268
|
1
|
|
|
|
|
2
|
my $disp = $self->{dispositions}; |
269
|
1
|
|
50
|
|
|
4
|
my $stamp = shift || 0; |
270
|
|
|
|
|
|
|
|
271
|
1
|
|
|
|
|
4
|
my @items = keys %$disp; |
272
|
1
|
|
|
|
|
4
|
while (@items) |
273
|
|
|
|
|
|
|
{ |
274
|
2
|
|
|
|
|
4
|
my $item = shift @items; |
275
|
2
|
100
|
|
|
|
10
|
delete $disp->{$item} |
276
|
|
|
|
|
|
|
if $disp->{$item}{stamp} < $stamp; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
1
|
|
|
|
|
3
|
return scalar keys %$disp; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 favorites() |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
@topfaves = $faves->favorites( ); |
286
|
|
|
|
|
|
|
@topfaves = $faves->favorites( $limit ); |
287
|
|
|
|
|
|
|
$count = scalar $faves->favorites( ); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Returns the keys sorted by the strength of their tally counts. Those |
290
|
|
|
|
|
|
|
which have equal tally counts are compared by their most recent tally |
291
|
|
|
|
|
|
|
time; the most freshly stamped is favored. If a limit is given, the list |
292
|
|
|
|
|
|
|
returned will not exceed the given length. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
In a scalar context, returns the current count of the tallied keys in the |
295
|
|
|
|
|
|
|
favorites counter. If no limit argument is given, then no internal |
296
|
|
|
|
|
|
|
sorting work needs to be performed to return the count. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=cut |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# @faves = $faves->favorites( ); |
301
|
|
|
|
|
|
|
# @faves = $faves->favorites( $limit ); |
302
|
|
|
|
|
|
|
# $count = scalar $faves->favorites( ); |
303
|
|
|
|
|
|
|
# |
304
|
|
|
|
|
|
|
sub favorites |
305
|
|
|
|
|
|
|
{ |
306
|
5
|
|
|
5
|
1
|
688
|
my $self = shift; |
307
|
5
|
|
|
|
|
10
|
my $disp = $self->{dispositions}; |
308
|
5
|
100
|
66
|
|
|
40
|
return scalar keys %$disp |
309
|
|
|
|
|
|
|
if (not @_) and (not wantarray); |
310
|
6
|
50
|
|
|
|
22
|
my @faves = sort |
311
|
|
|
|
|
|
|
{ |
312
|
2
|
|
|
|
|
9
|
($disp->{$b}{tally} <=> $disp->{$a}{tally}) || |
313
|
|
|
|
|
|
|
($disp->{$b}{stamp} <=> $disp->{$a}{stamp}) |
314
|
2
|
|
|
|
|
3
|
} keys %{$disp}; |
315
|
2
|
|
66
|
|
|
12
|
my $limit = shift || @faves; |
316
|
2
|
100
|
|
|
|
14
|
$#faves = $limit-1 if @faves > $limit; |
317
|
2
|
|
|
|
|
10
|
return @faves; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
1; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
__END__ |