line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Unique; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
65658
|
use 5.006; |
|
1
|
|
|
|
|
3
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
5
|
1
|
|
|
1
|
|
21
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
6
|
1
|
|
|
1
|
|
478
|
use Storable::AMF0 qw(); |
|
1
|
|
|
|
|
3066
|
|
|
1
|
|
|
|
|
692
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Data::Unique - Module to check for duplicate item with time expiration and disk persistence. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Version 0.02 |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=cut |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our $VERSION = '0.02'; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Create a data structure that avoid duplicate entries (key) whith any data and add expiration time to clean old entries. |
25
|
|
|
|
|
|
|
This module use Storable::AMF0 for the persistence. |
26
|
|
|
|
|
|
|
After some benchmark of various serialisation it is best compromise in read and write for huge quantity of data. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
e.g. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
#!/usr/bin/perl |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use strict; |
34
|
|
|
|
|
|
|
use warnings; |
35
|
|
|
|
|
|
|
use Data::Dumper; |
36
|
|
|
|
|
|
|
use feature qw( say ); |
37
|
|
|
|
|
|
|
use Time::HiRes qw(gettimeofday usleep ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
use Data::Unique; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $filename = '/tmp/dedup.test'; |
44
|
|
|
|
|
|
|
my @dup; |
45
|
|
|
|
|
|
|
my $dedup = Data::Unique->new( { expiration => 10, file => $filename, gc => 5 } ); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
for my $idx ( 1 .. 6 ) { |
48
|
|
|
|
|
|
|
my ( $seconds, $microseconds ) = gettimeofday; |
49
|
|
|
|
|
|
|
my $time = ( $seconds * 1000000 ) + $microseconds; |
50
|
|
|
|
|
|
|
say "$idx -> $time"; |
51
|
|
|
|
|
|
|
$dedup->item( $time, { T => $idx } ) or say "no insertion ($$time already present)"; |
52
|
|
|
|
|
|
|
push @dup, $time if ( ( $idx % 2 ) == 0 ); |
53
|
|
|
|
|
|
|
usleep 10; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
say Data::Dumper::Dumper $dedup; |
56
|
|
|
|
|
|
|
say "Number of item=".$dedup->scalar; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
say "Expiration time ".$dedup->expiration; |
59
|
|
|
|
|
|
|
say "Number of item=".$dedup->scalar; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
say $dedup->expiration(6); |
62
|
|
|
|
|
|
|
sleep 15; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
#say "deleted item number=".$dedup->gc(); |
65
|
|
|
|
|
|
|
say "Number of item=".$dedup->scalar; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
foreach my $ins (@dup) { |
68
|
|
|
|
|
|
|
say $dedup->item($ins, { T => time }) ? "inserting $ins" : "no insertion ($ins already present)"; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
say "Expiration time ".$dedup->expiration; |
72
|
|
|
|
|
|
|
say "Number of item=".$dedup->scalar. ' => '.scalar( keys( %{ $dedup->{data} })); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 new |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Create a new Data::Unique object. |
82
|
|
|
|
|
|
|
It is possible to set the default values as parameters |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
my $dedup = Data::Unique->new( |
85
|
|
|
|
|
|
|
{ |
86
|
|
|
|
|
|
|
expiration => 60, # the retention time. When reached the expiration time, the item is removed |
87
|
|
|
|
|
|
|
file => $filename, # the file used for the retention |
88
|
|
|
|
|
|
|
gc => 5 # the number of operation between garbage colletor (checking the expiration time) |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
); |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub new { |
97
|
0
|
|
|
0
|
1
|
|
my ($class, $params) = @_; |
98
|
0
|
|
|
|
|
|
my %data ; |
99
|
0
|
0
|
|
|
|
|
if (-f $_[1]->{file}) { |
100
|
0
|
|
|
|
|
|
eval { %data = %{ Storable::AMF0::retrieve($_[1]->{file}) }; } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
} |
102
|
0
|
0
|
|
|
|
|
$data{expiration} = $params->{expiration} if $params->{expiration}; |
103
|
0
|
0
|
|
|
|
|
$data{file} = $params->{file} if $params->{file} ; |
104
|
0
|
|
0
|
|
|
|
$data{iter} ||= 0; |
105
|
0
|
0
|
|
|
|
|
$data{gc} = $params->{gc} if $params->{gc} ; |
106
|
0
|
|
|
|
|
|
bless {%data}, $class; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 item |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Add item and return 1 if succeed or return 0 if the item is already present; |
112
|
|
|
|
|
|
|
The key to test for unicity is the first parameter |
113
|
|
|
|
|
|
|
The second parameter is the data. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
$dedup->item( $time, $data ); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
If no data is provided, only test is the item is present. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
$dedup->item( $time ); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=cut |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub item { |
124
|
0
|
|
|
0
|
1
|
|
my ($self, $key, $data) = @_; |
125
|
0
|
|
|
|
|
|
$self->{iter}++; |
126
|
0
|
0
|
0
|
|
|
|
if ($self->{expiration} > 0 && $self->{iter} >= $self->{gc}) { |
127
|
0
|
|
|
|
|
|
$self->gc(); |
128
|
0
|
|
|
|
|
|
$self->{iter} = 0; |
129
|
|
|
|
|
|
|
} |
130
|
0
|
0
|
|
|
|
|
if (exists $self->{data}{$key}) { |
131
|
0
|
|
|
|
|
|
return 0; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
else { |
134
|
0
|
0
|
|
|
|
|
if (ref $data eq '') { |
135
|
0
|
|
|
|
|
|
$self->{data}{$key} = { val => "$data", time => time() }; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
else { |
138
|
0
|
|
|
|
|
|
$self->{data}{$key} = { val => $data, time => time() }; |
139
|
|
|
|
|
|
|
} |
140
|
0
|
|
|
|
|
|
return 1; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 expiration |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Check or modify the expiration time (if a parameter is provided) |
147
|
|
|
|
|
|
|
If the expiration is modified, the garbage colletor run. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
$dedup->expiration(6); # set the new expiration to 6 seconds |
150
|
|
|
|
|
|
|
$exp = $dedup->expiration; # return the current expiration time |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub expiration { |
155
|
0
|
|
|
0
|
1
|
|
my ($self, $exp) = @_; |
156
|
0
|
0
|
|
|
|
|
if ($exp) { |
157
|
0
|
|
|
|
|
|
$self->{expiration} = $exp; |
158
|
0
|
|
|
|
|
|
$self->gc(); |
159
|
|
|
|
|
|
|
} |
160
|
0
|
|
|
|
|
|
return $self->{expiration}; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head2 scalar |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Return the number of item |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
$nbr = $dedup->scalar; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
A convenient way to do: |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
scalar keys scalar keys %{ $self->{data} }; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub scalar { |
176
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
177
|
0
|
|
|
|
|
|
return scalar keys %{ $self->{data} }; |
|
0
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 gc |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Run the garbage collector to remove the expired item or modify the gc value if a paramter is provided. |
183
|
|
|
|
|
|
|
When the garbage collector is run, a sync to disk is executed. |
184
|
|
|
|
|
|
|
The garbage collector run each time the number item() action is reaching the value of the parameter gc |
185
|
|
|
|
|
|
|
If the value is 0, no automatic garbage collector is run. |
186
|
|
|
|
|
|
|
If the value < 0, this value is used as a expiration time when manually running the garbage collector. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
$dedup->gc(); # force the garbage collector to run; |
189
|
|
|
|
|
|
|
$dedup->gc(10); # change the gc value; |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=cut |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub gc { |
194
|
0
|
|
|
0
|
1
|
|
my ($self, $gc) = @_; |
195
|
0
|
|
|
|
|
|
my $nbr = 0; |
196
|
0
|
0
|
0
|
|
|
|
if ($gc && $gc > 0) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
197
|
0
|
|
|
|
|
|
$self->{gc} = $gc; |
198
|
|
|
|
|
|
|
} elsif ($gc && $gc < 0) { |
199
|
0
|
|
|
|
|
|
my $now = time + $gc; |
200
|
0
|
|
|
|
|
|
foreach my $k (keys %{ $self->{data} }) { |
|
0
|
|
|
|
|
|
|
201
|
0
|
0
|
|
|
|
|
if ($self->{data}{$k}{time} <= $now) { |
202
|
0
|
|
|
|
|
|
delete $self->{data}{$k}; |
203
|
0
|
|
|
|
|
|
$nbr++; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
} |
206
|
0
|
|
|
|
|
|
$self->sync; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
} elsif ($self->{gc} > 0 && $self->{expiration} > 0) { |
209
|
0
|
|
|
|
|
|
my $now = time - $self->{expiration}; |
210
|
0
|
|
|
|
|
|
foreach my $k (keys %{ $self->{data} }) { |
|
0
|
|
|
|
|
|
|
211
|
0
|
0
|
|
|
|
|
if ($self->{data}{$k}{time} <= $now) { |
212
|
0
|
|
|
|
|
|
delete $self->{data}{$k}; |
213
|
0
|
|
|
|
|
|
$nbr++; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
} |
216
|
0
|
|
|
|
|
|
$self->sync; |
217
|
|
|
|
|
|
|
} |
218
|
0
|
|
|
|
|
|
return $nbr; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head2 sync |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Write the data on disk. |
224
|
|
|
|
|
|
|
The sync is always done when the gc() run. |
225
|
|
|
|
|
|
|
It is possible to run it (if the gc occurence is too high) |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
$dedup->sync(); |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=cut |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub sync { |
232
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
233
|
0
|
|
|
|
|
|
my $val = Storable::AMF0::store($self, $self->{file}); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub DESTROY { |
237
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
238
|
0
|
|
|
|
|
|
$self->sync; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head1 AUTHOR |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
DULAUNOY Fabrice, C<< >> |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head1 BUGS |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
248
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
249
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 TODO |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
add more test |
254
|
|
|
|
|
|
|
add a delete method |
255
|
|
|
|
|
|
|
maybe TIE support |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head1 SUPPORT |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
perldoc Data::Unique |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
You can also look for information at: |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=over 4 |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
L |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
L |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item * CPAN Ratings |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
L |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item * Search CPAN |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
L |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=back |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
This software is Copyright (c) 2019 by DULAUNOY Fabrice. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
This is free software, licensed under: |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
The Artistic License 2.0 (GPL Compatible) |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=cut |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
1; # End of Data::Unique |