|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Cuckoo::Filter;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
54456
 | 
 use warnings;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
    | 
| 
4
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
8
 | 
 use strict;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
    | 
| 
5
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
1016
 | 
 use Storable qw(nfreeze thaw);  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5106
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = "0.0.4";  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
816
 | 
 use Digest;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
836
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1265
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @serialize_keys = qw(bucket_size max_retry fingerprint_size buckets);  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
13
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
68
 | 
     my ($class, %params) = @_;  | 
| 
14
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $self = {  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         bucket_size => 2 ** 18,  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         max_retry => 500,  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         fingerprint_size => 3,  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         %params,  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
20
 | 
2
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
19
 | 
     $self->{digest} //= Digest->new("SHA-1");  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # init fixed array  | 
| 
22
 | 
2
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
2776
 | 
     $self->{buckets} //= do {  | 
| 
23
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         my @buckets = ();  | 
| 
24
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1655
 | 
         $#buckets = $self->{bucket_size};  | 
| 
25
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $self->{item_count} = 0;  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         \@buckets  | 
| 
27
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     };  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     return bless $self, $class;  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _fingerprint {  | 
| 
33
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
25
 | 
     my ($self, $item) = @_;  | 
| 
34
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $digest = do {  | 
| 
35
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
         my $d= $self->{digest};  | 
| 
36
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
         $d->add($item);  | 
| 
37
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
         $d->digest;  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     return substr($digest, 0, $self->{fingerprint_size}-1);  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # djb hash  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _hash {  | 
| 
45
 | 
26
 | 
 
 | 
 
 | 
  
26
  
 | 
 
 | 
32
 | 
     my $str = shift;  | 
| 
46
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     my @bytes = unpack 'C*', $str;  | 
| 
47
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     my $h = 5381;  | 
| 
48
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     for my $i (@bytes) {  | 
| 
49
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
         $h = (($h << 5) + $h) + $i;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
51
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     $h;  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lookup {  | 
| 
55
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
16
 | 
     my ($self, $item) = @_;  | 
| 
56
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     my $fp = $self->_fingerprint($item);  | 
| 
57
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     my $idx1 = _hash($item) % $self->{bucket_size};  | 
| 
58
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $idx2 = ($idx1 ^ _hash($fp)) % $self->{bucket_size};  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
9
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
44
 | 
     return defined $self->{buckets}[$idx1] || $self->{buckets}[$idx2];  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub insert {  | 
| 
64
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
13
 | 
     my ($self, $item) = @_;  | 
| 
65
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return 0 if $self->lookup($item);  | 
| 
66
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $fp = $self->_fingerprint($item);  | 
| 
67
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     my $idx1 = _hash($item) % $self->{bucket_size};  | 
| 
68
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $idx2 = ($idx1 ^ _hash($fp)) % $self->{bucket_size};  | 
| 
69
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     for my $index ($idx1, $idx2) {  | 
| 
70
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         if (! defined $self->{buckets}[$index]) {  | 
| 
71
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
             $self->{buckets}[$index] = $item;  | 
| 
72
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             $self->{item_count}++;  | 
| 
73
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
             return 1;  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $index = +($idx1, $idx2)[int(rand(2))];  | 
| 
78
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for (my $i = 0; $i < $self->{max_retry}; $i++) {  | 
| 
79
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $fp = do {  | 
| 
80
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $f = $self->{buckets}[$index];  | 
| 
81
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->{buckets}[$index] = $fp;  | 
| 
82
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $f;  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
84
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $index = ($idx1 ^ _hash($fp)) % $self->{bucket_size};  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (! defined $self->{buckets}[$index]) {  | 
| 
87
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->{buckets}[$index] = $fp;  | 
| 
88
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->{item_count}++;  | 
| 
89
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return 1;  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return 0;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub delete {  | 
| 
97
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
     my ($self, $item) = @_;  | 
| 
98
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $fp = $self->_fingerprint($item);  | 
| 
99
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $idx1 = _hash($item) % $self->{bucket_size};  | 
| 
100
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $idx2 = ($idx1 ^ _hash($fp)) % $self->{bucket_size};  | 
| 
101
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     for my $index ($idx1, $idx2) {  | 
| 
102
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         if (defined $self->{buckets}[$index]) {  | 
| 
103
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             delete $self->{buckets}[$index];  | 
| 
104
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
             $self->{item_count}--;  | 
| 
105
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             return 1;  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
108
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return 0;  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub count {  | 
| 
112
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
113
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $self->{item_count};  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub serialize {  | 
| 
117
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
     my $self = shift;  | 
| 
118
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my %params = map { $_ => $self->{$_} } @serialize_keys;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
119
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return nfreeze \%params;  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub deserialize {  | 
| 
123
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
5460
 | 
     my ($serialized, $digest) = @_;  | 
| 
124
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $params = thaw($serialized);  | 
| 
125
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9097
 | 
     $params->{digest} = $digest if $digest;  | 
| 
126
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return Cuckoo::Filter->new(%$params);  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |