line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::MinPerfHashTwoLevel; |
2
|
8
|
|
|
8
|
|
344156
|
use strict; |
|
8
|
|
|
|
|
39
|
|
|
8
|
|
|
|
|
209
|
|
3
|
8
|
|
|
8
|
|
37
|
use warnings; |
|
8
|
|
|
|
|
18
|
|
|
8
|
|
|
|
|
382
|
|
4
|
|
|
|
|
|
|
our $VERSION = '0.14'; |
5
|
|
|
|
|
|
|
our $DEFAULT_VARIANT = 3; |
6
|
|
|
|
|
|
|
|
7
|
8
|
|
|
8
|
|
40
|
use Exporter qw(import); |
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
168
|
|
8
|
8
|
|
|
8
|
|
35
|
use Carp (); |
|
8
|
|
|
|
|
13
|
|
|
8
|
|
|
|
|
152
|
|
9
|
|
|
|
|
|
|
|
10
|
8
|
|
|
8
|
|
36
|
no warnings "portable"; |
|
8
|
|
|
|
|
14
|
|
|
8
|
|
|
|
|
451
|
|
11
|
|
|
|
|
|
|
my %constant; |
12
|
|
|
|
|
|
|
BEGIN { |
13
|
8
|
|
|
8
|
|
258
|
%constant= ( |
14
|
|
|
|
|
|
|
MPH_F_FILTER_UNDEF => (1<<0), |
15
|
|
|
|
|
|
|
MPH_F_DETERMINISTIC => (1<<1), |
16
|
|
|
|
|
|
|
#MPH_F_NO_DEDUPE => (1<<2), |
17
|
|
|
|
|
|
|
#MPH_F_VALIDATE => (1<<3), |
18
|
|
|
|
|
|
|
MAX_VARIANT => 3, |
19
|
|
|
|
|
|
|
STADTX_HASH_SEED_BYTES => 16, |
20
|
|
|
|
|
|
|
STADTX_HASH_STATE_BYTES => 32, |
21
|
|
|
|
|
|
|
); |
22
|
|
|
|
|
|
|
} |
23
|
8
|
|
|
8
|
|
44
|
use constant \%constant; |
|
8
|
|
|
|
|
16
|
|
|
8
|
|
|
|
|
1582
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
26
|
|
|
|
|
|
|
'all' => [ |
27
|
|
|
|
|
|
|
'$DEFAULT_VARIANT', |
28
|
|
|
|
|
|
|
'MAX_VARIANT', |
29
|
|
|
|
|
|
|
qw( |
30
|
|
|
|
|
|
|
seed_state |
31
|
|
|
|
|
|
|
hash_with_state |
32
|
|
|
|
|
|
|
), sort keys %constant |
33
|
|
|
|
|
|
|
], |
34
|
|
|
|
|
|
|
'flags' => [ sort grep /MPH_F_/, keys %constant ], |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our @EXPORT = qw(); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
require XSLoader; |
43
|
|
|
|
|
|
|
XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Preloaded methods go here. |
46
|
|
|
|
|
|
|
|
47
|
8
|
|
|
8
|
|
54
|
use Carp (); |
|
8
|
|
|
|
|
15
|
|
|
8
|
|
|
|
|
5702
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub new { |
50
|
68
|
|
|
68
|
1
|
840
|
my ($class,%opts)= @_; |
51
|
68
|
|
|
|
|
177
|
my $seed= delete($opts{seed}); |
52
|
68
|
50
|
|
|
|
220
|
delete($opts{state}) and warn "ignoring 'state' parameter"; |
53
|
|
|
|
|
|
|
|
54
|
68
|
|
|
|
|
165
|
my $o= bless \%opts, $class; |
55
|
|
|
|
|
|
|
|
56
|
68
|
100
|
|
|
|
255
|
$o->set_seed($seed) if $seed; |
57
|
|
|
|
|
|
|
|
58
|
68
|
50
|
|
|
|
177
|
$o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant}; |
59
|
68
|
|
|
|
|
159
|
$o->{variant}= int(0+$o->{variant}); |
60
|
|
|
|
|
|
|
|
61
|
68
|
|
100
|
|
|
346
|
$o->{compute_flags} ||= 0; |
62
|
|
|
|
|
|
|
$o->{compute_flags} += MPH_F_FILTER_UNDEF |
63
|
68
|
50
|
|
|
|
168
|
if delete $o->{filter_undef}; |
64
|
|
|
|
|
|
|
$o->{compute_flags} += MPH_F_DETERMINISTIC |
65
|
68
|
50
|
33
|
|
|
276
|
if delete $o->{deterministic} or delete $o->{canonical}; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
die "Unknown variant '$o->{variant}' in constructor new(), max known is " |
68
|
|
|
|
|
|
|
. MAX_VARIANT . " default is " . $DEFAULT_VARIANT |
69
|
68
|
50
|
|
|
|
167
|
if $o->{variant} > MAX_VARIANT; |
70
|
|
|
|
|
|
|
|
71
|
68
|
|
|
|
|
192
|
return $o; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub compute { |
75
|
72
|
|
|
72
|
1
|
17467
|
my ($self, $source_hash)= @_; |
76
|
72
|
50
|
|
|
|
174
|
if ($source_hash) { |
77
|
72
|
|
|
|
|
151
|
$self->{source_hash}= $source_hash; |
78
|
|
|
|
|
|
|
} else { |
79
|
0
|
|
|
|
|
0
|
$source_hash= $self->{source_hash}; |
80
|
|
|
|
|
|
|
} |
81
|
72
|
|
50
|
|
|
259
|
my $debug= $self->{debug} ||= 0; |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# reuse the constructor seed. |
84
|
72
|
|
|
|
|
235
|
$self->_seed($self->{constructor_seed}); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# find the number of keys we have to deal with |
87
|
72
|
|
50
|
|
|
235
|
my $max_tries= $self->{max_tries} || 10; |
88
|
72
|
|
|
|
|
112
|
my @failed_seeds; |
89
|
72
|
|
|
|
|
196
|
$self->{failed_seeds}= \@failed_seeds; |
90
|
|
|
|
|
|
|
|
91
|
72
|
|
|
|
|
214
|
for my $counter ( 1 .. $max_tries ) { |
92
|
88
|
|
|
|
|
212
|
my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs) |
93
|
88
|
|
|
|
|
206
|
my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs) |
94
|
88
|
|
|
|
|
255
|
delete $self->{buckets}; |
95
|
|
|
|
|
|
|
|
96
|
88
|
50
|
|
|
|
203
|
printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n", |
97
|
|
|
|
|
|
|
$counter, |
98
|
|
|
|
|
|
|
$max_tries, |
99
|
|
|
|
|
|
|
0+keys(%$source_hash), |
100
|
|
|
|
|
|
|
unpack("H*",$seed), |
101
|
|
|
|
|
|
|
unpack("H*",$state), |
102
|
|
|
|
|
|
|
if $debug; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
my $bad_idx= compute_xs($self) |
105
|
88
|
100
|
|
|
|
3312733
|
or return $self->{buckets}; |
106
|
|
|
|
|
|
|
|
107
|
16
|
|
|
|
|
293
|
push @failed_seeds, $seed; |
108
|
16
|
50
|
|
|
|
63
|
if ($counter < $max_tries) { |
109
|
16
|
|
|
|
|
49
|
$self->re_seed(); |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
0
|
0
|
|
|
|
0
|
if ( $max_tries == 1 ) { |
114
|
0
|
|
|
|
|
0
|
die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0]; |
115
|
|
|
|
|
|
|
} else { |
116
|
|
|
|
|
|
|
Carp::confess( |
117
|
|
|
|
|
|
|
sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s", |
118
|
0
|
|
|
|
|
0
|
$max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds) |
|
0
|
|
|
|
|
0
|
|
119
|
|
|
|
|
|
|
); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
# NOT REACHED. |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub re_seed { |
125
|
16
|
|
|
16
|
0
|
31
|
my $self= shift; |
126
|
|
|
|
|
|
|
|
127
|
16
|
|
|
|
|
40
|
my $source= $self->get_state(); |
128
|
16
|
|
|
|
|
87
|
return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES)); |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub _seed { |
133
|
377
|
|
|
377
|
|
649
|
my $self= shift; |
134
|
377
|
100
|
|
|
|
788
|
if (@_) { |
135
|
135
|
|
|
|
|
224
|
my $seed= shift; |
136
|
135
|
50
|
66
|
|
|
455
|
Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes", |
137
|
|
|
|
|
|
|
STADTX_HASH_SEED_BYTES,length($seed)) |
138
|
|
|
|
|
|
|
if defined($seed) and length($seed) != 16; |
139
|
135
|
|
|
|
|
278
|
$self->{seed}= $seed; |
140
|
135
|
|
|
|
|
232
|
delete $self->{state}; |
141
|
|
|
|
|
|
|
} |
142
|
377
|
100
|
|
|
|
834
|
if ( !defined $self->{seed} ) { |
143
|
|
|
|
|
|
|
#1234567812345678 |
144
|
21
|
|
|
|
|
53
|
$self->{seed}= "MinPerfHash2Levl"; |
145
|
21
|
|
|
|
|
47
|
delete $self->{state}; |
146
|
|
|
|
|
|
|
} |
147
|
377
|
|
|
|
|
1194
|
return $self->{seed}; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub set_seed { |
151
|
47
|
|
|
47
|
0
|
187
|
my ($self,$value)= @_; |
152
|
47
|
|
|
|
|
150
|
my $seed= $self->_seed($value); |
153
|
47
|
|
|
|
|
134
|
return $self->{constructor_seed}= $seed; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub get_seed { |
157
|
151
|
|
|
151
|
0
|
382
|
my ($self)= @_; |
158
|
151
|
|
|
|
|
330
|
return $self->_seed(); |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub get_state { |
163
|
110
|
|
|
110
|
0
|
171
|
my $self= shift; |
164
|
110
|
|
66
|
|
|
400
|
return $self->{state} ||= seed_state($self->_seed()); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub get_failed_seeds { |
168
|
0
|
|
|
0
|
0
|
|
my $self= shift; |
169
|
0
|
0
|
|
|
|
|
return @{$self->{failed_seeds}||[]}; |
|
0
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
1; |
173
|
|
|
|
|
|
|
__END__ |