line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::MinPerfHashTwoLevel; |
2
|
4
|
|
|
4
|
|
201373
|
use strict; |
|
4
|
|
|
|
|
14
|
|
|
4
|
|
|
|
|
112
|
|
3
|
4
|
|
|
4
|
|
19
|
use warnings; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
177
|
|
4
|
|
|
|
|
|
|
our $VERSION = '0.15'; |
5
|
|
|
|
|
|
|
our $DEFAULT_VARIANT = 5; |
6
|
|
|
|
|
|
|
|
7
|
4
|
|
|
4
|
|
21
|
use Exporter qw(import); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
113
|
|
8
|
4
|
|
|
4
|
|
19
|
use Carp (); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
99
|
|
9
|
|
|
|
|
|
|
|
10
|
4
|
|
|
4
|
|
20
|
no warnings "portable"; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
243
|
|
11
|
|
|
|
|
|
|
my %constant; |
12
|
|
|
|
|
|
|
BEGIN { |
13
|
4
|
|
|
4
|
|
132
|
%constant= ( |
14
|
|
|
|
|
|
|
MPH_F_FILTER_UNDEF => (1<<0), |
15
|
|
|
|
|
|
|
MPH_F_DETERMINISTIC => (1<<1), |
16
|
|
|
|
|
|
|
#MPH_F_NO_DEDUPE => (1<<2), |
17
|
|
|
|
|
|
|
#MPH_F_VALIDATE => (1<<3), |
18
|
|
|
|
|
|
|
MAX_VARIANT => 5, |
19
|
|
|
|
|
|
|
MIN_VARIANT => 5, |
20
|
|
|
|
|
|
|
STADTX_HASH_SEED_BYTES => 16, |
21
|
|
|
|
|
|
|
STADTX_HASH_STATE_BYTES => 32, |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
} |
24
|
4
|
|
|
4
|
|
24
|
use constant \%constant; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
881
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
27
|
|
|
|
|
|
|
'all' => [ |
28
|
|
|
|
|
|
|
'$DEFAULT_VARIANT', |
29
|
|
|
|
|
|
|
'MAX_VARIANT', |
30
|
|
|
|
|
|
|
'MIN_VARIANT', |
31
|
|
|
|
|
|
|
qw( |
32
|
|
|
|
|
|
|
seed_state |
33
|
|
|
|
|
|
|
hash_with_state |
34
|
|
|
|
|
|
|
), sort keys %constant |
35
|
|
|
|
|
|
|
], |
36
|
|
|
|
|
|
|
'flags' => [ sort grep /MPH_F_/, keys %constant ], |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our @EXPORT = qw(); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
require XSLoader; |
45
|
|
|
|
|
|
|
XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Preloaded methods go here. |
48
|
|
|
|
|
|
|
|
49
|
4
|
|
|
4
|
|
27
|
use Carp (); |
|
4
|
|
|
|
|
15
|
|
|
4
|
|
|
|
|
3118
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub new { |
52
|
93
|
|
|
93
|
1
|
698
|
my ($class,%opts)= @_; |
53
|
93
|
|
|
|
|
232
|
my $seed= delete($opts{seed}); |
54
|
93
|
50
|
|
|
|
1660
|
delete($opts{state}) and warn "ignoring 'state' parameter"; |
55
|
|
|
|
|
|
|
|
56
|
93
|
|
|
|
|
310
|
my $o= bless \%opts, $class; |
57
|
|
|
|
|
|
|
|
58
|
93
|
100
|
|
|
|
345
|
$o->set_seed($seed) if $seed; |
59
|
|
|
|
|
|
|
|
60
|
93
|
100
|
|
|
|
246
|
$o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant}; |
61
|
93
|
|
|
|
|
214
|
$o->{variant}= int(0+$o->{variant}); |
62
|
|
|
|
|
|
|
|
63
|
93
|
|
100
|
|
|
287
|
$o->{compute_flags} ||= 0; |
64
|
|
|
|
|
|
|
$o->{compute_flags} += MPH_F_FILTER_UNDEF |
65
|
93
|
50
|
|
|
|
219
|
if delete $o->{filter_undef}; |
66
|
|
|
|
|
|
|
$o->{compute_flags} += MPH_F_DETERMINISTIC |
67
|
93
|
50
|
33
|
|
|
412
|
if delete $o->{deterministic} or delete $o->{canonical}; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
die "Unknown variant '$o->{variant}' in constructor new(), max known is " |
70
|
|
|
|
|
|
|
. MAX_VARIANT . " default is " . $DEFAULT_VARIANT |
71
|
93
|
50
|
|
|
|
235
|
if $o->{variant} > MAX_VARIANT; |
72
|
|
|
|
|
|
|
die "Unknown variant '$o->{variant}' in constructor new(), min known is " |
73
|
|
|
|
|
|
|
. MIN_VARIANT . " default is " . $DEFAULT_VARIANT |
74
|
93
|
50
|
|
|
|
227
|
if $o->{variant} < MIN_VARIANT; |
75
|
|
|
|
|
|
|
|
76
|
93
|
|
|
|
|
258
|
return $o; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub compute { |
80
|
95
|
|
|
95
|
1
|
6517
|
my ($self, $source_hash)= @_; |
81
|
95
|
50
|
|
|
|
227
|
if ($source_hash) { |
82
|
95
|
|
|
|
|
199
|
$self->{source_hash}= $source_hash; |
83
|
|
|
|
|
|
|
} else { |
84
|
0
|
|
|
|
|
0
|
$source_hash= $self->{source_hash}; |
85
|
|
|
|
|
|
|
} |
86
|
95
|
|
50
|
|
|
309
|
my $debug= $self->{debug} ||= 0; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# reuse the constructor seed. |
89
|
95
|
|
|
|
|
273
|
$self->_seed($self->{constructor_seed}); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# find the number of keys we have to deal with |
92
|
95
|
|
50
|
|
|
313
|
my $max_tries= $self->{max_tries} || 10; |
93
|
95
|
|
|
|
|
162
|
my @failed_seeds; |
94
|
95
|
|
|
|
|
268
|
$self->{failed_seeds}= \@failed_seeds; |
95
|
|
|
|
|
|
|
|
96
|
95
|
|
|
|
|
293
|
for my $counter ( 1 .. $max_tries ) { |
97
|
95
|
|
|
|
|
233
|
my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs) |
98
|
95
|
|
|
|
|
247
|
my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs) |
99
|
95
|
|
|
|
|
182
|
delete $self->{buckets}; |
100
|
|
|
|
|
|
|
|
101
|
95
|
50
|
|
|
|
210
|
printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n", |
102
|
|
|
|
|
|
|
$counter, |
103
|
|
|
|
|
|
|
$max_tries, |
104
|
|
|
|
|
|
|
0+keys(%$source_hash), |
105
|
|
|
|
|
|
|
unpack("H*",$seed), |
106
|
|
|
|
|
|
|
unpack("H*",$state), |
107
|
|
|
|
|
|
|
if $debug; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $bad_idx= compute_xs($self) |
110
|
95
|
50
|
|
|
|
3429193
|
or return $self->{buckets}; |
111
|
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
0
|
push @failed_seeds, $seed; |
113
|
0
|
0
|
|
|
|
0
|
if ($counter < $max_tries) { |
114
|
0
|
|
|
|
|
0
|
$self->re_seed(); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
0
|
0
|
|
|
|
0
|
if ( $max_tries == 1 ) { |
119
|
0
|
|
|
|
|
0
|
die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0]; |
120
|
|
|
|
|
|
|
} else { |
121
|
|
|
|
|
|
|
Carp::confess( |
122
|
|
|
|
|
|
|
sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s", |
123
|
0
|
|
|
|
|
0
|
$max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds) |
|
0
|
|
|
|
|
0
|
|
124
|
|
|
|
|
|
|
); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
# NOT REACHED. |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub re_seed { |
130
|
0
|
|
|
0
|
0
|
0
|
my $self= shift; |
131
|
|
|
|
|
|
|
|
132
|
0
|
|
|
|
|
0
|
my $source= $self->get_state(); |
133
|
0
|
|
|
|
|
0
|
return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES)); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub _seed { |
138
|
439
|
|
|
439
|
|
727
|
my $self= shift; |
139
|
439
|
100
|
|
|
|
847
|
if (@_) { |
140
|
158
|
|
|
|
|
263
|
my $seed= shift; |
141
|
158
|
50
|
66
|
|
|
534
|
Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes", |
142
|
|
|
|
|
|
|
STADTX_HASH_SEED_BYTES,length($seed)) |
143
|
|
|
|
|
|
|
if defined($seed) and length($seed) != 16; |
144
|
158
|
|
|
|
|
272
|
$self->{seed}= $seed; |
145
|
158
|
|
|
|
|
264
|
delete $self->{state}; |
146
|
|
|
|
|
|
|
} |
147
|
439
|
100
|
|
|
|
1006
|
if ( !defined $self->{seed} ) { |
148
|
|
|
|
|
|
|
#1234567812345678 |
149
|
30
|
|
|
|
|
116
|
$self->{seed}= "MinPerfHash2Levl"; |
150
|
30
|
|
|
|
|
63
|
delete $self->{state}; |
151
|
|
|
|
|
|
|
} |
152
|
439
|
|
|
|
|
1279
|
return $self->{seed}; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub set_seed { |
156
|
63
|
|
|
63
|
0
|
138
|
my ($self,$value)= @_; |
157
|
63
|
|
|
|
|
165
|
my $seed= $self->_seed($value); |
158
|
63
|
|
|
|
|
152
|
return $self->{constructor_seed}= $seed; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub get_seed { |
162
|
185
|
|
|
185
|
0
|
430
|
my ($self)= @_; |
163
|
185
|
|
|
|
|
366
|
return $self->_seed(); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub get_state { |
168
|
97
|
|
|
97
|
0
|
148
|
my $self= shift; |
169
|
97
|
|
66
|
|
|
325
|
return $self->{state} ||= seed_state($self->_seed()); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub get_failed_seeds { |
173
|
0
|
|
|
0
|
0
|
|
my $self= shift; |
174
|
0
|
0
|
|
|
|
|
return @{$self->{failed_seeds}||[]}; |
|
0
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
1; |
178
|
|
|
|
|
|
|
__END__ |