File Coverage

blib/lib/Algorithm/MinPerfHashTwoLevel.pm
Criterion Covered Total %
statement 66 80 82.5
branch 17 32 53.1
condition 9 15 60.0
subroutine 14 16 87.5
pod 2 7 28.5
total 108 150 72.0


line stmt bran cond sub pod time code
1             package Algorithm::MinPerfHashTwoLevel;
2 4     4   201373 use strict;
  4         14  
  4         112  
3 4     4   19 use warnings;
  4         8  
  4         177  
4             our $VERSION = '0.15';
5             our $DEFAULT_VARIANT = 5;
6              
7 4     4   21 use Exporter qw(import);
  4         7  
  4         113  
8 4     4   19 use Carp ();
  4         7  
  4         99  
9              
10 4     4   20 no warnings "portable";
  4         8  
  4         243  
11             my %constant;
12             BEGIN {
13 4     4   132 %constant= (
14             MPH_F_FILTER_UNDEF => (1<<0),
15             MPH_F_DETERMINISTIC => (1<<1),
16             #MPH_F_NO_DEDUPE => (1<<2),
17             #MPH_F_VALIDATE => (1<<3),
18             MAX_VARIANT => 5,
19             MIN_VARIANT => 5,
20             STADTX_HASH_SEED_BYTES => 16,
21             STADTX_HASH_STATE_BYTES => 32,
22             );
23             }
24 4     4   24 use constant \%constant;
  4         8  
  4         881  
25              
26             our %EXPORT_TAGS = (
27             'all' => [
28             '$DEFAULT_VARIANT',
29             'MAX_VARIANT',
30             'MIN_VARIANT',
31             qw(
32             seed_state
33             hash_with_state
34             ), sort keys %constant
35             ],
36             'flags' => [ sort grep /MPH_F_/, keys %constant ],
37             );
38              
39             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
40              
41             our @EXPORT = qw();
42              
43              
44             require XSLoader;
45             XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION);
46              
47             # Preloaded methods go here.
48              
49 4     4   27 use Carp ();
  4         15  
  4         3118  
50              
51             sub new {
52 93     93 1 698 my ($class,%opts)= @_;
53 93         232 my $seed= delete($opts{seed});
54 93 50       1660 delete($opts{state}) and warn "ignoring 'state' parameter";
55              
56 93         310 my $o= bless \%opts, $class;
57              
58 93 100       345 $o->set_seed($seed) if $seed;
59              
60 93 100       246 $o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant};
61 93         214 $o->{variant}= int(0+$o->{variant});
62              
63 93   100     287 $o->{compute_flags} ||= 0;
64             $o->{compute_flags} += MPH_F_FILTER_UNDEF
65 93 50       219 if delete $o->{filter_undef};
66             $o->{compute_flags} += MPH_F_DETERMINISTIC
67 93 50 33     412 if delete $o->{deterministic} or delete $o->{canonical};
68              
69             die "Unknown variant '$o->{variant}' in constructor new(), max known is "
70             . MAX_VARIANT . " default is " . $DEFAULT_VARIANT
71 93 50       235 if $o->{variant} > MAX_VARIANT;
72             die "Unknown variant '$o->{variant}' in constructor new(), min known is "
73             . MIN_VARIANT . " default is " . $DEFAULT_VARIANT
74 93 50       227 if $o->{variant} < MIN_VARIANT;
75              
76 93         258 return $o;
77             }
78              
79             sub compute {
80 95     95 1 6517 my ($self, $source_hash)= @_;
81 95 50       227 if ($source_hash) {
82 95         199 $self->{source_hash}= $source_hash;
83             } else {
84 0         0 $source_hash= $self->{source_hash};
85             }
86 95   50     309 my $debug= $self->{debug} ||= 0;
87              
88             # reuse the constructor seed.
89 95         273 $self->_seed($self->{constructor_seed});
90              
91             # find the number of keys we have to deal with
92 95   50     313 my $max_tries= $self->{max_tries} || 10;
93 95         162 my @failed_seeds;
94 95         268 $self->{failed_seeds}= \@failed_seeds;
95              
96 95         293 for my $counter ( 1 .. $max_tries ) {
97 95         233 my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
98 95         247 my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
99 95         182 delete $self->{buckets};
100              
101 95 50       210 printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
102             $counter,
103             $max_tries,
104             0+keys(%$source_hash),
105             unpack("H*",$seed),
106             unpack("H*",$state),
107             if $debug;
108              
109             my $bad_idx= compute_xs($self)
110 95 50       3429193 or return $self->{buckets};
111              
112 0         0 push @failed_seeds, $seed;
113 0 0       0 if ($counter < $max_tries) {
114 0         0 $self->re_seed();
115             }
116             }
117              
118 0 0       0 if ( $max_tries == 1 ) {
119 0         0 die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0];
120             } else {
121             Carp::confess(
122             sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s",
123 0         0 $max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds)
  0         0  
124             );
125             }
126             # NOT REACHED.
127             }
128              
129             sub re_seed {
130 0     0 0 0 my $self= shift;
131              
132 0         0 my $source= $self->get_state();
133 0         0 return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES));
134             }
135              
136              
137             sub _seed {
138 439     439   727 my $self= shift;
139 439 100       847 if (@_) {
140 158         263 my $seed= shift;
141 158 50 66     534 Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes",
142             STADTX_HASH_SEED_BYTES,length($seed))
143             if defined($seed) and length($seed) != 16;
144 158         272 $self->{seed}= $seed;
145 158         264 delete $self->{state};
146             }
147 439 100       1006 if ( !defined $self->{seed} ) {
148             #1234567812345678
149 30         116 $self->{seed}= "MinPerfHash2Levl";
150 30         63 delete $self->{state};
151             }
152 439         1279 return $self->{seed};
153             }
154              
155             sub set_seed {
156 63     63 0 138 my ($self,$value)= @_;
157 63         165 my $seed= $self->_seed($value);
158 63         152 return $self->{constructor_seed}= $seed;
159             }
160              
161             sub get_seed {
162 185     185 0 430 my ($self)= @_;
163 185         366 return $self->_seed();
164             }
165              
166              
167             sub get_state {
168 97     97 0 148 my $self= shift;
169 97   66     325 return $self->{state} ||= seed_state($self->_seed());
170             }
171              
172             sub get_failed_seeds {
173 0     0 0   my $self= shift;
174 0 0         return @{$self->{failed_seeds}||[]};
  0            
175             }
176              
177             1;
178             __END__