File Coverage

blib/lib/Algorithm/MinPerfHashTwoLevel.pm
Criterion Covered Total %
statement 71 79 89.8
branch 17 30 56.6
condition 9 15 60.0
subroutine 15 16 93.7
pod 2 7 28.5
total 114 147 77.5


line stmt bran cond sub pod time code
1             package Algorithm::MinPerfHashTwoLevel;
2 8     8   344156 use strict;
  8         39  
  8         209  
3 8     8   37 use warnings;
  8         18  
  8         382  
4             our $VERSION = '0.14';
5             our $DEFAULT_VARIANT = 3;
6              
7 8     8   40 use Exporter qw(import);
  8         12  
  8         168  
8 8     8   35 use Carp ();
  8         13  
  8         152  
9              
10 8     8   36 no warnings "portable";
  8         14  
  8         451  
11             my %constant;
12             BEGIN {
13 8     8   258 %constant= (
14             MPH_F_FILTER_UNDEF => (1<<0),
15             MPH_F_DETERMINISTIC => (1<<1),
16             #MPH_F_NO_DEDUPE => (1<<2),
17             #MPH_F_VALIDATE => (1<<3),
18             MAX_VARIANT => 3,
19             STADTX_HASH_SEED_BYTES => 16,
20             STADTX_HASH_STATE_BYTES => 32,
21             );
22             }
23 8     8   44 use constant \%constant;
  8         16  
  8         1582  
24              
25             our %EXPORT_TAGS = (
26             'all' => [
27             '$DEFAULT_VARIANT',
28             'MAX_VARIANT',
29             qw(
30             seed_state
31             hash_with_state
32             ), sort keys %constant
33             ],
34             'flags' => [ sort grep /MPH_F_/, keys %constant ],
35             );
36              
37             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
38              
39             our @EXPORT = qw();
40              
41              
42             require XSLoader;
43             XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION);
44              
45             # Preloaded methods go here.
46              
47 8     8   54 use Carp ();
  8         15  
  8         5702  
48              
49             sub new {
50 68     68 1 840 my ($class,%opts)= @_;
51 68         177 my $seed= delete($opts{seed});
52 68 50       220 delete($opts{state}) and warn "ignoring 'state' parameter";
53              
54 68         165 my $o= bless \%opts, $class;
55              
56 68 100       255 $o->set_seed($seed) if $seed;
57              
58 68 50       177 $o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant};
59 68         159 $o->{variant}= int(0+$o->{variant});
60              
61 68   100     346 $o->{compute_flags} ||= 0;
62             $o->{compute_flags} += MPH_F_FILTER_UNDEF
63 68 50       168 if delete $o->{filter_undef};
64             $o->{compute_flags} += MPH_F_DETERMINISTIC
65 68 50 33     276 if delete $o->{deterministic} or delete $o->{canonical};
66              
67             die "Unknown variant '$o->{variant}' in constructor new(), max known is "
68             . MAX_VARIANT . " default is " . $DEFAULT_VARIANT
69 68 50       167 if $o->{variant} > MAX_VARIANT;
70              
71 68         192 return $o;
72             }
73              
74             sub compute {
75 72     72 1 17467 my ($self, $source_hash)= @_;
76 72 50       174 if ($source_hash) {
77 72         151 $self->{source_hash}= $source_hash;
78             } else {
79 0         0 $source_hash= $self->{source_hash};
80             }
81 72   50     259 my $debug= $self->{debug} ||= 0;
82              
83             # reuse the constructor seed.
84 72         235 $self->_seed($self->{constructor_seed});
85              
86             # find the number of keys we have to deal with
87 72   50     235 my $max_tries= $self->{max_tries} || 10;
88 72         112 my @failed_seeds;
89 72         196 $self->{failed_seeds}= \@failed_seeds;
90              
91 72         214 for my $counter ( 1 .. $max_tries ) {
92 88         212 my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs)
93 88         206 my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs)
94 88         255 delete $self->{buckets};
95              
96 88 50       203 printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n",
97             $counter,
98             $max_tries,
99             0+keys(%$source_hash),
100             unpack("H*",$seed),
101             unpack("H*",$state),
102             if $debug;
103              
104             my $bad_idx= compute_xs($self)
105 88 100       3312733 or return $self->{buckets};
106              
107 16         293 push @failed_seeds, $seed;
108 16 50       63 if ($counter < $max_tries) {
109 16         49 $self->re_seed();
110             }
111             }
112              
113 0 0       0 if ( $max_tries == 1 ) {
114 0         0 die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0];
115             } else {
116             Carp::confess(
117             sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s",
118 0         0 $max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds)
  0         0  
119             );
120             }
121             # NOT REACHED.
122             }
123              
124             sub re_seed {
125 16     16 0 31 my $self= shift;
126              
127 16         40 my $source= $self->get_state();
128 16         87 return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES));
129             }
130              
131              
132             sub _seed {
133 377     377   649 my $self= shift;
134 377 100       788 if (@_) {
135 135         224 my $seed= shift;
136 135 50 66     455 Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes",
137             STADTX_HASH_SEED_BYTES,length($seed))
138             if defined($seed) and length($seed) != 16;
139 135         278 $self->{seed}= $seed;
140 135         232 delete $self->{state};
141             }
142 377 100       834 if ( !defined $self->{seed} ) {
143             #1234567812345678
144 21         53 $self->{seed}= "MinPerfHash2Levl";
145 21         47 delete $self->{state};
146             }
147 377         1194 return $self->{seed};
148             }
149              
150             sub set_seed {
151 47     47 0 187 my ($self,$value)= @_;
152 47         150 my $seed= $self->_seed($value);
153 47         134 return $self->{constructor_seed}= $seed;
154             }
155              
156             sub get_seed {
157 151     151 0 382 my ($self)= @_;
158 151         330 return $self->_seed();
159             }
160              
161              
162             sub get_state {
163 110     110 0 171 my $self= shift;
164 110   66     400 return $self->{state} ||= seed_state($self->_seed());
165             }
166              
167             sub get_failed_seeds {
168 0     0 0   my $self= shift;
169 0 0         return @{$self->{failed_seeds}||[]};
  0            
170             }
171              
172             1;
173             __END__