| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Algorithm::MinPerfHashTwoLevel; | 
| 2 | 4 |  |  | 4 |  | 201373 | use strict; | 
|  | 4 |  |  |  |  | 14 |  | 
|  | 4 |  |  |  |  | 112 |  | 
| 3 | 4 |  |  | 4 |  | 19 | use warnings; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 177 |  | 
| 4 |  |  |  |  |  |  | our $VERSION = '0.15'; | 
| 5 |  |  |  |  |  |  | our $DEFAULT_VARIANT = 5; | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 4 |  |  | 4 |  | 21 | use Exporter qw(import); | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 113 |  | 
| 8 | 4 |  |  | 4 |  | 19 | use Carp (); | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 99 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 4 |  |  | 4 |  | 20 | no warnings "portable"; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 243 |  | 
| 11 |  |  |  |  |  |  | my %constant; | 
| 12 |  |  |  |  |  |  | BEGIN { | 
| 13 | 4 |  |  | 4 |  | 132 | %constant= ( | 
| 14 |  |  |  |  |  |  | MPH_F_FILTER_UNDEF          =>  (1<<0), | 
| 15 |  |  |  |  |  |  | MPH_F_DETERMINISTIC         =>  (1<<1), | 
| 16 |  |  |  |  |  |  | #MPH_F_NO_DEDUPE             =>  (1<<2), | 
| 17 |  |  |  |  |  |  | #MPH_F_VALIDATE              =>  (1<<3), | 
| 18 |  |  |  |  |  |  | MAX_VARIANT                 =>  5, | 
| 19 |  |  |  |  |  |  | MIN_VARIANT                 =>  5, | 
| 20 |  |  |  |  |  |  | STADTX_HASH_SEED_BYTES      => 16, | 
| 21 |  |  |  |  |  |  | STADTX_HASH_STATE_BYTES     => 32, | 
| 22 |  |  |  |  |  |  | ); | 
| 23 |  |  |  |  |  |  | } | 
| 24 | 4 |  |  | 4 |  | 24 | use constant \%constant; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 881 |  | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | our %EXPORT_TAGS = ( | 
| 27 |  |  |  |  |  |  | 'all' => [ | 
| 28 |  |  |  |  |  |  | '$DEFAULT_VARIANT', | 
| 29 |  |  |  |  |  |  | 'MAX_VARIANT', | 
| 30 |  |  |  |  |  |  | 'MIN_VARIANT', | 
| 31 |  |  |  |  |  |  | qw( | 
| 32 |  |  |  |  |  |  | seed_state | 
| 33 |  |  |  |  |  |  | hash_with_state | 
| 34 |  |  |  |  |  |  | ), sort keys %constant | 
| 35 |  |  |  |  |  |  | ], | 
| 36 |  |  |  |  |  |  | 'flags' => [ sort grep /MPH_F_/, keys %constant ], | 
| 37 |  |  |  |  |  |  | ); | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | our @EXPORT = qw(); | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | require XSLoader; | 
| 45 |  |  |  |  |  |  | XSLoader::load('Algorithm::MinPerfHashTwoLevel', $VERSION); | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | # Preloaded methods go here. | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 4 |  |  | 4 |  | 27 | use Carp (); | 
|  | 4 |  |  |  |  | 15 |  | 
|  | 4 |  |  |  |  | 3118 |  | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | sub new { | 
| 52 | 93 |  |  | 93 | 1 | 698 | my ($class,%opts)= @_; | 
| 53 | 93 |  |  |  |  | 232 | my $seed= delete($opts{seed}); | 
| 54 | 93 | 50 |  |  |  | 1660 | delete($opts{state}) and warn "ignoring 'state' parameter"; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 93 |  |  |  |  | 310 | my $o= bless \%opts, $class; | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 93 | 100 |  |  |  | 345 | $o->set_seed($seed) if $seed; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 93 | 100 |  |  |  | 246 | $o->{variant}= $DEFAULT_VARIANT unless defined $o->{variant}; | 
| 61 | 93 |  |  |  |  | 214 | $o->{variant}= int(0+$o->{variant}); | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 93 |  | 100 |  |  | 287 | $o->{compute_flags} ||= 0; | 
| 64 |  |  |  |  |  |  | $o->{compute_flags} += MPH_F_FILTER_UNDEF | 
| 65 | 93 | 50 |  |  |  | 219 | if delete $o->{filter_undef}; | 
| 66 |  |  |  |  |  |  | $o->{compute_flags} += MPH_F_DETERMINISTIC | 
| 67 | 93 | 50 | 33 |  |  | 412 | if delete $o->{deterministic} or delete $o->{canonical}; | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | die "Unknown variant '$o->{variant}' in constructor new(), max known is " | 
| 70 |  |  |  |  |  |  | . MAX_VARIANT . " default is " . $DEFAULT_VARIANT | 
| 71 | 93 | 50 |  |  |  | 235 | if $o->{variant} > MAX_VARIANT; | 
| 72 |  |  |  |  |  |  | die "Unknown variant '$o->{variant}' in constructor new(), min known is " | 
| 73 |  |  |  |  |  |  | . MIN_VARIANT . " default is " . $DEFAULT_VARIANT | 
| 74 | 93 | 50 |  |  |  | 227 | if $o->{variant} < MIN_VARIANT; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 93 |  |  |  |  | 258 | return $o; | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | sub compute { | 
| 80 | 95 |  |  | 95 | 1 | 6517 | my ($self, $source_hash)= @_; | 
| 81 | 95 | 50 |  |  |  | 227 | if ($source_hash) { | 
| 82 | 95 |  |  |  |  | 199 | $self->{source_hash}= $source_hash; | 
| 83 |  |  |  |  |  |  | } else { | 
| 84 | 0 |  |  |  |  | 0 | $source_hash= $self->{source_hash}; | 
| 85 |  |  |  |  |  |  | } | 
| 86 | 95 |  | 50 |  |  | 309 | my $debug= $self->{debug} ||= 0; | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | # reuse the constructor seed. | 
| 89 | 95 |  |  |  |  | 273 | $self->_seed($self->{constructor_seed}); | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | # find the number of keys we have to deal with | 
| 92 | 95 |  | 50 |  |  | 313 | my $max_tries= $self->{max_tries} || 10; | 
| 93 | 95 |  |  |  |  | 162 | my @failed_seeds; | 
| 94 | 95 |  |  |  |  | 268 | $self->{failed_seeds}= \@failed_seeds; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 95 |  |  |  |  | 293 | for my $counter ( 1 .. $max_tries ) { | 
| 97 | 95 |  |  |  |  | 233 | my $seed= $self->get_seed; # ensure we have a seed set up (must be called before compute_xs) | 
| 98 | 95 |  |  |  |  | 247 | my $state= $self->get_state; # ensure we have a state set up (must be called before compute_xs) | 
| 99 | 95 |  |  |  |  | 182 | delete $self->{buckets}; | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 95 | 50 |  |  |  | 210 | printf "MPH2L compute attempt #%2d/%2d for hash with %6d keys - using seed: %s (state: %s)\n", | 
| 102 |  |  |  |  |  |  | $counter, | 
| 103 |  |  |  |  |  |  | $max_tries, | 
| 104 |  |  |  |  |  |  | 0+keys(%$source_hash), | 
| 105 |  |  |  |  |  |  | unpack("H*",$seed), | 
| 106 |  |  |  |  |  |  | unpack("H*",$state), | 
| 107 |  |  |  |  |  |  | if $debug; | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | my $bad_idx= compute_xs($self) | 
| 110 | 95 | 50 |  |  |  | 3429193 | or return $self->{buckets}; | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 0 |  |  |  |  | 0 | push @failed_seeds, $seed; | 
| 113 | 0 | 0 |  |  |  | 0 | if ($counter < $max_tries) { | 
| 114 | 0 |  |  |  |  | 0 | $self->re_seed(); | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 | 0 | 0 |  |  |  | 0 | if ( $max_tries == 1 ) { | 
| 119 | 0 |  |  |  |  | 0 | die sprintf "Failed to compute minimal perfect hash using seed %s", unpack "H*", $failed_seeds[0]; | 
| 120 |  |  |  |  |  |  | } else { | 
| 121 |  |  |  |  |  |  | Carp::confess( | 
| 122 |  |  |  |  |  |  | sprintf "Failed to compute minimal perfect hash after %d tries. Seeds tried: %s", | 
| 123 | 0 |  |  |  |  | 0 | $max_tries, join(" ", map { unpack "H*", $_ } @failed_seeds) | 
|  | 0 |  |  |  |  | 0 |  | 
| 124 |  |  |  |  |  |  | ); | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  | # NOT REACHED. | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub re_seed { | 
| 130 | 0 |  |  | 0 | 0 | 0 | my $self= shift; | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 0 |  |  |  |  | 0 | my $source= $self->get_state(); | 
| 133 | 0 |  |  |  |  | 0 | return $self->_seed(substr($source,0,STADTX_HASH_SEED_BYTES) ^ substr($source,STADTX_HASH_SEED_BYTES)); | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | sub _seed { | 
| 138 | 439 |  |  | 439 |  | 727 | my $self= shift; | 
| 139 | 439 | 100 |  |  |  | 847 | if (@_) { | 
| 140 | 158 |  |  |  |  | 263 | my $seed= shift; | 
| 141 | 158 | 50 | 66 |  |  | 534 | Carp::confess(sprintf "Seed should be undef, or a string exactly %d bytes long, not %d bytes", | 
| 142 |  |  |  |  |  |  | STADTX_HASH_SEED_BYTES,length($seed)) | 
| 143 |  |  |  |  |  |  | if defined($seed) and length($seed) != 16; | 
| 144 | 158 |  |  |  |  | 272 | $self->{seed}= $seed; | 
| 145 | 158 |  |  |  |  | 264 | delete $self->{state}; | 
| 146 |  |  |  |  |  |  | } | 
| 147 | 439 | 100 |  |  |  | 1006 | if ( !defined $self->{seed} ) { | 
| 148 |  |  |  |  |  |  | #1234567812345678 | 
| 149 | 30 |  |  |  |  | 116 | $self->{seed}= "MinPerfHash2Levl"; | 
| 150 | 30 |  |  |  |  | 63 | delete $self->{state}; | 
| 151 |  |  |  |  |  |  | } | 
| 152 | 439 |  |  |  |  | 1279 | return $self->{seed}; | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | sub set_seed { | 
| 156 | 63 |  |  | 63 | 0 | 138 | my ($self,$value)= @_; | 
| 157 | 63 |  |  |  |  | 165 | my $seed= $self->_seed($value); | 
| 158 | 63 |  |  |  |  | 152 | return $self->{constructor_seed}= $seed; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | sub get_seed { | 
| 162 | 185 |  |  | 185 | 0 | 430 | my ($self)= @_; | 
| 163 | 185 |  |  |  |  | 366 | return $self->_seed(); | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | sub get_state { | 
| 168 | 97 |  |  | 97 | 0 | 148 | my $self= shift; | 
| 169 | 97 |  | 66 |  |  | 325 | return $self->{state} ||= seed_state($self->_seed()); | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | sub get_failed_seeds { | 
| 173 | 0 |  |  | 0 | 0 |  | my $self= shift; | 
| 174 | 0 | 0 |  |  |  |  | return @{$self->{failed_seeds}||[]}; | 
|  | 0 |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | 1; | 
| 178 |  |  |  |  |  |  | __END__ |