| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Tie::Hash::MinPerfHashTwoLevel::OnDisk; | 
| 2 | 2 |  |  | 2 |  | 200248 | use strict; | 
|  | 2 |  |  |  |  | 12 |  | 
|  | 2 |  |  |  |  | 51 |  | 
| 3 | 2 |  |  | 2 |  | 9 | use warnings; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 98 |  | 
| 4 |  |  |  |  |  |  | our $VERSION = '0.16'; | 
| 5 |  |  |  |  |  |  | our $DEFAULT_VARIANT = 5; | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | # this also installs the XS routines we use into our namespace. | 
| 8 | 2 |  |  | 2 |  | 741 | use Algorithm::MinPerfHashTwoLevel ( 'hash_with_state', '$DEFAULT_VARIANT', ':flags', 'MAX_VARIANT', 'MIN_VARIANT' ); | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 314 |  | 
| 9 | 2 |  |  | 2 |  | 13 | use Exporter qw(import); | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 82 |  | 
| 10 |  |  |  |  |  |  | my %constants; | 
| 11 |  |  |  |  |  |  | BEGIN { | 
| 12 | 2 |  |  | 2 |  | 42 | %constants= ( | 
| 13 |  |  |  |  |  |  | MAGIC_STR               =>  "PH2L", | 
| 14 |  |  |  |  |  |  | #MPH_F_FILTER_UNDEF      =>  (1<<0), | 
| 15 |  |  |  |  |  |  | #MPH_F_DETERMINISTIC     =>  (1<<1), | 
| 16 |  |  |  |  |  |  | MPH_F_NO_DEDUPE         =>  (1<<2), | 
| 17 |  |  |  |  |  |  | MPH_F_VALIDATE          =>  (1<<3), | 
| 18 |  |  |  |  |  |  | ); | 
| 19 |  |  |  |  |  |  | } | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 2 |  |  | 2 |  | 9 | use constant \%constants; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 114 |  | 
| 22 | 2 |  |  | 2 |  | 10 | use Carp; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 2604 |  | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | our %EXPORT_TAGS = ( | 
| 25 |  |  |  |  |  |  | 'all' => [ qw(mph2l_tied_hashref mph2l_make_file MAX_VARIANT MIN_VARIANT), sort keys %constants ], | 
| 26 |  |  |  |  |  |  | 'flags' => ['MPH_F_DETERMINISTIC', grep /MPH_F_/, sort keys %constants], | 
| 27 |  |  |  |  |  |  | 'magic' => [grep /MAGIC/, sort keys %constants], | 
| 28 |  |  |  |  |  |  | ); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | my $scalar_has_slash= scalar(%EXPORT_TAGS)=~m!/!; | 
| 31 |  |  |  |  |  |  | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | our @EXPORT = qw(); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | sub mph2l_tied_hashref { | 
| 36 | 185 |  |  | 185 | 1 | 107924 | my ($file, %opts)= @_; | 
| 37 | 185 |  |  |  |  | 663 | tie my %tied, __PACKAGE__, $file, %opts; | 
| 38 | 1 |  |  |  |  | 3 | return \%tied; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub mph2l_make_file { | 
| 42 | 1 |  |  | 1 | 1 | 652 | my ($file,%opts)= @_; | 
| 43 | 1 |  |  |  |  | 9 | return __PACKAGE__->make_file(file => $file, %opts); | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | sub mph2l_validate_file { | 
| 47 | 0 |  |  | 0 | 1 | 0 | my ($file, %opts)= @_; | 
| 48 | 0 |  |  |  |  | 0 | return __PACKAGE__->validate_file(file => $file, %opts); | 
| 49 |  |  |  |  |  |  | } | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | sub new { | 
| 52 | 366 |  |  | 366 | 0 | 1074 | my ($class, %opts)= @_; | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 366 |  | 100 |  |  | 1493 | $opts{flags} ||= 0; | 
| 55 | 366 | 100 |  |  |  | 777 | $opts{flags} |= MPH_F_VALIDATE if $opts{validate}; | 
| 56 | 366 |  |  |  |  | 486 | my $error; | 
| 57 | 366 |  |  |  |  | 28408 | my $mount= mount_file($opts{file},$error,$opts{flags}); | 
| 58 | 366 |  |  |  |  | 1158 | my $error_rsv= delete $opts{error_rsv}; | 
| 59 | 366 | 100 |  |  |  | 867 | if ($error_rsv) { | 
| 60 | 90 |  |  |  |  | 184 | $$error_rsv= $error; | 
| 61 |  |  |  |  |  |  | } | 
| 62 | 366 | 100 |  |  |  | 747 | if (!defined($mount)) { | 
| 63 | 185 | 50 |  |  |  | 297 | if ($error_rsv) { | 
| 64 | 0 |  |  |  |  | 0 | return; | 
| 65 |  |  |  |  |  |  | } else { | 
| 66 | 185 |  |  |  |  | 1651 | die "Failed to mount file '$opts{file}': $error"; | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  | } | 
| 69 | 181 |  |  |  |  | 373 | $opts{mount}= $mount; | 
| 70 | 181 |  |  |  |  | 841 | return bless \%opts, $class; | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | sub TIEHASH { | 
| 74 | 276 |  |  | 276 |  | 75185 | my ($class, $file, %opts)= @_; | 
| 75 | 276 |  |  |  |  | 666 | return $class->new( file => $file, %opts ); | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub FETCH { | 
| 79 | 1206672 |  |  | 1206672 |  | 6566592 | my ($self, $key)= @_; | 
| 80 | 1206672 |  |  |  |  | 1499579 | my $value; | 
| 81 | 1206672 | 50 |  |  |  | 2957108 | fetch_by_key($self->{mount},$key,$value) | 
| 82 |  |  |  |  |  |  | or return; | 
| 83 | 1206672 |  |  |  |  | 3138297 | return $value; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub EXISTS { | 
| 87 | 0 |  |  | 0 |  | 0 | my ($self, $key)= @_; | 
| 88 | 0 |  |  |  |  | 0 | return fetch_by_key($self->{mount},$key); | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub FIRSTKEY { | 
| 92 | 270 |  |  | 270 |  | 1186793 | my ($self)= @_; | 
| 93 | 270 |  |  |  |  | 581 | $self->{iter_idx}= 0; | 
| 94 | 270 |  |  |  |  | 528 | return $self->NEXTKEY(); | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | sub NEXTKEY { | 
| 98 | 905274 |  |  | 905274 |  | 2449123 | my ($self, $lastkey)= @_; | 
| 99 | 905274 |  |  |  |  | 2024048 | fetch_by_index($self->{mount},$self->{iter_idx}++,my $key); | 
| 100 | 905274 |  |  |  |  | 3079469 | return $key; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | sub SCALAR { | 
| 104 | 90 |  |  | 90 |  | 566 | my ($self)= @_; | 
| 105 | 90 |  |  |  |  | 235 | my $buckets= $self->get_hdr_num_buckets(); | 
| 106 | 90 | 50 |  |  |  | 260 | if ($scalar_has_slash) { | 
| 107 | 0 |  |  |  |  | 0 | $buckets .= "/" . $buckets; | 
| 108 |  |  |  |  |  |  | } | 
| 109 | 90 |  |  |  |  | 211 | return $buckets; | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | sub UNTIE { | 
| 113 | 0 |  |  | 0 |  | 0 | my ($self)= @_; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | sub DESTROY { | 
| 117 | 181 |  |  | 181 |  | 14737804 | my ($self)= @_; | 
| 118 | 181 | 50 |  |  |  | 4602 | unmount_file($self->{mount}) if $self->{mount}; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | sub STORE { | 
| 122 | 0 |  |  | 0 |  | 0 | my ($self, $key, $value)= @_; | 
| 123 | 0 |  |  |  |  | 0 | confess __PACKAGE__ . " is readonly, STORE operations are not supported"; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | sub DELETE { | 
| 127 | 0 |  |  | 0 |  | 0 | my ($self, $key)= @_; | 
| 128 | 0 |  |  |  |  | 0 | confess __PACKAGE__ . " is readonly, DELETE operations are not supported"; | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | sub CLEAR { | 
| 132 | 0 |  |  | 0 |  | 0 | my ($self)= @_; | 
| 133 | 0 |  |  |  |  | 0 | confess __PACKAGE__ . " is readonly, CLEAR operations are not supported"; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub make_file { | 
| 137 | 91 |  |  | 91 | 1 | 120216 | my ($class, %opts)= @_; | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | my $ofile= $opts{file} | 
| 140 | 91 | 50 |  |  |  | 373 | or die "file is a mandatory option to make_file"; | 
| 141 |  |  |  |  |  |  | my $source_hash= $opts{source_hash} | 
| 142 | 91 | 50 |  |  |  | 253 | or die "source_hash is a mandatory option to make_file"; | 
| 143 | 91 | 100 |  |  |  | 252 | $opts{comment}= "" unless defined $opts{comment}; | 
| 144 | 91 | 100 |  |  |  | 231 | $opts{variant}= $DEFAULT_VARIANT unless defined $opts{variant}; | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 91 |  | 100 |  |  | 206 | my $comment= $opts{comment}||""; | 
| 147 | 91 |  | 50 |  |  | 372 | my $debug= $opts{debug} || 0; | 
| 148 | 91 |  |  |  |  | 183 | my $variant= int($opts{variant}); | 
| 149 | 91 |  |  |  |  | 108 | my $deterministic; | 
| 150 | 91 |  | 66 |  |  | 409 | $deterministic //= delete $opts{canonical}; | 
| 151 | 91 |  | 33 |  |  | 199 | $deterministic //= delete $opts{deterministic}; | 
| 152 | 91 |  | 50 |  |  | 206 | $deterministic //= 1; | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | #1234567812345678 | 
| 155 |  |  |  |  |  |  | $opts{seed} = "MinPerfHash2Levl" | 
| 156 | 91 | 100 | 66 |  |  | 284 | if !defined($opts{seed}) and $deterministic; | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 91 |  | 50 |  |  | 271 | my $compute_flags= int($opts{compute_flags}||0); | 
| 159 | 91 | 50 |  |  |  | 226 | $compute_flags |= MPH_F_NO_DEDUPE if delete $opts{no_dedupe}; | 
| 160 | 91 | 100 |  |  |  | 197 | $compute_flags |= MPH_F_DETERMINISTIC | 
| 161 |  |  |  |  |  |  | if $deterministic; | 
| 162 |  |  |  |  |  |  | $compute_flags |= MPH_F_FILTER_UNDEF | 
| 163 | 91 | 50 |  |  |  | 191 | if delete $opts{filter_undef}; | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 91 | 50 |  |  |  | 212 | die "Unknown variant '$variant', max known is " | 
| 166 |  |  |  |  |  |  | . MAX_VARIANT . " default is " . $DEFAULT_VARIANT | 
| 167 |  |  |  |  |  |  | if $variant > MAX_VARIANT; | 
| 168 | 91 | 50 |  |  |  | 183 | die "Unknown variant '$variant', min known is " | 
| 169 |  |  |  |  |  |  | . MIN_VARIANT . " default is " . $DEFAULT_VARIANT | 
| 170 |  |  |  |  |  |  | if $variant < MIN_VARIANT; | 
| 171 |  |  |  |  |  |  |  | 
| 172 | 91 | 50 |  |  |  | 247 | die "comment cannot contain null" | 
| 173 |  |  |  |  |  |  | if index($comment,"\0") >= 0; | 
| 174 |  |  |  |  |  |  |  | 
| 175 | 91 |  |  |  |  | 137 | my $seed= $opts{seed}; | 
| 176 |  |  |  |  |  |  | my $hasher= Algorithm::MinPerfHashTwoLevel->new( | 
| 177 |  |  |  |  |  |  | debug => $debug, | 
| 178 |  |  |  |  |  |  | seed => (ref $seed ? $$seed : $seed), | 
| 179 |  |  |  |  |  |  | variant => $variant, | 
| 180 |  |  |  |  |  |  | compute_flags => $compute_flags, | 
| 181 |  |  |  |  |  |  | max_tries => $opts{max_tries}, | 
| 182 | 91 | 100 |  |  |  | 799 | ); | 
| 183 | 91 |  |  |  |  | 301 | my $buckets= $hasher->compute($source_hash); | 
| 184 | 91 |  |  |  |  | 311 | my $buf_length= $hasher->{buf_length}; | 
| 185 | 91 |  |  |  |  | 212 | my $state= $hasher->{state}; | 
| 186 | 91 |  |  |  |  | 1211477 | my $buf= packed_xs($variant, $buf_length, $state, $comment, $compute_flags, @$buckets); | 
| 187 | 91 | 100 |  |  |  | 60315 | $$seed= $hasher->get_seed if ref $seed; | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 91 |  |  |  |  | 336 | my $tmp_file= "$ofile.$$"; | 
| 190 | 91 | 50 |  |  |  | 19215 | open my $ofh, ">", $tmp_file | 
| 191 |  |  |  |  |  |  | or die "Failed to open $tmp_file for output"; | 
| 192 | 91 | 50 |  |  |  | 13973 | print $ofh $buf | 
| 193 |  |  |  |  |  |  | or die "failed to print to '$tmp_file': $!"; | 
| 194 | 91 | 50 |  |  |  | 3181 | close $ofh | 
| 195 |  |  |  |  |  |  | or die "failed to close '$tmp_file': $!"; | 
| 196 | 91 | 50 |  |  |  | 3203 | rename $tmp_file, $ofile | 
| 197 |  |  |  |  |  |  | or die "failed to rename '$tmp_file' to '$ofile': $!"; | 
| 198 | 91 |  |  |  |  | 972260 | return $ofile; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | sub validate_file { | 
| 202 | 90 |  |  | 90 | 1 | 221727 | my ($class, %opts)= @_; | 
| 203 |  |  |  |  |  |  | my $file= $opts{file} | 
| 204 | 90 | 50 |  |  |  | 299 | or die "file is a mandatory option to validate_file"; | 
| 205 | 90 |  |  |  |  | 169 | my $verbose= $opts{verbose}; | 
| 206 | 90 |  |  |  |  | 226 | my ($variant,$msg); | 
| 207 |  |  |  |  |  |  |  | 
| 208 | 90 |  |  |  |  | 0 | my $error_sv; | 
| 209 | 90 |  |  |  |  | 312 | my $self= $class->new(file => $file, flags => MPH_F_VALIDATE, error_rsv => \$error_sv); | 
| 210 | 90 | 50 |  |  |  | 245 | if ($self) { | 
| 211 | 90 |  |  |  |  | 1447 | $msg= sprintf "file '%s' is a valid '%s' file\n" | 
| 212 |  |  |  |  |  |  | . "  variant: %d\n" | 
| 213 |  |  |  |  |  |  | . "  keys: %d\n" | 
| 214 |  |  |  |  |  |  | . "  hash-state: %s\n" | 
| 215 |  |  |  |  |  |  | . "  table  checksum: %016x\n" | 
| 216 |  |  |  |  |  |  | . "  string checksum: %016x\n" | 
| 217 |  |  |  |  |  |  | . "  comment: %s" | 
| 218 |  |  |  |  |  |  | ,  $file, | 
| 219 |  |  |  |  |  |  | MAGIC_STR, | 
| 220 |  |  |  |  |  |  | $self->get_hdr_variant, | 
| 221 |  |  |  |  |  |  | $self->get_hdr_num_buckets, | 
| 222 |  |  |  |  |  |  | unpack("H*", $self->get_state), | 
| 223 |  |  |  |  |  |  | $self->get_hdr_table_checksum, | 
| 224 |  |  |  |  |  |  | $self->get_hdr_str_buf_checksum, | 
| 225 |  |  |  |  |  |  | $self->get_comment, | 
| 226 |  |  |  |  |  |  | ; | 
| 227 | 90 |  |  |  |  | 268 | $variant = $self->get_hdr_variant; | 
| 228 |  |  |  |  |  |  | } else { | 
| 229 | 0 |  |  |  |  | 0 | $msg= $error_sv; | 
| 230 |  |  |  |  |  |  | } | 
| 231 | 90 | 50 |  |  |  | 211 | if ($verbose) { | 
| 232 | 0 | 0 |  |  |  | 0 | if (defined $variant) { | 
| 233 | 0 |  |  |  |  | 0 | print $msg; | 
| 234 |  |  |  |  |  |  | } else { | 
| 235 | 0 |  |  |  |  | 0 | die $msg."\n"; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  | } | 
| 238 | 90 |  |  |  |  | 293 | return ($variant, $msg); | 
| 239 |  |  |  |  |  |  | } | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | 1; | 
| 244 |  |  |  |  |  |  | __END__ |