| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Digest::ManberHash; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | =head1 NAME | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | Digest::ManberHash - a Perl package to calculate Manber Hashes | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | use Digest::ManberHash; | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | $instance = Digest::ManberHash::new($maskbits, $prime, $charcount); | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | $hash1 = $instance->DoHash($filename1); | 
| 14 |  |  |  |  |  |  | $hash2 = $instance->DoHash($filename2); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | $similarity = $instance->Compare($hash1, $hash2); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | =head2 Initialization | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | Use C. | 
| 23 |  |  |  |  |  |  | Parameters: | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | =over 4 | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | =item maskbits | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | range 1 .. 30, default 11. | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | =item prime | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | range 3 .. 65537, default 7. | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | =item charcount | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | range 8 .. 32768, default 64. | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =back | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | For a detailed description please read http://citeseer.nj.nec.com/manber94finding.html. | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =head2 Calculating hashes | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | $hash = $instance->DoHash($filename); | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | This gives an object, which has an hash of hash values stored within. | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | =head2 Comparing hashes | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | $similarity = $instance->Compare($hash1, $hash2); | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | This gives an value of 0.0 .. 1.0, depending on the similariness. | 
| 56 |  |  |  |  |  |  | Help wanted: The calculation could do better than now!! | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =cut | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | require Exporter; | 
| 62 |  |  |  |  |  |  | require DynaLoader; | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | our @ISA = qw(Exporter DynaLoader); | 
| 65 |  |  |  |  |  |  | # Items to export into callers namespace by default. Note: do not export | 
| 66 |  |  |  |  |  |  | # names by default without a very good reason. Use EXPORT_OK instead. | 
| 67 |  |  |  |  |  |  | # Do not simply export all your public functions/methods/constants. | 
| 68 |  |  |  |  |  |  | our @EXPORT = qw( | 
| 69 |  |  |  |  |  |  | HashFile | 
| 70 |  |  |  |  |  |  | new | 
| 71 |  |  |  |  |  |  | Compare | 
| 72 |  |  |  |  |  |  | ); | 
| 73 |  |  |  |  |  |  | our $VERSION = '0.7'; | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub new | 
| 77 |  |  |  |  |  |  | { | 
| 78 | 1 |  |  | 1 | 0 | 12 | my($class, $maskbits, $prime, $charcount)=@_; | 
| 79 | 1 |  |  |  |  | 1 | my($x,%a); | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 1 |  | 50 |  |  | 6 | $prime||=7; | 
| 82 | 1 |  | 50 |  |  | 4 | $maskbits||=11; | 
| 83 | 1 |  | 50 |  |  | 6 | $charcount||=64; | 
| 84 |  |  |  |  |  |  |  | 
| 85 | 1 |  |  |  |  | 15 | $x=Init($prime,$maskbits,$charcount); | 
| 86 | 1 |  |  |  |  | 5 | %a=( "settings" => $x ); | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 1 |  |  |  |  | 3 | bless \%a; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub DoHash | 
| 92 |  |  |  |  |  |  | { | 
| 93 | 0 |  |  | 0 | 0 |  | my($self,$filename)=@_; | 
| 94 | 0 |  |  |  |  |  | my($e,$f,%a,%b); | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 0 |  |  |  |  |  | %b=(); | 
| 97 | 0 |  |  |  |  |  | ManberHash($self->{"settings"}, $filename, \%b ); | 
| 98 | 0 |  |  |  |  |  | %a= ( "data" => \%b, "base" => $self); | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 0 |  |  |  |  |  | while (($e, $f) = each(%b)) | 
| 101 |  |  |  |  |  |  | { | 
| 102 | 0 | 0 |  |  |  |  | $self->{"max"}{$e}=$f if $self->{"max"}{$e} < $f; | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  |  | 
| 105 | 0 |  |  |  |  |  | bless \%a; | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | sub Compare | 
| 109 |  |  |  |  |  |  | { | 
| 110 | 0 |  |  | 0 | 0 |  | my($self,$file1,$file2)=@_; | 
| 111 | 0 |  |  |  |  |  | my(%keys,$a,$k,$c,$v,$m); | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | #return 0 if (ref($self) !~ /^HASH/); | 
| 114 | 0 | 0 | 0 |  |  |  | die if $self ne $file1->{"base"} || | 
| 115 |  |  |  |  |  |  | $self ne $file2->{"base"}; | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  |  | 
| 118 | 0 |  |  |  |  |  | %keys=map { $_,1; } (keys %{$file1->{"data"}}, keys %{$file2->{"data"}}); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 119 | 0 |  |  |  |  |  | $c=$a=$m=0; | 
| 120 | 0 |  |  |  |  |  | for $k (keys %keys) | 
| 121 |  |  |  |  |  |  | { | 
| 122 | 0 |  |  |  |  |  | $v = ($file1->{"data"}->{$k} - $file2->{"data"}->{$k}); | 
| 123 |  |  |  |  |  |  | #    $m += $self->{"max"}{$k} * $self->{"max"}{$k}; | 
| 124 | 0 |  |  |  |  |  | $a += $v*$v; | 
| 125 | 0 |  |  |  |  |  | $c++; | 
| 126 |  |  |  |  |  |  | #    print "$k = ",$self->{$k}," - ",$other->{$k},"($c, $a)\n"; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 0 | 0 |  |  |  |  | return 0 if !$c; | 
| 130 |  |  |  |  |  |  | #  1 - 6*$a/($c*$c*$c - $c); | 
| 131 |  |  |  |  |  |  | #  1-sqrt($a)/$c; | 
| 132 | 0 |  |  |  |  |  | 1/(1.0+$a); | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | bootstrap Digest::ManberHash $VERSION; | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # Preloaded methods go here. | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | # Autoload methods go after __END__, and are processed by the autosplit program. | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | 1; | 
| 142 |  |  |  |  |  |  | __END__ |