line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Digest::ManberHash; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Digest::ManberHash - a Perl package to calculate Manber Hashes |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Digest::ManberHash; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$instance = Digest::ManberHash::new($maskbits, $prime, $charcount); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$hash1 = $instance->DoHash($filename1); |
14
|
|
|
|
|
|
|
$hash2 = $instance->DoHash($filename2); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
$similarity = $instance->Compare($hash1, $hash2); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head2 Initialization |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Use C. |
23
|
|
|
|
|
|
|
Parameters: |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=over 4 |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=item maskbits |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
range 1 .. 30, default 11. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=item prime |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
range 3 .. 65537, default 7. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=item charcount |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
range 8 .. 32768, default 64. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=back |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
For a detailed description please read http://citeseer.nj.nec.com/manber94finding.html. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 Calculating hashes |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
$hash = $instance->DoHash($filename); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This gives an object, which has an hash of hash values stored within. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Comparing hashes |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$similarity = $instance->Compare($hash1, $hash2); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This gives an value of 0.0 .. 1.0, depending on the similariness. |
56
|
|
|
|
|
|
|
Help wanted: The calculation could do better than now!! |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
require Exporter; |
62
|
|
|
|
|
|
|
require DynaLoader; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
our @ISA = qw(Exporter DynaLoader); |
65
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
66
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
67
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
68
|
|
|
|
|
|
|
our @EXPORT = qw( |
69
|
|
|
|
|
|
|
HashFile |
70
|
|
|
|
|
|
|
new |
71
|
|
|
|
|
|
|
Compare |
72
|
|
|
|
|
|
|
); |
73
|
|
|
|
|
|
|
our $VERSION = '0.7'; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub new |
77
|
|
|
|
|
|
|
{ |
78
|
1
|
|
|
1
|
0
|
12
|
my($class, $maskbits, $prime, $charcount)=@_; |
79
|
1
|
|
|
|
|
1
|
my($x,%a); |
80
|
|
|
|
|
|
|
|
81
|
1
|
|
50
|
|
|
6
|
$prime||=7; |
82
|
1
|
|
50
|
|
|
4
|
$maskbits||=11; |
83
|
1
|
|
50
|
|
|
6
|
$charcount||=64; |
84
|
|
|
|
|
|
|
|
85
|
1
|
|
|
|
|
15
|
$x=Init($prime,$maskbits,$charcount); |
86
|
1
|
|
|
|
|
5
|
%a=( "settings" => $x ); |
87
|
|
|
|
|
|
|
|
88
|
1
|
|
|
|
|
3
|
bless \%a; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub DoHash |
92
|
|
|
|
|
|
|
{ |
93
|
0
|
|
|
0
|
0
|
|
my($self,$filename)=@_; |
94
|
0
|
|
|
|
|
|
my($e,$f,%a,%b); |
95
|
|
|
|
|
|
|
|
96
|
0
|
|
|
|
|
|
%b=(); |
97
|
0
|
|
|
|
|
|
ManberHash($self->{"settings"}, $filename, \%b ); |
98
|
0
|
|
|
|
|
|
%a= ( "data" => \%b, "base" => $self); |
99
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
while (($e, $f) = each(%b)) |
101
|
|
|
|
|
|
|
{ |
102
|
0
|
0
|
|
|
|
|
$self->{"max"}{$e}=$f if $self->{"max"}{$e} < $f; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
|
bless \%a; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub Compare |
109
|
|
|
|
|
|
|
{ |
110
|
0
|
|
|
0
|
0
|
|
my($self,$file1,$file2)=@_; |
111
|
0
|
|
|
|
|
|
my(%keys,$a,$k,$c,$v,$m); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
#return 0 if (ref($self) !~ /^HASH/); |
114
|
0
|
0
|
0
|
|
|
|
die if $self ne $file1->{"base"} || |
115
|
|
|
|
|
|
|
$self ne $file2->{"base"}; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
%keys=map { $_,1; } (keys %{$file1->{"data"}}, keys %{$file2->{"data"}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
$c=$a=$m=0; |
120
|
0
|
|
|
|
|
|
for $k (keys %keys) |
121
|
|
|
|
|
|
|
{ |
122
|
0
|
|
|
|
|
|
$v = ($file1->{"data"}->{$k} - $file2->{"data"}->{$k}); |
123
|
|
|
|
|
|
|
# $m += $self->{"max"}{$k} * $self->{"max"}{$k}; |
124
|
0
|
|
|
|
|
|
$a += $v*$v; |
125
|
0
|
|
|
|
|
|
$c++; |
126
|
|
|
|
|
|
|
# print "$k = ",$self->{$k}," - ",$other->{$k},"($c, $a)\n"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
0
|
0
|
|
|
|
|
return 0 if !$c; |
130
|
|
|
|
|
|
|
# 1 - 6*$a/($c*$c*$c - $c); |
131
|
|
|
|
|
|
|
# 1-sqrt($a)/$c; |
132
|
0
|
|
|
|
|
|
1/(1.0+$a); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
bootstrap Digest::ManberHash $VERSION; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Preloaded methods go here. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Autoload methods go after __END__, and are processed by the autosplit program. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
1; |
142
|
|
|
|
|
|
|
__END__ |