line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Levenshtein; |
2
|
|
|
|
|
|
|
$Text::Levenshtein::VERSION = '0.15'; |
3
|
9
|
|
|
9
|
|
583968
|
use 5.006; |
|
9
|
|
|
|
|
93
|
|
4
|
9
|
|
|
9
|
|
44
|
use strict; |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
161
|
|
5
|
9
|
|
|
9
|
|
33
|
use warnings; |
|
9
|
|
|
|
|
12
|
|
|
9
|
|
|
|
|
263
|
|
6
|
9
|
|
|
9
|
|
41
|
use Exporter; |
|
9
|
|
|
|
|
24
|
|
|
9
|
|
|
|
|
259
|
|
7
|
9
|
|
|
9
|
|
40
|
use Carp; |
|
9
|
|
|
|
|
13
|
|
|
9
|
|
|
|
|
524
|
|
8
|
9
|
|
|
9
|
|
57
|
use List::Util (); |
|
9
|
|
|
|
|
12
|
|
|
9
|
|
|
|
|
4689
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Exporter configuration. Nothing is exported by default; callers must ask
# for distance() and/or fastdistance() by name. No export tags are defined.
our @ISA         = ('Exporter');
our @EXPORT      = ();
our @EXPORT_OK   = ('distance', 'fastdistance');
our %EXPORT_TAGS = ();
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# distance($s, @t [, \%opt])
#
# Computes the edit distance between $s and each string in @t by calling
# fastdistance() once per target string.
#
# Arguments:
#   $s    - the source string.
#   @t    - one or more target strings.
#   \%opt - optional trailing hash reference; it is passed through unchanged
#           to fastdistance().
#
# Returns: in list context, one distance per target, in the order given.
#          In scalar context, only the distance to the first target.
#
# Croaks if fewer than two arguments remain once the options hash has been
# removed.
sub distance
{
    # A trailing hash reference is the options hash, not a string to compare.
    # The lexical is initialised unconditionally: declaring "my" under a
    # statement modifier ("my $x = ... if COND") is undefined behaviour.
    my $opt = (@_ > 0 && ref($_[-1]) eq 'HASH') ? pop(@_) : {};

    croak "distance() takes 2 or more arguments" if @_ < 2;

    my ($s, @t) = @_;
    my @results = map { fastdistance($s, $_, $opt) } @t;

    return wantarray ? @results : $results[0];
}
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Character comparison used when diacritics are significant: plain string
# equality.
my $eq_with_diacritics = sub {
    my ($x, $y) = @_;
    return $x eq $y;
};

# Character comparison used when diacritics are to be ignored. It is built
# lazily by fastdistance() on first use, so that Unicode::Collate is only
# loaded when the ignore_diacritics option is actually requested.
my $eq_without_diacritics;

# This is the "Iterative with two matrix rows" version
# from the wikipedia page
# http://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance
#
# fastdistance($s, $t [, \%opt])
#
# Arguments:
#   $s, $t - the two strings to compare; undef is treated as having length 0.
#   \%opt  - optional trailing hash reference. If its 'ignore_diacritics'
#            entry is true, characters are compared with a Unicode::Collate
#            collator created with level => 1 and no normalization, instead
#            of with "eq".
#
# Returns: the edit distance between $s and $t.
#
# Croaks unless exactly two arguments remain once the options hash has been
# removed.
sub fastdistance
{
    # A trailing hash reference is the options hash, not a string to compare.
    # The lexical is initialised unconditionally: declaring "my" under a
    # statement modifier ("my $x = ... if COND") is undefined behaviour.
    my $opt = (@_ > 0 && ref($_[-1]) eq 'HASH') ? pop(@_) : {};

    croak "fastdistance() takes 2 or 3 arguments" unless @_ == 2;
    my ($s, $t) = @_;

    my $eq;
    if ($opt->{ignore_diacritics}) {
        if (not defined $eq_without_diacritics) {
            require Unicode::Collate;
            my $collator = Unicode::Collate->new(normalization => undef, level => 1);
            $eq_without_diacritics = sub {
                return $collator->eq(@_);
            };
        }
        $eq = $eq_without_diacritics;
    }
    else {
        $eq = $eq_with_diacritics;
    }

    my $s_length = defined($s) ? length($s) : 0;
    my $t_length = defined($t) ? length($t) : 0;

    # Trivial cases: against an empty string the distance is the length of
    # the other string; identical strings are at distance zero.
    return $s_length if $t_length == 0;
    return $t_length if $s_length == 0;
    return 0 if $s eq $t;

    # The characters of $t are needed once per row, so split them up front.
    my @t_chars = split(//, $t);

    # @v0 is the previous row of the distance matrix, @v1 the row currently
    # being filled in. Row 0 is the cost of building each prefix of $t from
    # the empty string.
    my @v0 = (0 .. $t_length);
    my @v1;

    for my $i (0 .. $s_length - 1) {
        my $s_char = substr($s, $i, 1);

        # Column 0: cost of deleting the first $i + 1 characters of $s.
        $v1[0] = $i + 1;

        for my $j (0 .. $t_length - 1) {
            my $cost = $eq->($s_char, $t_chars[$j]) ? 0 : 1;
            $v1[$j + 1] = List::Util::min(
                              $v1[$j] + 1,          # insertion
                              $v0[$j + 1] + 1,      # deletion
                              $v0[$j] + $cost,      # substitution or match
                          );
        }

        # The row just computed becomes the previous row for the next pass.
        @v0 = @v1;
    }

    return $v1[$t_length];
}
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
1; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
__END__ |