line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Levenshtein; |
2
|
|
|
|
|
|
|
$Text::Levenshtein::VERSION = '0.14'; |
3
|
9
|
|
|
9
|
|
726829
|
use 5.006; |
|
9
|
|
|
|
|
112
|
|
4
|
9
|
|
|
9
|
|
49
|
use strict; |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
214
|
|
5
|
9
|
|
|
9
|
|
42
|
use warnings; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
254
|
|
6
|
9
|
|
|
9
|
|
45
|
use Exporter; |
|
9
|
|
|
|
|
21
|
|
|
9
|
|
|
|
|
396
|
|
7
|
9
|
|
|
9
|
|
63
|
use Carp; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
538
|
|
8
|
9
|
|
|
9
|
|
56
|
use List::Util (); |
|
9
|
|
|
|
|
15
|
|
|
9
|
|
|
|
|
5923
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
11
|
|
|
|
|
|
|
our @EXPORT = (); |
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw(distance fastdistance); |
13
|
|
|
|
|
|
|
our %EXPORT_TAGS = (); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub distance |
17
|
|
|
|
|
|
|
{ |
18
|
128
|
100
|
100
|
128
|
1
|
44709
|
my $opt = pop(@_) if @_ > 0 && ref($_[-1]) eq 'HASH'; |
19
|
128
|
100
|
|
|
|
615
|
croak "distance() takes 2 or more arguments" if @_ < 2; |
20
|
126
|
|
|
|
|
286
|
my ($s,@t)=@_; |
21
|
126
|
|
|
|
|
196
|
my @results; |
22
|
|
|
|
|
|
|
|
23
|
126
|
100
|
|
|
|
262
|
$opt = {} if not defined $opt; |
24
|
|
|
|
|
|
|
|
25
|
126
|
|
|
|
|
219
|
foreach my $t (@t) { |
26
|
134
|
|
|
|
|
270
|
push(@results, fastdistance($s, $t, $opt)); |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
126
|
100
|
|
|
|
428
|
return wantarray ? @results : $results[0]; |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my $eq_with_diacritics = sub { |
33
|
|
|
|
|
|
|
my ($x, $y) = @_; |
34
|
|
|
|
|
|
|
return $x eq $y; |
35
|
|
|
|
|
|
|
}; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
my $eq_without_diacritics; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# This is the "Iterative with two matrix rows" version |
40
|
|
|
|
|
|
|
# from the wikipedia page |
41
|
|
|
|
|
|
|
# http://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance |
42
|
|
|
|
|
|
|
sub fastdistance |
43
|
|
|
|
|
|
|
{ |
44
|
254
|
100
|
100
|
254
|
1
|
38133
|
my $opt = pop(@_) if @_ > 0 && ref($_[-1]) eq 'HASH'; |
45
|
254
|
100
|
|
|
|
857
|
croak "fastdistance() takes 2 or 3 arguments" unless @_ == 2; |
46
|
251
|
|
|
|
|
445
|
my ($s, $t) = @_; |
47
|
251
|
|
|
|
|
794
|
my (@v0, @v1); |
48
|
251
|
|
|
|
|
0
|
my ($i, $j); |
49
|
251
|
|
|
|
|
0
|
my $eq; |
50
|
|
|
|
|
|
|
|
51
|
251
|
100
|
|
|
|
574
|
$opt = {} if not defined $opt; |
52
|
251
|
100
|
|
|
|
520
|
if ($opt->{ignore_diacritics}) { |
53
|
12
|
100
|
|
|
|
33
|
if (not defined $eq_without_diacritics) { |
54
|
1
|
|
|
|
|
793
|
require Unicode::Collate; |
55
|
1
|
|
|
|
|
8909
|
my $collator = Unicode::Collate->new(normalization => undef, level => 1); |
56
|
|
|
|
|
|
|
$eq_without_diacritics = sub { |
57
|
164
|
|
|
164
|
|
356
|
return $collator->eq(@_); |
58
|
1
|
|
|
|
|
46264
|
}; |
59
|
|
|
|
|
|
|
} |
60
|
12
|
|
|
|
|
23
|
$eq = $eq_without_diacritics; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
else { |
63
|
239
|
|
|
|
|
341
|
$eq = $eq_with_diacritics; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
251
|
100
|
|
|
|
517
|
return 0 if $s eq $t; |
67
|
231
|
100
|
66
|
|
|
943
|
return length($s) if !$t || length($t) == 0; |
68
|
228
|
100
|
66
|
|
|
692
|
return length($t) if !$s || length($s) == 0; |
69
|
|
|
|
|
|
|
|
70
|
223
|
|
|
|
|
317
|
my $s_length = length($s); |
71
|
223
|
|
|
|
|
324
|
my $t_length = length($t); |
72
|
|
|
|
|
|
|
|
73
|
223
|
|
|
|
|
485
|
for ($i = 0; $i < $t_length + 1; $i++) { |
74
|
1242
|
|
|
|
|
2274
|
$v0[$i] = $i; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
223
|
|
|
|
|
430
|
for ($i = 0; $i < $s_length; $i++) { |
78
|
1008
|
|
|
|
|
1403
|
$v1[0] = $i + 1; |
79
|
|
|
|
|
|
|
|
80
|
1008
|
|
|
|
|
1707
|
for ($j = 0; $j < $t_length; $j++) { |
81
|
|
|
|
|
|
|
# my $cost = substr($s, $i, 1) eq substr($t, $j, 1) ? 0 : 1; |
82
|
5068
|
100
|
|
|
|
10219
|
my $cost = $eq->(substr($s, $i, 1), substr($t, $j, 1)) ? 0 : 1; |
83
|
5068
|
|
|
|
|
43651
|
$v1[$j + 1] = List::Util::min( |
84
|
|
|
|
|
|
|
$v1[$j] + 1, |
85
|
|
|
|
|
|
|
$v0[$j + 1] + 1, |
86
|
|
|
|
|
|
|
$v0[$j] + $cost, |
87
|
|
|
|
|
|
|
); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
1008
|
|
|
|
|
1952
|
for ($j = 0; $j < $t_length + 1; $j++) { |
91
|
6076
|
|
|
|
|
10815
|
$v0[$j] = $v1[$j]; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
223
|
|
|
|
|
687
|
return $v1[ $t_length]; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
1; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
__END__ |