line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Map::Tube::Plugin::FuzzyFind;
|
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
16068
|
use strict;
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
113
|
|
4
|
4
|
|
|
4
|
|
15
|
use warnings;
|
|
4
|
|
|
|
|
3
|
|
|
4
|
|
|
|
|
133
|
|
5
|
|
|
|
|
|
|
$Map::Tube::Plugin::FuzzyFind::VERSION = '0.06';
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Map::Tube::Plugin::FuzzyFind - Map::Tube add-on for finding stations and lines by inexact name.
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 VERSION
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Version 0.05_01 -- Internal development only, do not use in public
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut
|
16
|
|
|
|
|
|
|
|
17
|
4
|
|
|
4
|
|
70
|
use 5.006;
|
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
98
|
|
18
|
4
|
|
|
4
|
|
14
|
use Carp;
|
|
4
|
|
|
|
|
3
|
|
|
4
|
|
|
|
|
248
|
|
19
|
4
|
|
|
4
|
|
1777
|
use Moo::Role;
|
|
4
|
|
|
|
|
62416
|
|
|
4
|
|
|
|
|
20
|
|
20
|
4
|
|
|
4
|
|
3161
|
use namespace::clean;
|
|
4
|
|
|
|
|
40706
|
|
|
4
|
|
|
|
|
19
|
|
21
|
4
|
|
|
4
|
|
721
|
use Try::Tiny;
|
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
5603
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This is an add-on for L to find stations and lines by name, possibly
|
26
|
|
|
|
|
|
|
partly or inexactly specified. The module is a Moo role which gets plugged into the
|
27
|
|
|
|
|
|
|
Map::Tube::* family automatically once it is installed.
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use strict; use warnings;
|
33
|
|
|
|
|
|
|
use Map::Tube::London;
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $tube = Map::Tube::London->new();
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
print 'line matches exactly: ',
|
38
|
|
|
|
|
|
|
scalar( $tube->fuzzy_find( search => 'erloo',
|
39
|
|
|
|
|
|
|
objects => 'lines' ) ), "\n";
|
40
|
|
|
|
|
|
|
print 'line contains : ',
|
41
|
|
|
|
|
|
|
scalar( $tube->fuzzy_find( search => 'erloo',
|
42
|
|
|
|
|
|
|
objects => 'lines',
|
43
|
|
|
|
|
|
|
method => 'in' ) ), "\n";
|
44
|
|
|
|
|
|
|
print 'same thing : ',
|
45
|
|
|
|
|
|
|
scalar( $tube->fuzzy_find( 'erloo',
|
46
|
|
|
|
|
|
|
objects => 'lines',
|
47
|
|
|
|
|
|
|
method => 'in' ) ), "\n";
|
48
|
|
|
|
|
|
|
print 'same thing : ',
|
49
|
|
|
|
|
|
|
scalar( $tube->fuzzy_find( { search => 'erloo',
|
50
|
|
|
|
|
|
|
objects => 'lines',
|
51
|
|
|
|
|
|
|
method => 'in' } ) ), "\n";
|
52
|
|
|
|
|
|
|
print 'station re : ',
|
53
|
|
|
|
|
|
|
join( ' ', $tube->fuzzy_find( search => qr/[htrv]er/i,
|
54
|
|
|
|
|
|
|
objects => 'stations' ) ), "\n";
|
55
|
|
|
|
|
|
|
print 'station re : ',
|
56
|
|
|
|
|
|
|
join( ' ', $tube->fuzzy_find( search => '[htrv]er',
|
57
|
|
|
|
|
|
|
objects => 'stations',
|
58
|
|
|
|
|
|
|
method => 'regex' ) ), "\n";
|
59
|
|
|
|
|
|
|
print 'line fuzzy : ',
|
60
|
|
|
|
|
|
|
scalar( $tube->fuzzy_find( search => 'Kyrkle',
|
61
|
|
|
|
|
|
|
objects => 'stations',
|
62
|
|
|
|
|
|
|
method => 'levenshtein' ) ), "\n";
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 METHODS
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 fuzzy_find(%args)
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Find a tube line or station by some pattern, which may be partly or
|
70
|
|
|
|
|
|
|
inexactly specified. In array context, a (possibly empty) array of
|
71
|
|
|
|
|
|
|
Map::Tube::{Line,Node} objects is returned that matches the pattern. If
|
72
|
|
|
|
|
|
|
the matching method employed provides a measure of similarity, the
|
73
|
|
|
|
|
|
|
result set will be ordered by decreasing similarity. Otherwise, it will
|
74
|
|
|
|
|
|
|
be ordered alphabetically. In scalar context, a Map::Tube::{Line,Node}
|
75
|
|
|
|
|
|
|
object (or undef) is returned. In the case of more than one match, the
|
76
|
|
|
|
|
|
|
most similar or the alphabetically first match will be returned, as
|
77
|
|
|
|
|
|
|
applicable to the matching method.
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
C<%args> is a hash of optional named parameters to guide actions. It may
|
80
|
|
|
|
|
|
|
be specified as a hash or as a reference to a hash. For convenience, the
|
81
|
|
|
|
|
|
|
search pattern may be specified as the first argument, outside the hash.
|
82
|
|
|
|
|
|
|
While formally all arguments are optional, not specifying a serach
|
83
|
|
|
|
|
|
|
pattern will, predictably, not produce any exciting result.
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=over 4
|
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item search=...
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
The pattern to be searched for. It may be a string or a (possibly
|
90
|
|
|
|
|
|
|
precompiled) regular expression. The latter requires the matching
|
91
|
|
|
|
|
|
|
method (cf. below) to be C<'regex'>.
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item objects=...
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
This specifies whether stations or lines should be found. The value should
|
96
|
|
|
|
|
|
|
be either C<'lines'> or C<'stations'>. (C<'nodes'> is a synonym for
|
97
|
|
|
|
|
|
|
C<'stations'>.) If it is none of these, then both lines and stations will
|
98
|
|
|
|
|
|
|
be searched.
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item method=...
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The method for matching. If this parameter is missing, the default is to
|
103
|
|
|
|
|
|
|
use C<'regex'> if the pattern is a precompiled regex, or C<'exact'>
|
104
|
|
|
|
|
|
|
otherwise. Otherwise, the value should be one of the following.
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=over 4
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item 'exact'
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Exact matching will be performed (up to case). This is also the default
|
111
|
|
|
|
|
|
|
method if C<$pattern> is a string.
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item 'start'
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The given string pattern must match at the beginning of the line or station
|
116
|
|
|
|
|
|
|
name (up to case).
|
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item 'in'
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The given string pattern must match somewhere within the line or station
|
121
|
|
|
|
|
|
|
name (up to case).
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item 're' or 'regex'
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The given pattern is matched as a regex (case-insensitively) against the
|
126
|
|
|
|
|
|
|
line or station names. The pattern may also be specified as a precompiled
|
127
|
|
|
|
|
|
|
regex. In this case, its case sensitivity will be used unaltered.
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item 'soundex'
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
All names matching according to the soundex algorithm
|
132
|
|
|
|
|
|
|
(see L) will be returned. Actually, a variant of DEK's
|
133
|
|
|
|
|
|
|
original algorithm is used which also tries to cope with non-ASCII characters.
|
134
|
|
|
|
|
|
|
It works well only for English-like words.
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item 'metaphone'
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
All names matching according to the Metaphone algorithm
|
139
|
|
|
|
|
|
|
(see L) will be returned. This is a method that strives to be
|
140
|
|
|
|
|
|
|
"a modern version of soundex". It is also tuned towards English words.
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item 'levenshtein'
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
The closest names as calculated by the Levenshtein edit distance
|
145
|
|
|
|
|
|
|
(see L) will be returned.
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item 'ngram'
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
The closest names as calculated by a comparison of trigrams
|
150
|
|
|
|
|
|
|
(see L) will be returned. (Future versions may include
|
151
|
|
|
|
|
|
|
n-grams for n other than 3).
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item 'fuzzy'
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Currently, this is a synonym for C<'levenshtein'>.
|
156
|
|
|
|
|
|
|
I
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=item a code ref
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Reserved for future use.
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item ...
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Further fuzzy matchers may be added in the future according to interest.
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=back
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item maxdist=...
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
For some matchers that define a metric on srings (like Levenshtein),
|
171
|
|
|
|
|
|
|
this may specify the maximum allowable distance from the pattern specified.
|
172
|
|
|
|
|
|
|
The default is half the length of the pattern. If 0 is specified, no
|
173
|
|
|
|
|
|
|
limit will be applied. Note that, in array context, this may result in a large
|
174
|
|
|
|
|
|
|
number of returned values. In scalar context, a non-null value (including the
|
175
|
|
|
|
|
|
|
default value) may lead to no result being returned.
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item maxsize=...
|
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
In array context, this may be used to specify the maximum number of values
|
180
|
|
|
|
|
|
|
to return, in order to prevent flooding. There is no default. If 0 is specified,
|
181
|
|
|
|
|
|
|
no limit will be applied. In scalar context, this parameter is disregarded.
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=back
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Note: The code comes on purpose in just one sub, in order to prevent
|
188
|
|
|
|
|
|
|
# name space pollution of Map::Tube::xxx. Other solutions are possible,
|
189
|
|
|
|
|
|
|
# but currently it is felt they would introduce more hassle than clarity.
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
sub fuzzy_find { ## no critic(Subroutines::ProhibitExcessComplexity)
|
193
|
0
|
|
|
0
|
1
|
|
my( $self, @tmpargs ) = @_;
|
194
|
0
|
|
|
|
|
|
my( %args, $pattern );
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# *** This needs to be re-organised into a hash with pointers to methods.
|
197
|
0
|
|
|
|
|
|
my %methods = ( exact => 1,
|
198
|
|
|
|
|
|
|
start => 2,
|
199
|
|
|
|
|
|
|
in => 3,
|
200
|
|
|
|
|
|
|
regex => 4,
|
201
|
|
|
|
|
|
|
re => 4,
|
202
|
|
|
|
|
|
|
soundex => 5,
|
203
|
|
|
|
|
|
|
metaphone => 6,
|
204
|
|
|
|
|
|
|
levenshtein => 7,
|
205
|
|
|
|
|
|
|
fuzzy => 7,
|
206
|
|
|
|
|
|
|
ngrams => 8,
|
207
|
|
|
|
|
|
|
);
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# unify the different ways of passing arguments:
|
210
|
0
|
0
|
0
|
|
|
|
if ( scalar(@tmpargs) == 1 ) {
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
211
|
0
|
0
|
|
|
|
|
if ( ref( $tmpargs[0] ) eq 'HASH' ) {
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# everything in just one hash ref.
|
214
|
0
|
|
|
|
|
|
%args = %{ $tmpargs[0] };
|
|
0
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
$pattern = $args{search};
|
216
|
|
|
|
|
|
|
} else {
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# must be a lone string or regex, with no other args
|
219
|
0
|
|
|
|
|
|
$pattern = $tmpargs[0];
|
220
|
|
|
|
|
|
|
}
|
221
|
|
|
|
|
|
|
} elsif ( ( scalar(@tmpargs) == 2 ) && ( ref( $tmpargs[1] ) eq 'HASH' ) ) {
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Pattern plus hash ref
|
224
|
0
|
|
|
|
|
|
%args = %{ $tmpargs[1] };
|
|
0
|
|
|
|
|
|
|
225
|
0
|
|
|
|
|
|
$pattern = $tmpargs[0]; # bad luck if conflicting specs...
|
226
|
|
|
|
|
|
|
} elsif ( scalar(@tmpargs) % 2 ) {
|
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# odd number of args >= 3. Must be pattern plus hash
|
229
|
0
|
|
|
|
|
|
$pattern = shift(@tmpargs);
|
230
|
0
|
|
|
|
|
|
%args = @tmpargs;
|
231
|
|
|
|
|
|
|
} else {
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# Even number of args >= 2. This must be a real hash.
|
234
|
0
|
|
|
|
|
|
%args = @tmpargs;
|
235
|
0
|
|
|
|
|
|
$pattern = $args{search};
|
236
|
|
|
|
|
|
|
} ## end else [ if ( scalar(@tmpargs) ...)]
|
237
|
|
|
|
|
|
|
|
238
|
0
|
0
|
|
|
|
|
return unless defined $pattern;
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Now all args are in the non-empty %args hash, and we know what to search for.
|
241
|
0
|
|
0
|
|
|
|
$args{objects} ||= '';
|
242
|
0
|
0
|
|
|
|
|
$args{objects} = 'stations' if ( $args{objects} eq 'nodes' );
|
243
|
0
|
|
0
|
|
|
|
$args{maxsize} ||= 0;
|
244
|
0
|
0
|
0
|
|
|
|
$args{method} ||= ( ref( $args{search} ) eq 'Regexp' ) ? 'regex' : 'exact';
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Procure the list of things in which to search
|
247
|
0
|
|
|
|
|
|
my $source;
|
248
|
0
|
0
|
|
|
|
|
if ( $args{objects} eq 'lines' ) {
|
|
|
0
|
|
|
|
|
|
249
|
0
|
|
|
|
|
|
$source = $self->get_lines();
|
250
|
|
|
|
|
|
|
} elsif ( $args{objects} eq 'stations' ) {
|
251
|
0
|
|
|
|
|
|
$source = [ values %{ $self->nodes() } ];
|
|
0
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
} else {
|
253
|
0
|
|
|
|
|
|
$source = [ @{ $self->get_lines() }, values %{ $self->nodes() } ];
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
}
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Find the numerical index of the method which we should use:
|
257
|
0
|
|
0
|
|
|
|
my $nmethod = $methods{ lc( $args{method} ) } || 1;
|
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
|
|
|
my @result;
|
260
|
0
|
0
|
0
|
|
|
|
if ( $nmethod == 1 ) { ## no critic(ControlStructures::ProhibitCascadingIfElse)
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# Exact matching (up to case)
|
263
|
0
|
|
|
|
|
|
$pattern = lc($pattern);
|
264
|
0
|
|
|
|
|
|
@result = sort { $a->name() cmp $b->name() } grep { lc( $_->name() ) eq $pattern } @{$source};
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
} elsif ( $nmethod == 2 ) {
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# Match at start
|
268
|
0
|
|
|
|
|
|
$pattern = lc($pattern);
|
269
|
0
|
|
|
|
|
|
@result = sort { $a->name() cmp $b->name() } grep { index( lc( $_->name() ), $pattern ) == 0 } @{$source};
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
} elsif ( $nmethod == 3 ) {
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Match some substring
|
273
|
0
|
|
|
|
|
|
$pattern = lc($pattern);
|
274
|
0
|
|
|
|
|
|
@result = sort { $a->name() cmp $b->name() } grep { index( lc( $_->name() ), $pattern ) >= 0 } @{$source};
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} elsif ( $nmethod == 4 ) {
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Match regex
|
278
|
0
|
0
|
|
|
|
|
$pattern = qr/$pattern/i unless ref($pattern) eq 'Regexp';
|
279
|
0
|
|
|
|
|
|
@result = sort { $a->name() cmp $b->name() } grep { $_->name() =~ $pattern } @{$source};
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
} elsif ( ( $nmethod >= 5 ) && ( $nmethod <= 8 ) ) {
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# Use some well-known fuzzy matcher
|
283
|
|
|
|
|
|
|
# *** This needs to be cleaned up.
|
284
|
0
|
|
|
|
|
|
my( $module, $loaded, $matcher );
|
285
|
0
|
|
|
|
|
|
$pattern = lc($pattern);
|
286
|
0
|
|
|
|
|
|
$pattern =~ s/\s+//g;
|
287
|
|
|
|
|
|
|
try {
|
288
|
0
|
0
|
|
0
|
|
|
if ( $nmethod == 5 ) {
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# Knuth's soundex, or some variant thereof, adapted for non-ASCII codes.
|
291
|
0
|
|
|
|
|
|
$module = 'Text::Soundex';
|
292
|
0
|
|
|
|
|
|
$loaded = eval qq{ require $module; 1 };
|
293
|
0
|
0
|
|
|
|
|
croak unless $loaded;
|
294
|
0
|
|
|
|
|
|
my $pattern_soundex = Text::Soundex::soundex_unicode($pattern);
|
295
|
|
|
|
|
|
|
$matcher = sub {
|
296
|
0
|
0
|
|
|
|
|
return ( Text::Soundex::soundex_unicode( $_[0] ) eq $pattern_soundex ) ? 0 : 1;
|
297
|
0
|
|
|
|
|
|
};
|
298
|
0
|
|
|
|
|
|
$args{maxdist} = 0.5; # so that exactly the matches will be retained in the map - sort - grep.
|
299
|
|
|
|
|
|
|
} elsif ( $nmethod == 6 ) {
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# Metaphone, (claiming to be) "A modern Soundex". Still with a slant towards English pronunciation.
|
302
|
0
|
|
|
|
|
|
$module = 'Text::Metaphone';
|
303
|
0
|
|
|
|
|
|
$loaded = eval qq{ require $module; 1 };
|
304
|
0
|
0
|
|
|
|
|
croak unless $loaded;
|
305
|
0
|
|
|
|
|
|
my $pattern_metacode = Text::Metaphone::Metaphone($pattern);
|
306
|
|
|
|
|
|
|
$matcher = sub {
|
307
|
0
|
0
|
|
|
|
|
return ( Text::Metaphone::Metaphone( $_[0] ) eq $pattern_metacode ) ? 0 : 1;
|
308
|
0
|
|
|
|
|
|
};
|
309
|
0
|
|
|
|
|
|
$args{maxdist} = 0.5; # so that exactly the matches will be retained in the map - sort - grep.
|
310
|
|
|
|
|
|
|
} elsif ( $nmethod == 7 ) {
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# Levenshtein edit distance
|
313
|
0
|
|
|
|
|
|
$module = 'Text::Levenshtein';
|
314
|
0
|
|
|
|
|
|
$loaded = eval qq{ require $module; 1 };
|
315
|
0
|
0
|
|
|
|
|
croak unless $loaded;
|
316
|
0
|
|
0
|
|
|
|
$args{maxdist} ||= ( length($pattern) + 1 ) / 2;
|
317
|
0
|
|
|
|
|
|
$matcher = sub { Text::Levenshtein::distance( $_[0], $pattern ) };
|
|
0
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
} elsif ( $nmethod == 8 ) {
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Comparison by number of matching n-grams.
|
321
|
|
|
|
|
|
|
# This needs to be re-organised so that multiple calls with the same parameters
|
322
|
|
|
|
|
|
|
# (but different search strings) will re-use the string base (not the search string).
|
323
|
0
|
|
|
|
|
|
$module = 'String::Trigram';
|
324
|
0
|
|
|
|
|
|
$loaded = eval qq{ require $module; 1 };
|
325
|
0
|
0
|
|
|
|
|
croak unless $loaded;
|
326
|
0
|
|
0
|
|
|
|
$args{maxdist} ||= length($pattern) / 2;
|
327
|
0
|
|
|
|
|
|
my %st_args;
|
328
|
0
|
|
|
|
|
|
$st_args{minSim} = 1 - $args{maxdist} / length($pattern);
|
329
|
0
|
0
|
|
|
|
|
$st_args{minSim} = 0 if ( $st_args{minSim} < 0 );
|
330
|
0
|
0
|
|
|
|
|
$st_args{minSim} = 1 if ( $st_args{minSim} > 1 );
|
331
|
0
|
|
0
|
|
|
|
$st_args{ngram} = $args{windowsize} || 3;
|
332
|
0
|
|
|
|
|
|
$st_args{warp} = 2;
|
333
|
0
|
|
|
|
|
|
$st_args{cmpBase} = [$pattern];
|
334
|
0
|
|
|
|
|
|
my $trigrammer = String::Trigram->new(%st_args);
|
335
|
|
|
|
|
|
|
$matcher = sub {
|
336
|
0
|
|
|
|
|
|
my $str = $_[0];
|
337
|
0
|
|
|
|
|
|
$str =~ s/\s+//g;
|
338
|
0
|
|
|
|
|
|
my %result;
|
339
|
0
|
|
|
|
|
|
my $nresults = $trigrammer->getSimilarStrings( $str, \%result );
|
340
|
0
|
0
|
|
|
|
|
return $nresults ? ( ( 1 - $result{$pattern} ) * length($pattern) ) : ( length($pattern) + 1 );
|
341
|
0
|
|
|
|
|
|
};
|
342
|
|
|
|
|
|
|
} ## end elsif ( $nmethod == 8 )
|
343
|
|
|
|
|
|
|
} ## end try
|
344
|
|
|
|
|
|
|
catch {
|
345
|
0
|
|
|
0
|
|
|
croak "Matcher module $module not loaded or not executed: $_";
|
346
|
0
|
|
|
|
|
|
};
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# $matcher is now a coderef that takes one argument, viz., a string to match.
|
349
|
|
|
|
|
|
|
# It returns the distance from the pattern; the bigger the distance, the more dissimilar.
|
350
|
|
|
|
|
|
|
# In case of matchers that provide just a "match/no match" information,
|
351
|
|
|
|
|
|
|
# 0 means "match" and 1 means "no match".
|
352
|
|
|
|
|
|
|
|
353
|
0
|
|
|
|
|
|
my %name2obj = map { $_->name() => $_ } @{$source};
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
|
my %distance = map { $_ => $matcher->( lc($_) ) } keys %name2obj;
|
|
0
|
|
|
|
|
|
|
356
|
0
|
0
|
|
|
|
|
@result = map { $name2obj{$_} }
|
|
0
|
|
|
|
|
|
|
357
|
0
|
0
|
|
|
|
|
sort { ( $distance{$a} <=> $distance{$b} ) || ( $a cmp $b ) }
|
358
|
0
|
|
|
|
|
|
grep { ( $args{maxdist} <= 0 ) || ( $distance{$_} <= $args{maxdist} ) } keys %distance;
|
359
|
|
|
|
|
|
|
} else {
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# nada
|
362
|
|
|
|
|
|
|
}
|
363
|
|
|
|
|
|
|
|
364
|
0
|
0
|
|
|
|
|
if (wantarray) {
|
365
|
0
|
0
|
0
|
|
|
|
return @result if ( ( $args{maxsize} == 0 ) || ( scalar(@result) <= $args{maxsize} ) );
|
366
|
0
|
|
|
|
|
|
return @result[ 0 .. ( $args{maxsize} - 1 ) ];
|
367
|
|
|
|
|
|
|
}
|
368
|
|
|
|
|
|
|
|
369
|
0
|
0
|
|
|
|
|
return unless @result;
|
370
|
0
|
|
|
|
|
|
return $result[0];
|
371
|
|
|
|
|
|
|
} ## end sub fuzzy_find
|
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 AUTHOR
|
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Gisbert W. Selke, TapirSoft Selke & Selke GbR, C<< >>
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 SEE ALSO
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
L
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 CONTRIBUTORS
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Thanks to Mohammad S Anwar, author of L, for that module, for great feedback,
|
384
|
|
|
|
|
|
|
discussions, advice, debugging help, and willingness to refactor his code.
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head1 BUGS
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or
|
389
|
|
|
|
|
|
|
through the web interface at L.
|
390
|
|
|
|
|
|
|
I will be notified and then you'll automatically be notified of progress on your
|
391
|
|
|
|
|
|
|
bug as I make changes.
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head1 SUPPORT
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command.
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
perldoc Map::Tube::Plugin::FuzzyFind
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
You can also look for information at:
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=over 4
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here)
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
L
|
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
L
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item * CPAN Ratings
|
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
L
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item * Search CPAN
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
L
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=back
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT
|
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Copyright (C) 2015 Gisbert W. Selke, Tapirsoft Selke & Selke GbR
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
426
|
|
|
|
|
|
|
the terms of the the Artistic License (2.0). You may obtain a copy of the full
|
427
|
|
|
|
|
|
|
license at:
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
L
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified Versions is
|
432
|
|
|
|
|
|
|
governed by this Artistic License.By using, modifying or distributing the Package,
|
433
|
|
|
|
|
|
|
you accept this license. Do not use, modify, or distribute the Package, if you do
|
434
|
|
|
|
|
|
|
not accept this license.
|
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made by someone
|
437
|
|
|
|
|
|
|
other than you,you are nevertheless required to ensure that your Modified Version
|
438
|
|
|
|
|
|
|
complies with the requirements of this license.
|
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service mark,
|
441
|
|
|
|
|
|
|
tradename, or logo of the Copyright Holder.
|
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge patent license
|
444
|
|
|
|
|
|
|
to make, have made, use, offer to sell, sell, import and otherwise transfer the
|
445
|
|
|
|
|
|
|
Package with respect to any patent claims licensable by the Copyright Holder that
|
446
|
|
|
|
|
|
|
are necessarily infringed by the Package. If you institute patent litigation
|
447
|
|
|
|
|
|
|
(including a cross-claim or counterclaim) against any party alleging that the
|
448
|
|
|
|
|
|
|
Package constitutes direct or contributory patent infringement,then this Artistic
|
449
|
|
|
|
|
|
|
License to you shall terminate on the date that such litigation is filed.
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND
|
452
|
|
|
|
|
|
|
CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
|
453
|
|
|
|
|
|
|
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
|
454
|
|
|
|
|
|
|
NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS
|
455
|
|
|
|
|
|
|
REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT,
|
456
|
|
|
|
|
|
|
INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
|
457
|
|
|
|
|
|
|
OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
1; # End of Map::Tube::Plugin::FuzzyFind
|