line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package String::KeyboardDistance; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5.005_62; |
4
|
1
|
|
|
1
|
|
1174
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
32
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3713
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Exporter; |
8
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
String::KeyboardDistance - String Comparison Algorithm |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use String::KeyboardDistance qw(:all); |
17
|
|
|
|
|
|
|
my $s1 = 'Apple'; |
18
|
|
|
|
|
|
|
my $s2 = 'Wople'; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# compute a match probability |
21
|
|
|
|
|
|
|
my $pr = qwerty_keyboard_distance_match('Apple','Wople'); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# find the keyboard distance between two strings |
24
|
|
|
|
|
|
|
my $dst = qwerty_keyboard_distance('IBM','HAL'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# find the keyboard distance between two characters |
27
|
|
|
|
|
|
|
$dst = qwerty_char_distance('a','v'); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
print "maximum distance: $qwerty_max_distance\n"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This module implmements a version of keyboard distance for fuzzy |
34
|
|
|
|
|
|
|
string matching. Keyboard distance is a measure of the physical |
35
|
|
|
|
|
|
|
distance between two keys on a keyboard. For example, 'g' has a |
36
|
|
|
|
|
|
|
distance of 1 from the keys 'r', 't', 'y', 'f', 'h', 'v', 'b', |
37
|
|
|
|
|
|
|
and 'n'. Immediate diagonals (like ''r, 'y', 'v', and 'n') are |
38
|
|
|
|
|
|
|
considered to have a distance of 1 instead of 1.414 to help to |
39
|
|
|
|
|
|
|
prevent horizontal/vertical bias. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
A match probability between two strings is computed from the total |
42
|
|
|
|
|
|
|
distances between corresponding characters divided by the length of the |
43
|
|
|
|
|
|
|
longer string multiplied by the maximum distance between the two furthest |
44
|
|
|
|
|
|
|
keys on the keyboard. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
The functions in this module use a simple grid of keys. For the qwerty |
47
|
|
|
|
|
|
|
mapping, the grid is similar to the following: |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
| 0 1 2 3 4 5 6 7 8 9 10 11 12 13 |
50
|
|
|
|
|
|
|
--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
51
|
|
|
|
|
|
|
0 | ` | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | - | = | | |
52
|
|
|
|
|
|
|
1 | | q | w | e | r | t | y | u | i | o | p | [ | ] | \ | |
53
|
|
|
|
|
|
|
2 | | a | s | d | f | g | h | j | k | l | ; | ' | | | |
54
|
|
|
|
|
|
|
3 | | z | x | c | v | b | n | m | , | . | / | | | | |
55
|
|
|
|
|
|
|
--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The grids for both qwerty and dvorak are based on PC style keyboards. |
58
|
|
|
|
|
|
|
Shifted characters have the same coordinates (a and A, 6 and ^). |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head2 EXPORT |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This module exports no symbols by default. The following functions |
63
|
|
|
|
|
|
|
are available for export through EXPORT_OK: |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
build_qwerty_map |
66
|
|
|
|
|
|
|
max_qwerty_distance |
67
|
|
|
|
|
|
|
qwerty_char_distance |
68
|
|
|
|
|
|
|
qwerty_keyboard_distance |
69
|
|
|
|
|
|
|
qwerty_keyboard_distance_match |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
build_dvorak_map |
72
|
|
|
|
|
|
|
max_dvorak_distance |
73
|
|
|
|
|
|
|
dvorak_char_distance |
74
|
|
|
|
|
|
|
dvorak_keyboard_distance |
75
|
|
|
|
|
|
|
dvorak_keyboard_distance_match |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
grid_distance |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
The following varialbes are availalbe for export through EXPORT_OK: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
@qwerty_grid |
82
|
|
|
|
|
|
|
$qwerty_map |
83
|
|
|
|
|
|
|
$qwerty_max_distance |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
@dvorak_grid |
86
|
|
|
|
|
|
|
$dvorak_map |
87
|
|
|
|
|
|
|
$dvorak_max_distance |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Additionaly, this module supports the following export tags: |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
:Functions - import all functions |
92
|
|
|
|
|
|
|
:Variables - import all variables |
93
|
|
|
|
|
|
|
:all - import both functions and variables |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
our @EXP_SUBS = qw( |
98
|
|
|
|
|
|
|
build_qwerty_map |
99
|
|
|
|
|
|
|
max_qwerty_distance |
100
|
|
|
|
|
|
|
qwerty_char_distance |
101
|
|
|
|
|
|
|
qwerty_keyboard_distance |
102
|
|
|
|
|
|
|
qwerty_keyboard_distance_match |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
build_dvorak_map |
105
|
|
|
|
|
|
|
max_dvorak_distance |
106
|
|
|
|
|
|
|
dvorak_char_distance |
107
|
|
|
|
|
|
|
dvorak_keyboard_distance |
108
|
|
|
|
|
|
|
dvorak_keyboard_distance_match |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
grid_distance |
111
|
|
|
|
|
|
|
); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
our @EXP_VARS = qw( |
114
|
|
|
|
|
|
|
@qwerty_grid |
115
|
|
|
|
|
|
|
$qwerty_map |
116
|
|
|
|
|
|
|
$qwerty_max_distance |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
@dvorak_grid |
119
|
|
|
|
|
|
|
$dvorak_map |
120
|
|
|
|
|
|
|
$dvorak_max_distance |
121
|
|
|
|
|
|
|
); |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
124
|
|
|
|
|
|
|
'all' => [ @EXP_SUBS, @EXP_VARS ], |
125
|
|
|
|
|
|
|
'Functions' => \@EXP_SUBS, |
126
|
|
|
|
|
|
|
'Variables' => \@EXP_VARS, |
127
|
|
|
|
|
|
|
); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
130
|
|
|
|
|
|
|
our @EXPORT = qw(); |
131
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
our @qwerty_grid = ( |
134
|
|
|
|
|
|
|
[[ split(//,q{`1234567890-= }) ],[ split(//,q{~!@#$%^&*()_+ })]], |
135
|
|
|
|
|
|
|
[[ split(//,q{ qwertyuiop[]\\})],[ split(//,q( QWERTYUIOP{}|))]], |
136
|
|
|
|
|
|
|
[[ split(//,q{ asdfghjkl;' }) ],[ split(//,q{ ASDFGHJKL:" })]], |
137
|
|
|
|
|
|
|
[[ split(//,q{ zxcvbnm,./ }) ],[ split(//,q{ ZXCVBNM<>? })]], |
138
|
|
|
|
|
|
|
); |
139
|
|
|
|
|
|
|
our $qwerty_map = build_qwerty_map(); |
140
|
|
|
|
|
|
|
our $qwerty_max_distance = max_qwerty_distance(); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
our @dvorak_grid = ( |
143
|
|
|
|
|
|
|
[[ split(//,q{`1234567890[] }) ],[ split(//,q"~!@#$%^&*(){} ")]], |
144
|
|
|
|
|
|
|
[[ split(//,q{ ',.pyfgcrl/=\\})],[ split(//,q{ "<>PYFGCRL?+|})]], |
145
|
|
|
|
|
|
|
[[ split(//,q{ aoeuidhtns- }) ],[ split(//,q{ AOEUIDHTNS_ })]], |
146
|
|
|
|
|
|
|
[[ split(//,q{ ;qjkxbmwvz }) ],[ split(//,q{ :QJKXBMWVZ })]], |
147
|
|
|
|
|
|
|
); |
148
|
|
|
|
|
|
|
our $dvorak_map = build_dvorak_map(); |
149
|
|
|
|
|
|
|
our $dvorak_max_distance = max_dvorak_distance(); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 API REFERENCE |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 build_qwerty_map |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
param: array reference to receive the map [optional] |
158
|
|
|
|
|
|
|
return: the map (array reference) |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
This function builds a map of each character to its corresponding |
161
|
|
|
|
|
|
|
location on the keyboard. The location is derived by looking at the |
162
|
|
|
|
|
|
|
keyboard as a simple grid in which the location of keys on the keyboard |
163
|
|
|
|
|
|
|
are considereed to be the same as their shifted values. The following |
164
|
|
|
|
|
|
|
keys are considered to have the same location: |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
1 and ! |
167
|
|
|
|
|
|
|
r and r |
168
|
|
|
|
|
|
|
/ and ? |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
The map is an array, where the index is the value returned by chr() for |
171
|
|
|
|
|
|
|
the character at that location. The value is an array ref describing |
172
|
|
|
|
|
|
|
the location of the character on the keyboard. The first element is the |
173
|
|
|
|
|
|
|
y position, the second is the x, and the third represents the shift value |
174
|
|
|
|
|
|
|
(0 for non-shifted, 1 for shifted). All non-keyable characters, including |
175
|
|
|
|
|
|
|
tabs and spaces, will have undef values in the map, meaning they have no |
176
|
|
|
|
|
|
|
point on the grid. These non-key characterss should be considered to be |
177
|
|
|
|
|
|
|
the maximum distance away from any other character except themselves. |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The map is constructed at run-time from the @qwerty_grid package global, |
180
|
|
|
|
|
|
|
and cached in the $qwerty_map package global. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=cut |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub build_qwerty_map |
185
|
|
|
|
|
|
|
{ |
186
|
1
|
|
50
|
1
|
1
|
18
|
my $map = shift||[]; |
187
|
1
|
|
|
|
|
3
|
my($i,$j); |
188
|
1
|
|
|
|
|
5
|
for($i = 0; $i < @qwerty_grid; ++$i) { |
189
|
4
|
|
|
|
|
6
|
for($j = 0; $j < @{$qwerty_grid[$i][0]}; ++$j) { |
|
58
|
|
|
|
|
133
|
|
190
|
54
|
50
|
|
|
|
102
|
next unless defined $qwerty_grid[$i][0][$j]; |
191
|
54
|
100
|
|
|
|
114
|
next if ' ' eq $qwerty_grid[$i][0][$j]; |
192
|
|
|
|
|
|
|
# print "building map0: $i,$j: ",$qwerty_grid[$i][0][$j],':', |
193
|
|
|
|
|
|
|
# ord($qwerty_grid[$i][0][$j]),"\n"; |
194
|
47
|
|
|
|
|
127
|
$map->[ord $qwerty_grid[$i][0][$j]] = [$i,$j,0]; |
195
|
|
|
|
|
|
|
} |
196
|
4
|
|
|
|
|
7
|
for($j = 0; $j < @{$qwerty_grid[$i][1]}; ++$j) { |
|
60
|
|
|
|
|
137
|
|
197
|
56
|
50
|
|
|
|
106
|
next unless defined $qwerty_grid[$i][1][$j]; |
198
|
56
|
100
|
|
|
|
118
|
next if ' ' eq $qwerty_grid[$i][1][$j]; |
199
|
|
|
|
|
|
|
#print "building map1: $i,$j: ",$qwerty_grid[$i][1][$j],':', |
200
|
|
|
|
|
|
|
# ord($qwerty_grid[$i][1][$j]),"\n"; |
201
|
47
|
|
|
|
|
135
|
$map->[ord $qwerty_grid[$i][1][$j]] = [$i,$j,1]; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
1
|
|
|
|
|
3
|
return $map; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 build_dvorak_map |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
param: array reference to receive the map [optional] |
210
|
|
|
|
|
|
|
return: the map (array reference) |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This function is identical to build_qwerty_map, with the exception |
213
|
|
|
|
|
|
|
that it builds a map for dvorak keyboards, and the map is constructed |
214
|
|
|
|
|
|
|
from the @dvorak_grid package global, and cached in the $dvorak_map |
215
|
|
|
|
|
|
|
package global. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub build_dvorak_map |
220
|
|
|
|
|
|
|
{ |
221
|
1
|
|
50
|
1
|
1
|
7
|
my $map = shift||[]; |
222
|
1
|
|
|
|
|
2
|
my($i,$j); |
223
|
1
|
|
|
|
|
3
|
for($i = 0; $i < @dvorak_grid; ++$i) { |
224
|
4
|
|
|
|
|
5
|
for($j = 0; $j < @{$dvorak_grid[$i][0]}; ++$j) { |
|
60
|
|
|
|
|
149
|
|
225
|
56
|
50
|
|
|
|
101
|
next unless defined $dvorak_grid[$i][0][$j]; |
226
|
56
|
100
|
|
|
|
104
|
next if ' ' eq $dvorak_grid[$i][0][$j]; |
227
|
|
|
|
|
|
|
# print "building map0: $i,$j: ",$dvorak_grid[$i][0][$j],':', |
228
|
|
|
|
|
|
|
# ord($dvorak_grid[$i][0][$j]),"\n"; |
229
|
47
|
|
|
|
|
107
|
$map->[ord $dvorak_grid[$i][0][$j]] = [$i,$j,0]; |
230
|
|
|
|
|
|
|
} |
231
|
4
|
|
|
|
|
7
|
for($j = 0; $j < @{$dvorak_grid[$i][1]}; ++$j) { |
|
60
|
|
|
|
|
100
|
|
232
|
56
|
50
|
|
|
|
87
|
next unless defined $dvorak_grid[$i][1][$j]; |
233
|
56
|
100
|
|
|
|
92
|
next if ' ' eq $dvorak_grid[$i][1][$j]; |
234
|
|
|
|
|
|
|
#print "building map1: $i,$j: ",$dvorak_grid[$i][1][$j],':', |
235
|
|
|
|
|
|
|
# ord($dvorak_grid[$i][1][$j]),"\n"; |
236
|
47
|
|
|
|
|
101
|
$map->[ord $dvorak_grid[$i][1][$j]] = [$i,$j,1]; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
1
|
|
|
|
|
3
|
return $map; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head2 max_qwerty_distance |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
return: maximum distance |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
This function dynamicly computes the maximum distance between |
247
|
|
|
|
|
|
|
keys in the qwerty map. The maximum key distance is stored in |
248
|
|
|
|
|
|
|
the $qwerty_max_distance package global. |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=cut |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
sub max_qwerty_distance |
253
|
|
|
|
|
|
|
{ |
254
|
1
|
|
|
1
|
1
|
2
|
my $max = 0; |
255
|
1
|
|
|
|
|
5
|
for(my $i = 0; $i < @$qwerty_map; ++$i) { |
256
|
127
|
100
|
|
|
|
263
|
next unless $qwerty_map->[$i]; |
257
|
94
|
|
|
|
|
208
|
for(my $j = 0; $j < @$qwerty_map; ++$j) { |
258
|
11938
|
100
|
|
|
|
28376
|
next unless $qwerty_map->[$j]; |
259
|
8836
|
100
|
|
|
|
15487
|
next if $i == $j; |
260
|
8742
|
|
|
|
|
19487
|
my $dst = qwerty_char_distance(chr($i),chr($j)); |
261
|
8742
|
100
|
|
|
|
36715
|
$max = $dst if $dst > $max; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
} |
264
|
1
|
|
|
|
|
4
|
return $max; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 max_dvorak_distance |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
return: maximum distance |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
This function dynamicly computes the maximum distance between |
272
|
|
|
|
|
|
|
keys in the dvorak map. The maximum key distance is stored in |
273
|
|
|
|
|
|
|
the $dvorak_max_distance package global. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=cut |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub max_dvorak_distance |
278
|
|
|
|
|
|
|
{ |
279
|
1
|
|
|
1
|
1
|
2
|
my $max = 0; |
280
|
1
|
|
|
|
|
5
|
for(my $i = 0; $i < @$dvorak_map; ++$i) { |
281
|
127
|
100
|
|
|
|
240
|
next unless $dvorak_map->[$i]; |
282
|
94
|
|
|
|
|
176
|
for(my $j = 0; $j < @$dvorak_map; ++$j) { |
283
|
11938
|
100
|
|
|
|
22795
|
next unless $dvorak_map->[$j]; |
284
|
8836
|
100
|
|
|
|
13632
|
next if $i == $j; |
285
|
8742
|
|
|
|
|
16372
|
my $dst = dvorak_char_distance(chr($i),chr($j)); |
286
|
8742
|
100
|
|
|
|
27221
|
$max = $dst if $dst > $max; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} |
289
|
1
|
|
|
|
|
7
|
return $max; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head2 qwerty_char_distance |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
param: char 1 |
295
|
|
|
|
|
|
|
param: char 2 |
296
|
|
|
|
|
|
|
return: distance |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
This function computes the distance between the two characters passed |
299
|
|
|
|
|
|
|
on a qwerty keyboard. |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=cut |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub qwerty_char_distance |
304
|
|
|
|
|
|
|
{ |
305
|
8926
|
100
|
|
8926
|
1
|
18158
|
return 0 if $_[0] eq $_[1]; # return 0 if same, regardless of map |
306
|
|
|
|
|
|
|
# if either of the chars are not in the map, return the max distance |
307
|
8789
|
50
|
|
|
|
16127
|
return $qwerty_max_distance unless $qwerty_map->[ord $_[0]]; |
308
|
8789
|
50
|
|
|
|
15827
|
return $qwerty_max_distance unless $qwerty_map->[ord $_[1]]; |
309
|
8789
|
|
|
|
|
13668
|
return grid_distance( |
310
|
8789
|
|
|
|
|
17877
|
@{$qwerty_map->[ord $_[0]]}[0,1], |
311
|
8789
|
|
|
|
|
9352
|
@{$qwerty_map->[ord $_[1]]}[0,1], |
312
|
|
|
|
|
|
|
); |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head2 dvorak_char_distance |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
param: char 1 |
318
|
|
|
|
|
|
|
param: char 2 |
319
|
|
|
|
|
|
|
return: distance |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
This function computes the distance between the two characters passed |
322
|
|
|
|
|
|
|
on a dvorak keyboard. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub dvorak_char_distance |
327
|
|
|
|
|
|
|
{ |
328
|
8926
|
100
|
|
8926
|
1
|
15625
|
return 0 if $_[0] eq $_[1]; # return 0 if same, regardless of map |
329
|
|
|
|
|
|
|
# if either of the chars are not in the map, return the max distance |
330
|
8789
|
50
|
|
|
|
14668
|
return $dvorak_max_distance unless $dvorak_map->[ord $_[0]]; |
331
|
8789
|
50
|
|
|
|
13633
|
return $dvorak_max_distance unless $dvorak_map->[ord $_[1]]; |
332
|
8789
|
|
|
|
|
11952
|
return grid_distance( |
333
|
8789
|
|
|
|
|
15861
|
@{$dvorak_map->[ord $_[0]]}[0,1], |
334
|
8789
|
|
|
|
|
7889
|
@{$dvorak_map->[ord $_[1]]}[0,1], |
335
|
|
|
|
|
|
|
); |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head2 grid_distance |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
param: x1 - point 1's x coordinate |
341
|
|
|
|
|
|
|
param: y1 - point 1's y coordinate |
342
|
|
|
|
|
|
|
param: x2 - point 2's x coordinate |
343
|
|
|
|
|
|
|
param: y2 - point 2's y coordinate |
344
|
|
|
|
|
|
|
return: distance between points |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
This function returns the distance between two points. If the two |
347
|
|
|
|
|
|
|
points have an x distance of 1, and a y distance of 1, then they |
348
|
|
|
|
|
|
|
are considered to be a distance of 1 apart. This is meant to help |
349
|
|
|
|
|
|
|
prevent horizontal/vertical bias in the distancing function. Otherwise |
350
|
|
|
|
|
|
|
we use the following formula: |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sqrt( (x1 - x2)**2 + (y1 - y2)**2 ); |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=cut |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub grid_distance |
357
|
|
|
|
|
|
|
{ |
358
|
17578
|
100
|
100
|
17578
|
1
|
44300
|
return 0 if($_[0] == $_[2] && $_[1] == $_[3]); # points are same |
359
|
17390
|
100
|
100
|
|
|
53776
|
return 1 if(abs($_[0] - $_[2]) == 1 && abs($_[1] - $_[3]) == 1); # same as 1 |
360
|
16358
|
|
|
|
|
37192
|
sqrt(($_[0] - $_[2])**2 + ($_[1] - $_[3])**2); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head2 qwerty_keyboard_distance |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
param: string1 |
366
|
|
|
|
|
|
|
param: string2 |
367
|
|
|
|
|
|
|
return: distance |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Returns the sum of the distances between corresponding characters |
370
|
|
|
|
|
|
|
in the two strings. If one string is longer than the other the |
371
|
|
|
|
|
|
|
remaining characters are counted as having the same value as the |
372
|
|
|
|
|
|
|
maximum distance. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=cut |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub qwerty_keyboard_distance |
377
|
|
|
|
|
|
|
{ |
378
|
29
|
|
|
29
|
1
|
35
|
my($s1,$s2) = @_; |
379
|
29
|
|
|
|
|
38
|
my($l1,$l2) = (length($s1),length($s2)); |
380
|
29
|
100
|
|
|
|
40
|
my $short = $l1 < $l2 ? $s1 : $s2; |
381
|
29
|
100
|
|
|
|
46
|
my $long = $l1 < $l2 ? $s2 : $s1; |
382
|
29
|
100
|
|
|
|
38
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
383
|
29
|
100
|
|
|
|
40
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
384
|
|
|
|
|
|
|
|
385
|
29
|
|
|
|
|
29
|
my $tot = 0; |
386
|
29
|
|
|
|
|
24
|
my $i; |
387
|
29
|
|
|
|
|
62
|
for($i = 0; $i < $ls; ++$i) { |
388
|
|
|
|
|
|
|
#print "calling distance(",substr($short,$i,1),',',$long,$i,1,")\n"; |
389
|
184
|
|
|
|
|
367
|
$tot += abs(qwerty_char_distance(substr($short,$i,1),substr($long,$i,1))); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
29
|
|
|
|
|
59
|
while($i < $ll) { |
393
|
12
|
|
|
|
|
12
|
$tot += $qwerty_max_distance; |
394
|
12
|
|
|
|
|
19
|
++$i; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
29
|
|
|
|
|
60
|
return $tot; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=head2 qwerty_keyboard_distance_match |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
param: string1 |
403
|
|
|
|
|
|
|
param: string2 |
404
|
|
|
|
|
|
|
return: probability of match |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
The probability of a match is: |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Pr = 1 - ( D / (L * M) ) |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Where D is the distance between the two strings, L is the length of |
411
|
|
|
|
|
|
|
the longer string, and M is the maximum character distance. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=cut |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub qwerty_keyboard_distance_match |
416
|
|
|
|
|
|
|
{ |
417
|
29
|
|
|
29
|
1
|
1114
|
my($s1,$s2) = @_; |
418
|
29
|
|
|
|
|
64
|
my($l1,$l2) = (length($s1),length($s2)); |
419
|
29
|
100
|
|
|
|
58
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
420
|
29
|
100
|
|
|
|
37
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
421
|
29
|
|
|
|
|
47
|
my $dst = qwerty_keyboard_distance($s1,$s2); |
422
|
29
|
|
|
|
|
90
|
return (1 - ($dst/($ll*$qwerty_max_distance))); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 dvorak_keyboard_distance |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
param: string1 |
428
|
|
|
|
|
|
|
param: string2 |
429
|
|
|
|
|
|
|
return: distance |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Returns the sum of the distances between corresponding characters |
432
|
|
|
|
|
|
|
in the two strings. If one string is longer than the other the |
433
|
|
|
|
|
|
|
remaining characters are counted as having the same value as the |
434
|
|
|
|
|
|
|
maximum distance. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub dvorak_keyboard_distance |
439
|
|
|
|
|
|
|
{ |
440
|
29
|
|
|
29
|
1
|
32
|
my($s1,$s2) = @_; |
441
|
29
|
|
|
|
|
34
|
my($l1,$l2) = (length($s1),length($s2)); |
442
|
29
|
100
|
|
|
|
43
|
my $short = $l1 < $l2 ? $s1 : $s2; |
443
|
29
|
100
|
|
|
|
40
|
my $long = $l1 < $l2 ? $s2 : $s1; |
444
|
29
|
100
|
|
|
|
49
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
445
|
29
|
100
|
|
|
|
36
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
446
|
|
|
|
|
|
|
|
447
|
29
|
|
|
|
|
27
|
my $tot = 0; |
448
|
29
|
|
|
|
|
28
|
my $i = 0; |
449
|
29
|
|
|
|
|
55
|
for($i = 0; $i < $ls; ++$i) { |
450
|
|
|
|
|
|
|
#print "calling distance(",substr($short,$i,1),',',$long,$i,1,")\n"; |
451
|
184
|
|
|
|
|
376
|
$tot += abs(dvorak_char_distance(substr($short,$i,1),substr($long,$i,1))); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
29
|
|
|
|
|
54
|
while($i < $ll) { |
455
|
12
|
|
|
|
|
14
|
$tot += $dvorak_max_distance; |
456
|
12
|
|
|
|
|
22
|
++$i; |
457
|
|
|
|
|
|
|
} |
458
|
29
|
|
|
|
|
56
|
return $tot; |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 dvorak_keyboard_distance_match |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
param: string1 |
464
|
|
|
|
|
|
|
param: string2 |
465
|
|
|
|
|
|
|
return: probability of match |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
The probability of a match is computed in the same way as |
468
|
|
|
|
|
|
|
for qwerty_keyboard_distance_match(). |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub dvorak_keyboard_distance_match |
473
|
|
|
|
|
|
|
{ |
474
|
29
|
|
|
29
|
1
|
82
|
my($s1,$s2) = @_; |
475
|
29
|
|
|
|
|
37
|
my($l1,$l2) = (length($s1),length($s2)); |
476
|
29
|
100
|
|
|
|
51
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
477
|
29
|
100
|
|
|
|
43
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
478
|
29
|
|
|
|
|
40
|
my $dst = dvorak_keyboard_distance($s1,$s2); |
479
|
29
|
|
|
|
|
74
|
return 1 - ($dst/($ll*$dvorak_max_distance)); |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
1; |
483
|
|
|
|
|
|
|
__END__ |