| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Unicode::Homoglyph::Replace; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
136646
|
use 5.008; |
|
|
2
|
|
|
|
|
19
|
|
|
4
|
2
|
|
|
2
|
|
11
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
57
|
|
|
5
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
76
|
|
|
6
|
2
|
|
|
2
|
|
656
|
use utf8; |
|
|
2
|
|
|
|
|
19
|
|
|
|
2
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
66
|
use Exporter qw(import); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
4265
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(replace_homoglyphs disguise); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Unicode::Homoglyph::Replace - replace homoglyphs with their ASCII lookalike equivalents |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Unicode::Homoglyph::Replace qw(replace_homoglyphs); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $replaced = replace_homoglyphs("..."); |
|
26
|
|
|
|
|
|
|
... |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Unicode has various homoglyphs - characters which look the same or mostly the |
|
33
|
|
|
|
|
|
|
same, but are different characters. |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
If you're trying to filter input in some way, but support Unicode text, then |
|
36
|
|
|
|
|
|
|
such homoglyphs can be used to get past your filters. For instance, there are |
|
37
|
|
|
|
|
|
|
B<11> other characters that look like a colon. |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
So, if someone wants to be a ⅾⅰⅽk to bypass your filters, they can replace some |
|
40
|
|
|
|
|
|
|
characters with look-alike (or at least look-similar) characters which your |
|
41
|
|
|
|
|
|
|
profanity / spam filters won't recognise. (That example there was |
|
42
|
|
|
|
|
|
|
C<\x{217E}\x{2170}\x{217D}k> - i.e. the characters SMALL ROMAN NUMERAL |
|
43
|
|
|
|
|
|
|
FIVE HUNDRED, SMALL ROMAN NUMERAL ONE, SMALL ROMAN NUMERAL ONE HUNDRED, |
|
44
|
|
|
|
|
|
|
and a "k".) |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# This list of homoglyphs was lifted from Unicode::Homoglyph, and changed to |
|
49
|
|
|
|
|
|
|
# note which ASCII character each is a homoglyph for. (It strikes me as very |
|
50
|
|
|
|
|
|
|
# odd that the original version didn't do that...) |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
our %homoglyphs = ( |
|
53
|
|
|
|
|
|
|
" " => [ |
|
54
|
|
|
|
|
|
|
"\x{0020}", # # SPACE |
|
55
|
|
|
|
|
|
|
"\x{00A0}", # NO-BREAK SPACE |
|
56
|
|
|
|
|
|
|
"\x{2000}", # EN QUAD |
|
57
|
|
|
|
|
|
|
"\x{2001}", # EM QUAD |
|
58
|
|
|
|
|
|
|
"\x{2002}", # EN SPACE |
|
59
|
|
|
|
|
|
|
"\x{2003}", # EM SPACE |
|
60
|
|
|
|
|
|
|
"\x{2004}", # THREE-PER-EM SPACE |
|
61
|
|
|
|
|
|
|
"\x{2005}", # FOUR-PER-EM SPACE |
|
62
|
|
|
|
|
|
|
"\x{2006}", # SIX-PER-EM SPACE |
|
63
|
|
|
|
|
|
|
"\x{2007}", # FIGURE SPACE |
|
64
|
|
|
|
|
|
|
"\x{2008}", # PUNCTUATION SPACE |
|
65
|
|
|
|
|
|
|
"\x{2009}", # THIN SPACE |
|
66
|
|
|
|
|
|
|
"\x{200A}", # HAIR SPACE |
|
67
|
|
|
|
|
|
|
"\x{202F}", # NARROW NO-BREAK SPACE |
|
68
|
|
|
|
|
|
|
"\x{205F}", # MEDIUM MATHEMATICAL SPACE |
|
69
|
|
|
|
|
|
|
], |
|
70
|
|
|
|
|
|
|
"!" => [ |
|
71
|
|
|
|
|
|
|
"\x{0021}", # ! # EXCLAMATION MARK |
|
72
|
|
|
|
|
|
|
"\x{01C3}", # LATIN LETTER RETROFLEX CLICK |
|
73
|
|
|
|
|
|
|
"\x{2D51}", # TIFINAGH LETTER TUAREG YANG |
|
74
|
|
|
|
|
|
|
"\x{FE15}", # PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK |
|
75
|
|
|
|
|
|
|
"\x{FE57}", # SMALL EXCLAMATION MARK |
|
76
|
|
|
|
|
|
|
"\x{FF01}", # FULLWIDTH EXCLAMATION MARK |
|
77
|
|
|
|
|
|
|
], |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
"\"" => [ |
|
80
|
|
|
|
|
|
|
"\x{0022}", # " # QUOTATION MARK |
|
81
|
|
|
|
|
|
|
"\x{FF02}", # FULLWIDTH QUOTATION MARK |
|
82
|
|
|
|
|
|
|
], |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
"#" => [ |
|
85
|
|
|
|
|
|
|
"\x{0023}", # # # NUMBER SIGN |
|
86
|
|
|
|
|
|
|
"\x{FE5F}", # SMALL NUMBER SIGN |
|
87
|
|
|
|
|
|
|
"\x{FF03}", # FULLWIDTH NUMBER SIGN |
|
88
|
|
|
|
|
|
|
], |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
"\$" => [ |
|
91
|
|
|
|
|
|
|
"\x{0024}", # $ # DOLLAR SIGN |
|
92
|
|
|
|
|
|
|
"\x{FE69}", # SMALL DOLLAR SIGN |
|
93
|
|
|
|
|
|
|
"\x{FF04}", # FULLWIDTH DOLLAR SIGN |
|
94
|
|
|
|
|
|
|
], |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
"\%" => [ |
|
97
|
|
|
|
|
|
|
"\x{0025}", # % # PERCENT SIGN |
|
98
|
|
|
|
|
|
|
"\x{066A}", # ARABIC PERCENT SIGN |
|
99
|
|
|
|
|
|
|
"\x{2052}", # COMMERCIAL MINUS SIGN |
|
100
|
|
|
|
|
|
|
"\x{FE6A}", # SMALL PERCENT SIGN |
|
101
|
|
|
|
|
|
|
"\x{FF05}", # FULLWIDTH PERCENT SIGN |
|
102
|
|
|
|
|
|
|
], |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
"&" => [ |
|
105
|
|
|
|
|
|
|
"\x{0026}", # & # AMPERSAND |
|
106
|
|
|
|
|
|
|
"\x{FE60}", # SMALL AMPERSAND |
|
107
|
|
|
|
|
|
|
"\x{FF06}", # FULLWIDTH AMPERSAND |
|
108
|
|
|
|
|
|
|
], |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
"'" => [ |
|
111
|
|
|
|
|
|
|
"\x{0027}", # ' # APOSTROPHE |
|
112
|
|
|
|
|
|
|
"\x{02B9}", # MODIFIER LETTER PRIME |
|
113
|
|
|
|
|
|
|
"\x{0374}", # GREEK NUMERAL SIGN |
|
114
|
|
|
|
|
|
|
"\x{FF07}", # FULLWIDTH APOSTROPHE |
|
115
|
|
|
|
|
|
|
], |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
"(" => [ |
|
118
|
|
|
|
|
|
|
"\x{0028}", # ( # LEFT PARENTHESIS |
|
119
|
|
|
|
|
|
|
"\x{FE59}", # SMALL LEFT PARENTHESIS |
|
120
|
|
|
|
|
|
|
"\x{FF08}", # FULLWIDTH LEFT PARENTHESIS |
|
121
|
|
|
|
|
|
|
], |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
")" => [ |
|
124
|
|
|
|
|
|
|
"\x{0029}", # ) # RIGHT PARENTHESIS |
|
125
|
|
|
|
|
|
|
"\x{FF09}", # FULLWIDTH RIGHT PARENTHESIS |
|
126
|
|
|
|
|
|
|
"\x{FE5A}", # SMALL RIGHT PARENTHESIS |
|
127
|
|
|
|
|
|
|
], |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
"*" => [ |
|
130
|
|
|
|
|
|
|
"\x{002A}", # * # ASTERISK |
|
131
|
|
|
|
|
|
|
"\x{22C6}", # STAR OPERATOR |
|
132
|
|
|
|
|
|
|
"\x{FE61}", # SMALL ASTERISK |
|
133
|
|
|
|
|
|
|
"\x{FF0A}", # FULLWIDTH ASTERISK |
|
134
|
|
|
|
|
|
|
], |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
"+" => [ |
|
137
|
|
|
|
|
|
|
"\x{002B}", # + # PLUS SIGN |
|
138
|
|
|
|
|
|
|
"\x{16ED}", # RUNIC CROSS PUNCTUATION |
|
139
|
|
|
|
|
|
|
"\x{FE62}", # SMALL PLUS SIGN |
|
140
|
|
|
|
|
|
|
"\x{FF0B}", # FULLWIDTH PLUS SIGN |
|
141
|
|
|
|
|
|
|
], |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
"," => [ |
|
144
|
|
|
|
|
|
|
"\x{002C}", # , # COMMA |
|
145
|
|
|
|
|
|
|
"\x{02CF}", # MODIFIER LETTER LOW ACUTE ACCENT |
|
146
|
|
|
|
|
|
|
"\x{16E7}", # RUNIC LETTER SHORT-TWIG-YR |
|
147
|
|
|
|
|
|
|
"\x{201A}", # SINGLE LOW-9 QUOTATION MARK |
|
148
|
|
|
|
|
|
|
"\x{FF0C}", # FULLWIDTH COMMA |
|
149
|
|
|
|
|
|
|
], |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
"-" => [ |
|
152
|
|
|
|
|
|
|
"\x{002D}", # - # HYPHEN-MINUS |
|
153
|
|
|
|
|
|
|
"\x{02D7}", # MODIFIER LETTER MINUS SIGN |
|
154
|
|
|
|
|
|
|
"\x{2212}", # MINUS SIGN |
|
155
|
|
|
|
|
|
|
"\x{23BC}", # HORIZONTAL SCAN LINE-7 |
|
156
|
|
|
|
|
|
|
"\x{2574}", # BOX DRAWINGS LIGHT LEFT |
|
157
|
|
|
|
|
|
|
"\x{FE63}", # SMALL HYPHEN-MINUS |
|
158
|
|
|
|
|
|
|
"\x{FF0D}", # FULLWIDTH HYPHEN-MINUS |
|
159
|
|
|
|
|
|
|
], |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
"." => [ |
|
162
|
|
|
|
|
|
|
"\x{002E}", # . # FULL STOP |
|
163
|
|
|
|
|
|
|
"\x{2024}", # ONE DOT LEADER |
|
164
|
|
|
|
|
|
|
"\x{FF0E}", # FULLWIDTH FULL STOP |
|
165
|
|
|
|
|
|
|
], |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
"/" => [ |
|
168
|
|
|
|
|
|
|
"\x{002F}", # / # SOLIDUS |
|
169
|
|
|
|
|
|
|
"\x{FF0F}", # FULLWIDTH SOLIDUS |
|
170
|
|
|
|
|
|
|
"\x{1735}", # PHILIPPINE SINGLE PUNCTUATION |
|
171
|
|
|
|
|
|
|
"\x{2044}", # FRACTION SLASH |
|
172
|
|
|
|
|
|
|
"\x{2215}", # DIVISION SLASH |
|
173
|
|
|
|
|
|
|
"\x{29F8}", # BIG SOLIDUS |
|
174
|
|
|
|
|
|
|
], |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
"2" => [ |
|
177
|
|
|
|
|
|
|
"\x{0032}", # 2 # DIGIT TWO |
|
178
|
|
|
|
|
|
|
"\x{14BF}", # CANADIAN SYLLABICS SAYISI M |
|
179
|
|
|
|
|
|
|
], |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
"3" => [ |
|
182
|
|
|
|
|
|
|
"\x{0033}", # 3 # DIGIT THREE |
|
183
|
|
|
|
|
|
|
"\x{01B7}", # LATIN CAPITAL LETTER EZH |
|
184
|
|
|
|
|
|
|
"\x{2128}", # BLACK-LETTER CAPITAL Z |
|
185
|
|
|
|
|
|
|
], |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
"4" => [ |
|
188
|
|
|
|
|
|
|
"\x{0034}", # 4 # DIGIT FOUR |
|
189
|
|
|
|
|
|
|
"\x{13CE}", # CHEROKEE LETTER SE |
|
190
|
|
|
|
|
|
|
], |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
"6" => [ |
|
193
|
|
|
|
|
|
|
"\x{0036}", # 6 # DIGIT SIX |
|
194
|
|
|
|
|
|
|
"\x{13EE}", # CHEROKEE LETTER WV |
|
195
|
|
|
|
|
|
|
], |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
"9" => [ |
|
198
|
|
|
|
|
|
|
"\x{0039}", # 9 # DIGIT NINE |
|
199
|
|
|
|
|
|
|
"\x{13ED}", # CHEROKEE LETTER WU |
|
200
|
|
|
|
|
|
|
], |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
":" => [ |
|
203
|
|
|
|
|
|
|
"\x{003A}", # : # COLON |
|
204
|
|
|
|
|
|
|
"\x{02D0}", # MODIFIER LETTER TRIANGULAR COLON |
|
205
|
|
|
|
|
|
|
"\x{02F8}", # MODIFIER LETTER RAISED COLON |
|
206
|
|
|
|
|
|
|
"\x{0589}", # ARMENIAN FULL STOP |
|
207
|
|
|
|
|
|
|
"\x{1361}", # ETHIOPIC WORDSPACE |
|
208
|
|
|
|
|
|
|
"\x{16EC}", # RUNIC MULTIPLE PUNCTUATION |
|
209
|
|
|
|
|
|
|
"\x{205A}", # TWO DOT PUNCTUATION |
|
210
|
|
|
|
|
|
|
"\x{2236}", # RATIO |
|
211
|
|
|
|
|
|
|
"\x{2806}", # BRAILLE PATTERN DOTS-23 |
|
212
|
|
|
|
|
|
|
"\x{FE13}", # PRESENTATION FORM FOR VERTICAL COLON |
|
213
|
|
|
|
|
|
|
"\x{FE55}", # SMALL COLON |
|
214
|
|
|
|
|
|
|
"\x{FF1A}", # FULLWIDTH COLON |
|
215
|
|
|
|
|
|
|
], |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
";" => [ |
|
218
|
|
|
|
|
|
|
"\x{003B}", # ; # SEMICOLON |
|
219
|
|
|
|
|
|
|
"\x{037E}", # GREEK QUESTION MARK |
|
220
|
|
|
|
|
|
|
"\x{FE14}", # PRESENTATION FORM FOR VERTICAL SEMICOLON |
|
221
|
|
|
|
|
|
|
"\x{FE54}", # SMALL SEMICOLON |
|
222
|
|
|
|
|
|
|
"\x{FF1B}", # FULLWIDTH SEMICOLON |
|
223
|
|
|
|
|
|
|
], |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
"<" => [ |
|
226
|
|
|
|
|
|
|
"\x{003C}", # < # LESS-THAN SIGN |
|
227
|
|
|
|
|
|
|
"\x{02C2}", # MODIFIER LETTER LEFT ARROWHEAD |
|
228
|
|
|
|
|
|
|
"\x{2039}", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK |
|
229
|
|
|
|
|
|
|
"\x{227A}", # PRECEDES |
|
230
|
|
|
|
|
|
|
"\x{276E}", # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT |
|
231
|
|
|
|
|
|
|
"\x{2D66}", # TIFINAGH LETTER YE |
|
232
|
|
|
|
|
|
|
"\x{FE64}", # SMALL LESS-THAN SIGN |
|
233
|
|
|
|
|
|
|
"\x{FF1C}", # FULLWIDTH LESS-THAN SIGN |
|
234
|
|
|
|
|
|
|
], |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
"=" => [ |
|
237
|
|
|
|
|
|
|
"\x{003D}", # = # EQUALS SIGN |
|
238
|
|
|
|
|
|
|
"\x{2550}", # BOX DRAWINGS DOUBLE HORIZONTAL |
|
239
|
|
|
|
|
|
|
"\x{268C}", # DIGRAM FOR GREATER YANG |
|
240
|
|
|
|
|
|
|
"\x{FE66}", # SMALL EQUALS SIGN |
|
241
|
|
|
|
|
|
|
"\x{FF1D}", # FULLWIDTH EQUALS SIGN |
|
242
|
|
|
|
|
|
|
], |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
">" => [ |
|
245
|
|
|
|
|
|
|
"\x{003E}", # > # GREATER-THAN SIGN |
|
246
|
|
|
|
|
|
|
"\x{02C3}", # MODIFIER LETTER RIGHT ARROWHEAD |
|
247
|
|
|
|
|
|
|
"\x{203A}", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK |
|
248
|
|
|
|
|
|
|
"\x{227B}", # SUCCEEDS |
|
249
|
|
|
|
|
|
|
"\x{276F}", # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT |
|
250
|
|
|
|
|
|
|
"\x{FE65}", # SMALL GREATER-THAN SIGN |
|
251
|
|
|
|
|
|
|
"\x{FF1E}", # FULLWIDTH GREATER-THAN SIGN |
|
252
|
|
|
|
|
|
|
], |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
"?" => [ |
|
255
|
|
|
|
|
|
|
"\x{003F}", # ? # QUESTION MARK |
|
256
|
|
|
|
|
|
|
"\x{FE16}", # PRESENTATION FORM FOR VERTICAL QUESTION MARK |
|
257
|
|
|
|
|
|
|
"\x{FE56}", # SMALL QUESTION MARK |
|
258
|
|
|
|
|
|
|
"\x{FF1F}", # FULLWIDTH QUESTION MARK |
|
259
|
|
|
|
|
|
|
], |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
"\@" => [ |
|
262
|
|
|
|
|
|
|
"\x{0040}", # @ # COMMERCIAL AT |
|
263
|
|
|
|
|
|
|
"\x{FE6B}", # SMALL COMMERCIAL AT |
|
264
|
|
|
|
|
|
|
"\x{FF20}", # FULLWIDTH COMMERCIAL AT |
|
265
|
|
|
|
|
|
|
], |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
"A" => [ |
|
268
|
|
|
|
|
|
|
"\x{0041}", # A # LATIN CAPITAL LETTER A |
|
269
|
|
|
|
|
|
|
"\x{0391}", # GREEK CAPITAL LETTER ALPHA |
|
270
|
|
|
|
|
|
|
"\x{0410}", # CYRILLIC CAPITAL LETTER A |
|
271
|
|
|
|
|
|
|
"\x{13AA}", # CHEROKEE LETTER GO |
|
272
|
|
|
|
|
|
|
], |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
"B" => [ |
|
275
|
|
|
|
|
|
|
"\x{0042}", # B # LATIN CAPITAL LETTER B |
|
276
|
|
|
|
|
|
|
"\x{0392}", # GREEK CAPITAL LETTER BETA |
|
277
|
|
|
|
|
|
|
"\x{0412}", # CYRILLIC CAPITAL LETTER VE |
|
278
|
|
|
|
|
|
|
"\x{13F4}", # CHEROKEE LETTER YV |
|
279
|
|
|
|
|
|
|
"\x{15F7}", # CANADIAN SYLLABICS CARRIER KHE |
|
280
|
|
|
|
|
|
|
"\x{2C82}", # COPTIC CAPITAL LETTER VIDA |
|
281
|
|
|
|
|
|
|
], |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
"C" => [ |
|
284
|
|
|
|
|
|
|
"\x{0043}", # C # LATIN CAPITAL LETTER C |
|
285
|
|
|
|
|
|
|
"\x{03F9}", # GREEK CAPITAL LUNATE SIGMA SYMBOL |
|
286
|
|
|
|
|
|
|
"\x{0421}", # CYRILLIC CAPITAL LETTER ES |
|
287
|
|
|
|
|
|
|
"\x{13DF}", # CHEROKEE LETTER TLI |
|
288
|
|
|
|
|
|
|
"\x{216D}", # ROMAN NUMERAL ONE HUNDRED |
|
289
|
|
|
|
|
|
|
"\x{2CA4}", # COPTIC CAPITAL LETTER SIMA |
|
290
|
|
|
|
|
|
|
], |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
"D" => [ |
|
293
|
|
|
|
|
|
|
"\x{0044}", # D # LATIN CAPITAL LETTER D |
|
294
|
|
|
|
|
|
|
"\x{13A0}", # CHEROKEE LETTER A |
|
295
|
|
|
|
|
|
|
"\x{15EA}", # CANADIAN SYLLABICS CARRIER PE |
|
296
|
|
|
|
|
|
|
"\x{216E}", # ROMAN NUMERAL FIVE HUNDRED |
|
297
|
|
|
|
|
|
|
], |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
"E" => [ |
|
300
|
|
|
|
|
|
|
"\x{0045}", # E # LATIN CAPITAL LETTER E |
|
301
|
|
|
|
|
|
|
"\x{0395}", # GREEK CAPITAL LETTER EPSILON |
|
302
|
|
|
|
|
|
|
"\x{0415}", # CYRILLIC CAPITAL LETTER IE |
|
303
|
|
|
|
|
|
|
"\x{13AC}", # CHEROKEE LETTER GV |
|
304
|
|
|
|
|
|
|
], |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
"F" => [ |
|
307
|
|
|
|
|
|
|
"\x{0046}", # F # LATIN CAPITAL LETTER F |
|
308
|
|
|
|
|
|
|
"\x{15B4}", # CANADIAN SYLLABICS BLACKFOOT WE |
|
309
|
|
|
|
|
|
|
], |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
"G" => [ |
|
312
|
|
|
|
|
|
|
"\x{0047}", # G # LATIN CAPITAL LETTER G |
|
313
|
|
|
|
|
|
|
"\x{050C}", # CYRILLIC CAPITAL LETTER KOMI SJE |
|
314
|
|
|
|
|
|
|
"\x{13C0}", # CHEROKEE LETTER NAH |
|
315
|
|
|
|
|
|
|
], |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
"H" => [ |
|
318
|
|
|
|
|
|
|
"\x{0048}", # H # LATIN CAPITAL LETTER H |
|
319
|
|
|
|
|
|
|
"\x{0397}", # GREEK CAPITAL LETTER ETA |
|
320
|
|
|
|
|
|
|
"\x{041D}", # CYRILLIC CAPITAL LETTER EN |
|
321
|
|
|
|
|
|
|
"\x{12D8}", # ETHIOPIC SYLLABLE ZA |
|
322
|
|
|
|
|
|
|
"\x{13BB}", # CHEROKEE LETTER MI |
|
323
|
|
|
|
|
|
|
"\x{157C}", # CANADIAN SYLLABICS NUNAVUT H |
|
324
|
|
|
|
|
|
|
"\x{2C8E}", # COPTIC CAPITAL LETTER HATE |
|
325
|
|
|
|
|
|
|
], |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
"I" => [ |
|
328
|
|
|
|
|
|
|
"\x{0049}", # I # LATIN CAPITAL LETTER I |
|
329
|
|
|
|
|
|
|
"\x{0399}", # GREEK CAPITAL LETTER IOTA |
|
330
|
|
|
|
|
|
|
"\x{0406}", # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I |
|
331
|
|
|
|
|
|
|
"\x{2160}", # ROMAN NUMERAL ONE |
|
332
|
|
|
|
|
|
|
], |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
"J" => [ |
|
335
|
|
|
|
|
|
|
"\x{004A}", # J # LATIN CAPITAL LETTER J |
|
336
|
|
|
|
|
|
|
"\x{0408}", # CYRILLIC CAPITAL LETTER JE |
|
337
|
|
|
|
|
|
|
"\x{13AB}", # CHEROKEE LETTER GU |
|
338
|
|
|
|
|
|
|
"\x{148D}", # CANADIAN SYLLABICS CO |
|
339
|
|
|
|
|
|
|
], |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
"K" => [ |
|
342
|
|
|
|
|
|
|
"\x{004B}", # K # LATIN CAPITAL LETTER K |
|
343
|
|
|
|
|
|
|
"\x{039A}", # GREEK CAPITAL LETTER KAPPA |
|
344
|
|
|
|
|
|
|
"\x{13E6}", # CHEROKEE LETTER TSO |
|
345
|
|
|
|
|
|
|
"\x{16D5}", # RUNIC LETTER OPEN-P |
|
346
|
|
|
|
|
|
|
"\x{212A}", # KELVIN SIGN |
|
347
|
|
|
|
|
|
|
"\x{2C94}", # COPTIC CAPITAL LETTER KAPA |
|
348
|
|
|
|
|
|
|
], |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
"L" => [ |
|
351
|
|
|
|
|
|
|
"\x{004C}", # L # LATIN CAPITAL LETTER L |
|
352
|
|
|
|
|
|
|
"\x{13DE}", # CHEROKEE LETTER TLE |
|
353
|
|
|
|
|
|
|
"\x{14AA}", # CANADIAN SYLLABICS MA |
|
354
|
|
|
|
|
|
|
"\x{216C}", # ROMAN NUMERAL FIFTY |
|
355
|
|
|
|
|
|
|
], |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
"M" => [ |
|
358
|
|
|
|
|
|
|
"\x{004D}", # M # LATIN CAPITAL LETTER M |
|
359
|
|
|
|
|
|
|
"\x{039C}", # GREEK CAPITAL LETTER MU |
|
360
|
|
|
|
|
|
|
"\x{03FA}", # GREEK CAPITAL LETTER SAN |
|
361
|
|
|
|
|
|
|
"\x{041C}", # CYRILLIC CAPITAL LETTER EM |
|
362
|
|
|
|
|
|
|
"\x{13B7}", # CHEROKEE LETTER LU |
|
363
|
|
|
|
|
|
|
"\x{216F}", # ROMAN NUMERAL ONE THOUSAND |
|
364
|
|
|
|
|
|
|
], |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
"N" => [ |
|
367
|
|
|
|
|
|
|
"\x{004E}", # N # LATIN CAPITAL LETTER N |
|
368
|
|
|
|
|
|
|
"\x{039D}", # GREEK CAPITAL LETTER NU |
|
369
|
|
|
|
|
|
|
"\x{2C9A}", # COPTIC CAPITAL LETTER NI |
|
370
|
|
|
|
|
|
|
], |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
"O" => [ |
|
373
|
|
|
|
|
|
|
"\x{004F}", # O # LATIN CAPITAL LETTER O |
|
374
|
|
|
|
|
|
|
"\x{039F}", # GREEK CAPITAL LETTER OMICRON |
|
375
|
|
|
|
|
|
|
"\x{041E}", # CYRILLIC CAPITAL LETTER O |
|
376
|
|
|
|
|
|
|
"\x{2C9E}", # COPTIC CAPITAL LETTER O |
|
377
|
|
|
|
|
|
|
], |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
"P" => [ |
|
380
|
|
|
|
|
|
|
"\x{0050}", # P # LATIN CAPITAL LETTER P |
|
381
|
|
|
|
|
|
|
"\x{03A1}", # GREEK CAPITAL LETTER RHO |
|
382
|
|
|
|
|
|
|
"\x{0420}", # CYRILLIC CAPITAL LETTER ER |
|
383
|
|
|
|
|
|
|
"\x{13E2}", # CHEROKEE LETTER TLV |
|
384
|
|
|
|
|
|
|
"\x{2CA2}", # COPTIC CAPITAL LETTER RO |
|
385
|
|
|
|
|
|
|
], |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
"Q" => [ |
|
388
|
|
|
|
|
|
|
"\x{0051}", # Q # LATIN CAPITAL LETTER Q |
|
389
|
|
|
|
|
|
|
"\x{051A}", # CYRILLIC CAPITAL LETTER QA |
|
390
|
|
|
|
|
|
|
"\x{2D55}", # TIFINAGH LETTER YARR |
|
391
|
|
|
|
|
|
|
], |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
"R" => [ |
|
394
|
|
|
|
|
|
|
"\x{0052}", # R # LATIN CAPITAL LETTER R |
|
395
|
|
|
|
|
|
|
"\x{13A1}", # CHEROKEE LETTER E |
|
396
|
|
|
|
|
|
|
"\x{13D2}", # CHEROKEE LETTER SV |
|
397
|
|
|
|
|
|
|
"\x{1587}", # CANADIAN SYLLABICS TLHI |
|
398
|
|
|
|
|
|
|
], |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
"S" => [ |
|
401
|
|
|
|
|
|
|
"\x{0053}", # S # LATIN CAPITAL LETTER S |
|
402
|
|
|
|
|
|
|
"\x{0405}", # CYRILLIC CAPITAL LETTER DZE |
|
403
|
|
|
|
|
|
|
"\x{13DA}", # CHEROKEE LETTER DU |
|
404
|
|
|
|
|
|
|
], |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
"T" => [ |
|
407
|
|
|
|
|
|
|
"\x{0054}", # T # LATIN CAPITAL LETTER T |
|
408
|
|
|
|
|
|
|
"\x{03A4}", # GREEK CAPITAL LETTER TAU |
|
409
|
|
|
|
|
|
|
"\x{0422}", # CYRILLIC CAPITAL LETTER TE |
|
410
|
|
|
|
|
|
|
"\x{13A2}", # CHEROKEE LETTER I |
|
411
|
|
|
|
|
|
|
], |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
"V" => [ |
|
414
|
|
|
|
|
|
|
"\x{0056}", # V # LATIN CAPITAL LETTER V |
|
415
|
|
|
|
|
|
|
"\x{13D9}", # CHEROKEE LETTER DO |
|
416
|
|
|
|
|
|
|
"\x{2164}", # ROMAN NUMERAL FIVE |
|
417
|
|
|
|
|
|
|
], |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
"W" => [ |
|
420
|
|
|
|
|
|
|
"\x{0057}", # W # LATIN CAPITAL LETTER W |
|
421
|
|
|
|
|
|
|
"\x{13B3}", # CHEROKEE LETTER LA |
|
422
|
|
|
|
|
|
|
"\x{13D4}", # CHEROKEE LETTER TA |
|
423
|
|
|
|
|
|
|
], |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
"X" => [ |
|
426
|
|
|
|
|
|
|
"\x{0058}", # X # LATIN CAPITAL LETTER X |
|
427
|
|
|
|
|
|
|
"\x{03A7}", # GREEK CAPITAL LETTER CHI |
|
428
|
|
|
|
|
|
|
"\x{0425}", # CYRILLIC CAPITAL LETTER HA |
|
429
|
|
|
|
|
|
|
"\x{2169}", # ROMAN NUMERAL TEN |
|
430
|
|
|
|
|
|
|
"\x{2CAC}", # COPTIC CAPITAL LETTER KHI |
|
431
|
|
|
|
|
|
|
], |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
"Y" => [ |
|
434
|
|
|
|
|
|
|
"\x{0059}", # Y # LATIN CAPITAL LETTER Y |
|
435
|
|
|
|
|
|
|
"\x{03A5}", # GREEK CAPITAL LETTER UPSILON |
|
436
|
|
|
|
|
|
|
"\x{2CA8}", # COPTIC CAPITAL LETTER UA |
|
437
|
|
|
|
|
|
|
], |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
"Z" => [ |
|
440
|
|
|
|
|
|
|
"\x{005A}", # Z # LATIN CAPITAL LETTER Z |
|
441
|
|
|
|
|
|
|
"\x{0396}", # GREEK CAPITAL LETTER ZETA |
|
442
|
|
|
|
|
|
|
"\x{13C3}", # CHEROKEE LETTER NO |
|
443
|
|
|
|
|
|
|
], |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
"[" => [ |
|
446
|
|
|
|
|
|
|
"\x{005B}", # [ # LEFT SQUARE BRACKET |
|
447
|
|
|
|
|
|
|
"\x{FF3B}", # FULLWIDTH LEFT SQUARE BRACKET |
|
448
|
|
|
|
|
|
|
], |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
"\\" => [ |
|
451
|
|
|
|
|
|
|
"\x{005C}", # \ # REVERSE SOLIDUS |
|
452
|
|
|
|
|
|
|
"\x{2216}", # SET MINUS |
|
453
|
|
|
|
|
|
|
"\x{29F5}", # REVERSE SOLIDUS OPERATOR |
|
454
|
|
|
|
|
|
|
"\x{29F9}", # BIG REVERSE SOLIDUS |
|
455
|
|
|
|
|
|
|
"\x{FE68}", # SMALL REVERSE SOLIDUS |
|
456
|
|
|
|
|
|
|
"\x{FF3C}", # FULLWIDTH REVERSE SOLIDUS |
|
457
|
|
|
|
|
|
|
], |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
"]" => [ |
|
460
|
|
|
|
|
|
|
"\x{005D}", # ] # RIGHT SQUARE BRACKET |
|
461
|
|
|
|
|
|
|
"\x{FF3D}", # FULLWIDTH RIGHT SQUARE BRACKET |
|
462
|
|
|
|
|
|
|
], |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
"^" => [ |
|
465
|
|
|
|
|
|
|
"\x{005E}", # ^ # CIRCUMFLEX ACCENT |
|
466
|
|
|
|
|
|
|
"\x{02C4}", # MODIFIER LETTER UP ARROWHEAD |
|
467
|
|
|
|
|
|
|
"\x{02C6}", # MODIFIER LETTER CIRCUMFLEX ACCENT |
|
468
|
|
|
|
|
|
|
"\x{1DBA}", # MODIFIER LETTER SMALL TURNED V |
|
469
|
|
|
|
|
|
|
"\x{2303}", # UP ARROWHEAD |
|
470
|
|
|
|
|
|
|
"\x{FF3E}", # FULLWIDTH CIRCUMFLEX ACCENT |
|
471
|
|
|
|
|
|
|
], |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
"_" => [ |
|
474
|
|
|
|
|
|
|
"\x{005F}", # _ # LOW LINE |
|
475
|
|
|
|
|
|
|
"\x{02CD}", # MODIFIER LETTER LOW MACRON |
|
476
|
|
|
|
|
|
|
"\x{268A}", # MONOGRAM FOR YANG |
|
477
|
|
|
|
|
|
|
"\x{FF3F}", # FULLWIDTH LOW LINE |
|
478
|
|
|
|
|
|
|
], |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
"`" => [ |
|
481
|
|
|
|
|
|
|
"\x{0060}", # ` # GRAVE ACCENT |
|
482
|
|
|
|
|
|
|
"\x{02CB}", # MODIFIER LETTER GRAVE ACCENT |
|
483
|
|
|
|
|
|
|
"\x{1FEF}", # GREEK VARIA |
|
484
|
|
|
|
|
|
|
"\x{2035}", # REVERSED PRIME |
|
485
|
|
|
|
|
|
|
"\x{FF40}", # FULLWIDTH GRAVE ACCENT |
|
486
|
|
|
|
|
|
|
], |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
"a" => [ |
|
489
|
|
|
|
|
|
|
"\x{0061}", # a # LATIN SMALL LETTER A |
|
490
|
|
|
|
|
|
|
"\x{0251}", # LATIN SMALL LETTER ALPHA |
|
491
|
|
|
|
|
|
|
"\x{0430}", # CYRILLIC SMALL LETTER A |
|
492
|
|
|
|
|
|
|
], |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
"c" => [ |
|
495
|
|
|
|
|
|
|
"\x{0063}", # c # LATIN SMALL LETTER C |
|
496
|
|
|
|
|
|
|
"\x{03F2}", # GREEK LUNATE SIGMA SYMBOL |
|
497
|
|
|
|
|
|
|
"\x{0441}", # CYRILLIC SMALL LETTER ES |
|
498
|
|
|
|
|
|
|
"\x{217D}", # SMALL ROMAN NUMERAL ONE HUNDRED |
|
499
|
|
|
|
|
|
|
], |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
"d" => [ |
|
502
|
|
|
|
|
|
|
"\x{0064}", # d # LATIN SMALL LETTER D |
|
503
|
|
|
|
|
|
|
"\x{0501}", # CYRILLIC SMALL LETTER KOMI DE |
|
504
|
|
|
|
|
|
|
"\x{217E}", # SMALL ROMAN NUMERAL FIVE HUNDRED |
|
505
|
|
|
|
|
|
|
], |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
"e" => [ |
|
508
|
|
|
|
|
|
|
"\x{0065}", # e # LATIN SMALL LETTER E |
|
509
|
|
|
|
|
|
|
"\x{0435}", # CYRILLIC SMALL LETTER IE |
|
510
|
|
|
|
|
|
|
"\x{1971}", # TAI LE LETTER TONE-3 |
|
511
|
|
|
|
|
|
|
], |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
"g" => [ |
|
514
|
|
|
|
|
|
|
"\x{0067}", # g # LATIN SMALL LETTER G |
|
515
|
|
|
|
|
|
|
"\x{0261}", # LATIN SMALL LETTER SCRIPT G |
|
516
|
|
|
|
|
|
|
], |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
"h" => [ |
|
519
|
|
|
|
|
|
|
"\x{0068}", # h # LATIN SMALL LETTER H |
|
520
|
|
|
|
|
|
|
"\x{04BB}", # CYRILLIC SMALL LETTER SHHA |
|
521
|
|
|
|
|
|
|
], |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
"i" => [ |
|
524
|
|
|
|
|
|
|
"\x{0069}", # i # LATIN SMALL LETTER I |
|
525
|
|
|
|
|
|
|
"\x{0456}", # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
|
526
|
|
|
|
|
|
|
"\x{2170}", # SMALL ROMAN NUMERAL ONE |
|
527
|
|
|
|
|
|
|
], |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
"j" => [ |
|
530
|
|
|
|
|
|
|
"\x{006A}", # j # LATIN SMALL LETTER J |
|
531
|
|
|
|
|
|
|
"\x{03F3}", # GREEK LETTER YOT |
|
532
|
|
|
|
|
|
|
"\x{0458}", # CYRILLIC SMALL LETTER JE |
|
533
|
|
|
|
|
|
|
], |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
"l" => [ |
|
536
|
|
|
|
|
|
|
"\x{006C}", # l # LATIN SMALL LETTER L |
|
537
|
|
|
|
|
|
|
"\x{217C}", # SMALL ROMAN NUMERAL FIFTY |
|
538
|
|
|
|
|
|
|
], |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
"m" => [ |
|
541
|
|
|
|
|
|
|
"\x{006D}", # m # LATIN SMALL LETTER M |
|
542
|
|
|
|
|
|
|
"\x{217F}", # SMALL ROMAN NUMERAL ONE THOUSAND |
|
543
|
|
|
|
|
|
|
], |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
"n" => [ |
|
546
|
|
|
|
|
|
|
"\x{006E}", # n # LATIN SMALL LETTER N |
|
547
|
|
|
|
|
|
|
"\x{1952}", # TAI LE LETTER NGA |
|
548
|
|
|
|
|
|
|
], |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
"o" => [ |
|
551
|
|
|
|
|
|
|
"\x{006F}", # o # LATIN SMALL LETTER O |
|
552
|
|
|
|
|
|
|
"\x{03BF}", # GREEK SMALL LETTER OMICRON |
|
553
|
|
|
|
|
|
|
"\x{043E}", # CYRILLIC SMALL LETTER O |
|
554
|
|
|
|
|
|
|
"\x{0D20}", # MALAYALAM LETTER TTHA |
|
555
|
|
|
|
|
|
|
"\x{2C9F}", # COPTIC SMALL LETTER O |
|
556
|
|
|
|
|
|
|
], |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
"p" => [ |
|
559
|
|
|
|
|
|
|
"\x{0070}", # p # LATIN SMALL LETTER P |
|
560
|
|
|
|
|
|
|
"\x{0440}", # CYRILLIC SMALL LETTER ER |
|
561
|
|
|
|
|
|
|
"\x{2CA3}", # COPTIC SMALL LETTER RO |
|
562
|
|
|
|
|
|
|
], |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
"s" => [
    # Each code point is listed exactly once: disguise() picks uniformly
    # from this list, so a repeated entry would skew the choice towards it.
    "\x{0073}", # s # LATIN SMALL LETTER S
    "\x{0455}", # CYRILLIC SMALL LETTER DZE
],
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
"u" => [ |
|
571
|
|
|
|
|
|
|
"\x{0075}", # u # LATIN SMALL LETTER U |
|
572
|
|
|
|
|
|
|
"\x{1959}", # TAI LE LETTER PA |
|
573
|
|
|
|
|
|
|
"\x{222A}", # UNION |
|
574
|
|
|
|
|
|
|
], |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
"v" => [ |
|
577
|
|
|
|
|
|
|
"\x{0076}", # v # LATIN SMALL LETTER V |
|
578
|
|
|
|
|
|
|
"\x{1D20}", # LATIN LETTER SMALL CAPITAL V |
|
579
|
|
|
|
|
|
|
"\x{2174}", # SMALL ROMAN NUMERAL FIVE |
|
580
|
|
|
|
|
|
|
"\x{2228}", # LOGICAL OR |
|
581
|
|
|
|
|
|
|
"\x{22C1}", # N-ARY LOGICAL OR |
|
582
|
|
|
|
|
|
|
], |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
"w" => [ |
|
585
|
|
|
|
|
|
|
"\x{0077}", # w # LATIN SMALL LETTER W |
|
586
|
|
|
|
|
|
|
"\x{1D21}", # LATIN LETTER SMALL CAPITAL W |
|
587
|
|
|
|
|
|
|
], |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
"x" => [ |
|
591
|
|
|
|
|
|
|
"\x{0078}", # x # LATIN SMALL LETTER X |
|
592
|
|
|
|
|
|
|
"\x{0445}", # CYRILLIC SMALL LETTER HA |
|
593
|
|
|
|
|
|
|
"\x{2179}", # SMALL ROMAN NUMERAL TEN |
|
594
|
|
|
|
|
|
|
"\x{2CAD}", # COPTIC SMALL LETTER KHI |
|
595
|
|
|
|
|
|
|
], |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
"y" => [ |
|
598
|
|
|
|
|
|
|
"\x{0079}", # y # LATIN SMALL LETTER Y |
|
599
|
|
|
|
|
|
|
"\x{0443}", # CYRILLIC SMALL LETTER U |
|
600
|
|
|
|
|
|
|
"\x{1EFF}", # LATIN SMALL LETTER Y WITH LOOP |
|
601
|
|
|
|
|
|
|
], |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
"z" => [ |
|
604
|
|
|
|
|
|
|
"\x{007A}", # z # LATIN SMALL LETTER Z |
|
605
|
|
|
|
|
|
|
"\x{1D22}", # LATIN LETTER SMALL CAPITAL Z |
|
606
|
|
|
|
|
|
|
], |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
"{" => [ |
|
609
|
|
|
|
|
|
|
"\x{007B}", # { # LEFT CURLY BRACKET |
|
610
|
|
|
|
|
|
|
"\x{FE5B}", # SMALL LEFT CURLY BRACKET |
|
611
|
|
|
|
|
|
|
"\x{FF5B}", # FULLWIDTH LEFT CURLY BRACKET |
|
612
|
|
|
|
|
|
|
], |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
"|" => [ |
|
615
|
|
|
|
|
|
|
"\x{007C}", # | # VERTICAL LINE |
|
616
|
|
|
|
|
|
|
"\x{01C0}", # LATIN LETTER DENTAL CLICK |
|
617
|
|
|
|
|
|
|
"\x{16C1}", # RUNIC LETTER ISAZ IS ISS I |
|
618
|
|
|
|
|
|
|
"\x{239C}", # LEFT PARENTHESIS EXTENSION |
|
619
|
|
|
|
|
|
|
"\x{239F}", # RIGHT PARENTHESIS EXTENSION |
|
620
|
|
|
|
|
|
|
"\x{23A2}", # LEFT SQUARE BRACKET EXTENSION |
|
621
|
|
|
|
|
|
|
"\x{23A5}", # RIGHT SQUARE BRACKET EXTENSION |
|
622
|
|
|
|
|
|
|
"\x{23AA}", # CURLY BRACKET EXTENSION |
|
623
|
|
|
|
|
|
|
"\x{23AE}", # INTEGRAL EXTENSION |
|
624
|
|
|
|
|
|
|
"\x{FF5C}", # FULLWIDTH VERTICAL LINE |
|
625
|
|
|
|
|
|
|
"\x{FFE8}", # HALFWIDTH FORMS LIGHT VERTICAL |
|
626
|
|
|
|
|
|
|
], |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
"}" => [ |
|
629
|
|
|
|
|
|
|
"\x{007D}", # } # RIGHT CURLY BRACKET |
|
630
|
|
|
|
|
|
|
"\x{FE5C}", # SMALL RIGHT CURLY BRACKET |
|
631
|
|
|
|
|
|
|
"\x{FF5D}", # FULLWIDTH RIGHT CURLY BRACKET |
|
632
|
|
|
|
|
|
|
], |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
"~" => [ |
|
635
|
|
|
|
|
|
|
"\x{007E}", # ~ # TILDE |
|
636
|
|
|
|
|
|
|
"\x{02DC}", # SMALL TILDE |
|
637
|
|
|
|
|
|
|
"\x{2053}", # SWUNG DASH |
|
638
|
|
|
|
|
|
|
"\x{223C}", # TILDE OPERATOR |
|
639
|
|
|
|
|
|
|
"\x{FF5E}", # FULLWIDTH TILDE |
|
640
|
|
|
|
|
|
|
], |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
); |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# Reverse lookup table: look-alike character => the ASCII character it imitates. |
# Starts empty; replace_homoglyphs() fills it via _build_replace_map() on first use. |
my %replace_map; |
|
646
|
|
|
|
|
|
|
# Fill %replace_map with the inverse of %homoglyphs: every character listed
# under an ASCII key becomes a key of %replace_map whose value is that ASCII
# character.  Takes no arguments; its return value is not meaningful.
sub _build_replace_map {
    for my $ascii (keys %homoglyphs) {
        my $lookalikes = $homoglyphs{$ascii};

        # Hash-slice assignment: point every look-alike at the same ASCII
        # character in a single statement.
        @replace_map{@$lookalikes} = ($ascii) x @$lookalikes;
    }
}
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# TODO: this would probably be much more efficient if we build up a tr/// |
|
656
|
|
|
|
|
|
|
# transliteration, I suspect. |
|
657
|
|
|
|
|
|
|
sub replace_homoglyphs {
    # Replace every known homoglyph in the given string with the ASCII
    # character it imitates; characters without an entry in %replace_map are
    # passed through unchanged.
    #
    #   my $clean = replace_homoglyphs($text);
    #
    # Returns the converted string.  An empty string yields an empty string,
    # and an undefined argument is handed straight back (as undef) without
    # emitting warnings.
    my $input = shift;

    # Nothing to convert, and split would warn about an uninitialized value.
    return $input unless defined $input;

    # The reverse lookup table is built lazily, on the first call only.
    _build_replace_map() unless keys %replace_map;

    # exists + ternary instead of the defined-or operator: this module
    # declares "use 5.008", and "//" is only available from perl 5.10.
    # join on an empty list gives '', so empty input no longer returns undef.
    return join '',
        map { exists $replace_map{$_} ? $replace_map{$_} : $_ }
        split //, $input;
}
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# Mostly for testing, take a string, and for each character we have a choice of |
|
671
|
|
|
|
|
|
|
# homoglyphs for, pick one at random and use it. |
|
672
|
|
|
|
|
|
|
sub disguise {
    # Return a copy of the given string in which each character that has an
    # entry in %homoglyphs is swapped for one randomly chosen member of that
    # entry.  The lists in %homoglyphs include the ASCII character itself, so
    # a character may also come back unchanged.  Characters with no entry are
    # copied as-is.
    #
    #   my $sneaky = disguise("some text");
    #
    # An empty string yields an empty string; an undefined argument is handed
    # straight back (as undef) without emitting warnings.
    my $input = shift;

    # Nothing to disguise, and split would warn about an uninitialized value.
    return $input unless defined $input;

    # Start from '' so that empty input returns '' rather than undef.
    my $result = '';

    for my $char (split //, $input) {
        my $candidates = $homoglyphs{$char};

        # rand takes the array in scalar context (its length), so the index
        # is an integer in 0 .. $#$candidates.
        $result .= $candidates
            ? $candidates->[ int rand @$candidates ]
            : $char;
    }

    return $result;
}
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=head1 AUTHOR |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
David Precious, C<< >> |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=head1 BUGS |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
|
693
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
|
694
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=head1 SUPPORT |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
perldoc Unicode::Homoglyph::Replace |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
You can also look for information at: |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=over 4 |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
L |
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
L |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
L |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=item * Search CPAN |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
L |
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=back |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
L<Unicode::Homoglyph>, where the list of homoglyphs came from. |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
Copyright 2018 David Precious. |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
742
|
|
|
|
|
|
|
under the terms of the Artistic License (2.0). You may obtain a |
|
743
|
|
|
|
|
|
|
copy of the full license at: |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
L |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified |
|
748
|
|
|
|
|
|
|
Versions is governed by this Artistic License. By using, modifying or |
|
749
|
|
|
|
|
|
|
distributing the Package, you accept this license. Do not use, modify, |
|
750
|
|
|
|
|
|
|
or distribute the Package, if you do not accept this license. |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made |
|
753
|
|
|
|
|
|
|
by someone other than you, you are nevertheless required to ensure that |
|
754
|
|
|
|
|
|
|
your Modified Version complies with the requirements of this license. |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service |
|
757
|
|
|
|
|
|
|
mark, tradename, or logo of the Copyright Holder. |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge |
|
760
|
|
|
|
|
|
|
patent license to make, have made, use, offer to sell, sell, import and |
|
761
|
|
|
|
|
|
|
otherwise transfer the Package with respect to any patent claims |
|
762
|
|
|
|
|
|
|
licensable by the Copyright Holder that are necessarily infringed by the |
|
763
|
|
|
|
|
|
|
Package. If you institute patent litigation (including a cross-claim or |
|
764
|
|
|
|
|
|
|
counterclaim) against any party alleging that the Package constitutes |
|
765
|
|
|
|
|
|
|
direct or contributory patent infringement, then this Artistic License |
|
766
|
|
|
|
|
|
|
to you shall terminate on the date that such litigation is filed. |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER |
|
769
|
|
|
|
|
|
|
AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. |
|
770
|
|
|
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
|
771
|
|
|
|
|
|
|
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY |
|
772
|
|
|
|
|
|
|
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR |
|
773
|
|
|
|
|
|
|
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR |
|
774
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, |
|
775
|
|
|
|
|
|
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=cut |
|
779
|
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
1; # End of Unicode::Homoglyph::Replace |