line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Unicode::Homoglyph::Replace; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
136646
|
use 5.008; |
|
2
|
|
|
|
|
19
|
|
4
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
57
|
|
5
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
76
|
|
6
|
2
|
|
|
2
|
|
656
|
use utf8; |
|
2
|
|
|
|
|
19
|
|
|
2
|
|
|
|
|
9
|
|
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
66
|
use Exporter qw(import); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
4265
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(replace_homoglyphs disguise); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Unicode::Homoglyph::Replace - replace homoglyphs with their ASCII lookalike equivalents |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Unicode::Homoglyph::Replace qw(replace_homoglyphs); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $replaced = replace_homoglyphs("..."); |
26
|
|
|
|
|
|
|
... |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Unicode has various homoglyphs - characters which look the same or mostly the |
33
|
|
|
|
|
|
|
same, but are different characters. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
If you're trying to filter input in some way, but support Unicode text, then |
36
|
|
|
|
|
|
|
such homoglyphs can be used to get past your filters. For instance, there are |
37
|
|
|
|
|
|
|
several other characters that look like a colon. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
So, if someone wants to be a ⅾⅰⅽk to bypass your filters, they can replace some |
40
|
|
|
|
|
|
|
characters with look-alike (or at least look-similar) characters which your |
41
|
|
|
|
|
|
|
profanity / spam filters won't recognise. (That example there was |
42
|
|
|
|
|
|
|
C<\x{217E}\x{2170}\x{217D}k> - i.e. the characters SMALL ROMAN NUMERAL |
43
|
|
|
|
|
|
|
FIVE HUNDRED, SMALL ROMAN NUMERAL ONE, SMALL ROMAN NUMERAL ONE HUNDRED, |
44
|
|
|
|
|
|
|
and a "k".) |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# This list of homoglyphs was lifted from Unicode::Homoglyph, and changed to |
49
|
|
|
|
|
|
|
# note which ASCII character each is a homoglyph for. (It strikes me as very |
50
|
|
|
|
|
|
|
# odd that the original version didn't do that...) |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
our %homoglyphs = ( |
53
|
|
|
|
|
|
|
" " => [ |
54
|
|
|
|
|
|
|
"\x{0020}", # # SPACE |
55
|
|
|
|
|
|
|
"\x{00A0}", # NO-BREAK SPACE |
56
|
|
|
|
|
|
|
"\x{2000}", # EN QUAD |
57
|
|
|
|
|
|
|
"\x{2001}", # EM QUAD |
58
|
|
|
|
|
|
|
"\x{2002}", # EN SPACE |
59
|
|
|
|
|
|
|
"\x{2003}", # EM SPACE |
60
|
|
|
|
|
|
|
"\x{2004}", # THREE-PER-EM SPACE |
61
|
|
|
|
|
|
|
"\x{2005}", # FOUR-PER-EM SPACE |
62
|
|
|
|
|
|
|
"\x{2006}", # SIX-PER-EM SPACE |
63
|
|
|
|
|
|
|
"\x{2007}", # FIGURE SPACE |
64
|
|
|
|
|
|
|
"\x{2008}", # PUNCTUATION SPACE |
65
|
|
|
|
|
|
|
"\x{2009}", # THIN SPACE |
66
|
|
|
|
|
|
|
"\x{200A}", # HAIR SPACE |
67
|
|
|
|
|
|
|
"\x{202F}", # NARROW NO-BREAK SPACE |
68
|
|
|
|
|
|
|
"\x{205F}", # MEDIUM MATHEMATICAL SPACE |
69
|
|
|
|
|
|
|
], |
70
|
|
|
|
|
|
|
"!" => [ |
71
|
|
|
|
|
|
|
"\x{0021}", # ! # EXCLAMATION MARK |
72
|
|
|
|
|
|
|
"\x{01C3}", # LATIN LETTER RETROFLEX CLICK |
73
|
|
|
|
|
|
|
"\x{2D51}", # TIFINAGH LETTER TUAREG YANG |
74
|
|
|
|
|
|
|
"\x{FE15}", # PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK |
75
|
|
|
|
|
|
|
"\x{FE57}", # SMALL EXCLAMATION MARK |
76
|
|
|
|
|
|
|
"\x{FF01}", # FULLWIDTH EXCLAMATION MARK |
77
|
|
|
|
|
|
|
], |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
"\"" => [ |
80
|
|
|
|
|
|
|
"\x{0022}", # " # QUOTATION MARK |
81
|
|
|
|
|
|
|
"\x{FF02}", # FULLWIDTH QUOTATION MARK |
82
|
|
|
|
|
|
|
], |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
"#" => [ |
85
|
|
|
|
|
|
|
"\x{0023}", # # # NUMBER SIGN |
86
|
|
|
|
|
|
|
"\x{FE5F}", # SMALL NUMBER SIGN |
87
|
|
|
|
|
|
|
"\x{FF03}", # FULLWIDTH NUMBER SIGN |
88
|
|
|
|
|
|
|
], |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
"\$" => [ |
91
|
|
|
|
|
|
|
"\x{0024}", # $ # DOLLAR SIGN |
92
|
|
|
|
|
|
|
"\x{FE69}", # SMALL DOLLAR SIGN |
93
|
|
|
|
|
|
|
"\x{FF04}", # FULLWIDTH DOLLAR SIGN |
94
|
|
|
|
|
|
|
], |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
"\%" => [ |
97
|
|
|
|
|
|
|
"\x{0025}", # % # PERCENT SIGN |
98
|
|
|
|
|
|
|
"\x{066A}", # ARABIC PERCENT SIGN |
99
|
|
|
|
|
|
|
"\x{2052}", # COMMERCIAL MINUS SIGN |
100
|
|
|
|
|
|
|
"\x{FE6A}", # SMALL PERCENT SIGN |
101
|
|
|
|
|
|
|
"\x{FF05}", # FULLWIDTH PERCENT SIGN |
102
|
|
|
|
|
|
|
], |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
"&" => [ |
105
|
|
|
|
|
|
|
"\x{0026}", # & # AMPERSAND |
106
|
|
|
|
|
|
|
"\x{FE60}", # SMALL AMPERSAND |
107
|
|
|
|
|
|
|
"\x{FF06}", # FULLWIDTH AMPERSAND |
108
|
|
|
|
|
|
|
], |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
"'" => [ |
111
|
|
|
|
|
|
|
"\x{0027}", # ' # APOSTROPHE |
112
|
|
|
|
|
|
|
"\x{02B9}", # MODIFIER LETTER PRIME |
113
|
|
|
|
|
|
|
"\x{0374}", # GREEK NUMERAL SIGN |
114
|
|
|
|
|
|
|
"\x{FF07}", # FULLWIDTH APOSTROPHE |
115
|
|
|
|
|
|
|
], |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
"(" => [ |
118
|
|
|
|
|
|
|
"\x{0028}", # ( # LEFT PARENTHESIS |
119
|
|
|
|
|
|
|
"\x{FE59}", # SMALL LEFT PARENTHESIS |
120
|
|
|
|
|
|
|
"\x{FF08}", # FULLWIDTH LEFT PARENTHESIS |
121
|
|
|
|
|
|
|
], |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
")" => [ |
124
|
|
|
|
|
|
|
"\x{0029}", # ) # RIGHT PARENTHESIS |
125
|
|
|
|
|
|
|
"\x{FF09}", # FULLWIDTH RIGHT PARENTHESIS |
126
|
|
|
|
|
|
|
"\x{FE5A}", # SMALL RIGHT PARENTHESIS |
127
|
|
|
|
|
|
|
], |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
"*" => [ |
130
|
|
|
|
|
|
|
"\x{002A}", # * # ASTERISK |
131
|
|
|
|
|
|
|
"\x{22C6}", # STAR OPERATOR |
132
|
|
|
|
|
|
|
"\x{FE61}", # SMALL ASTERISK |
133
|
|
|
|
|
|
|
"\x{FF0A}", # FULLWIDTH ASTERISK |
134
|
|
|
|
|
|
|
], |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
"+" => [ |
137
|
|
|
|
|
|
|
"\x{002B}", # + # PLUS SIGN |
138
|
|
|
|
|
|
|
"\x{16ED}", # RUNIC CROSS PUNCTUATION |
139
|
|
|
|
|
|
|
"\x{FE62}", # SMALL PLUS SIGN |
140
|
|
|
|
|
|
|
"\x{FF0B}", # FULLWIDTH PLUS SIGN |
141
|
|
|
|
|
|
|
], |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
"," => [ |
144
|
|
|
|
|
|
|
"\x{002C}", # , # COMMA |
145
|
|
|
|
|
|
|
"\x{02CF}", # MODIFIER LETTER LOW ACUTE ACCENT |
146
|
|
|
|
|
|
|
"\x{16E7}", # RUNIC LETTER SHORT-TWIG-YR |
147
|
|
|
|
|
|
|
"\x{201A}", # SINGLE LOW-9 QUOTATION MARK |
148
|
|
|
|
|
|
|
"\x{FF0C}", # FULLWIDTH COMMA |
149
|
|
|
|
|
|
|
], |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
"-" => [ |
152
|
|
|
|
|
|
|
"\x{002D}", # - # HYPHEN-MINUS |
153
|
|
|
|
|
|
|
"\x{02D7}", # MODIFIER LETTER MINUS SIGN |
154
|
|
|
|
|
|
|
"\x{2212}", # MINUS SIGN |
155
|
|
|
|
|
|
|
"\x{23BC}", # HORIZONTAL SCAN LINE-7 |
156
|
|
|
|
|
|
|
"\x{2574}", # BOX DRAWINGS LIGHT LEFT |
157
|
|
|
|
|
|
|
"\x{FE63}", # SMALL HYPHEN-MINUS |
158
|
|
|
|
|
|
|
"\x{FF0D}", # FULLWIDTH HYPHEN-MINUS |
159
|
|
|
|
|
|
|
], |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
"." => [ |
162
|
|
|
|
|
|
|
"\x{002E}", # . # FULL STOP |
163
|
|
|
|
|
|
|
"\x{2024}", # ONE DOT LEADER |
164
|
|
|
|
|
|
|
"\x{FF0E}", # FULLWIDTH FULL STOP |
165
|
|
|
|
|
|
|
], |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
"/" => [ |
168
|
|
|
|
|
|
|
"\x{002F}", # / # SOLIDUS |
169
|
|
|
|
|
|
|
"\x{FF0F}", # FULLWIDTH SOLIDUS |
170
|
|
|
|
|
|
|
"\x{1735}", # PHILIPPINE SINGLE PUNCTUATION |
171
|
|
|
|
|
|
|
"\x{2044}", # FRACTION SLASH |
172
|
|
|
|
|
|
|
"\x{2215}", # DIVISION SLASH |
173
|
|
|
|
|
|
|
"\x{29F8}", # BIG SOLIDUS |
174
|
|
|
|
|
|
|
], |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
"2" => [ |
177
|
|
|
|
|
|
|
"\x{0032}", # 2 # DIGIT TWO |
178
|
|
|
|
|
|
|
"\x{14BF}", # CANADIAN SYLLABICS SAYISI M |
179
|
|
|
|
|
|
|
], |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
"3" => [ |
182
|
|
|
|
|
|
|
"\x{0033}", # 3 # DIGIT THREE |
183
|
|
|
|
|
|
|
"\x{01B7}", # LATIN CAPITAL LETTER EZH |
184
|
|
|
|
|
|
|
"\x{2128}", # BLACK-LETTER CAPITAL Z |
185
|
|
|
|
|
|
|
], |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
"4" => [ |
188
|
|
|
|
|
|
|
"\x{0034}", # 4 # DIGIT FOUR |
189
|
|
|
|
|
|
|
"\x{13CE}", # CHEROKEE LETTER SE |
190
|
|
|
|
|
|
|
], |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
"6" => [ |
193
|
|
|
|
|
|
|
"\x{0036}", # 6 # DIGIT SIX |
194
|
|
|
|
|
|
|
"\x{13EE}", # CHEROKEE LETTER WV |
195
|
|
|
|
|
|
|
], |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
"9" => [ |
198
|
|
|
|
|
|
|
"\x{0039}", # 9 # DIGIT NINE |
199
|
|
|
|
|
|
|
"\x{13ED}", # CHEROKEE LETTER WU |
200
|
|
|
|
|
|
|
], |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
":" => [ |
203
|
|
|
|
|
|
|
"\x{003A}", # : # COLON |
204
|
|
|
|
|
|
|
"\x{02D0}", # MODIFIER LETTER TRIANGULAR COLON |
205
|
|
|
|
|
|
|
"\x{02F8}", # MODIFIER LETTER RAISED COLON |
206
|
|
|
|
|
|
|
"\x{0589}", # ARMENIAN FULL STOP |
207
|
|
|
|
|
|
|
"\x{1361}", # ETHIOPIC WORDSPACE |
208
|
|
|
|
|
|
|
"\x{16EC}", # RUNIC MULTIPLE PUNCTUATION |
209
|
|
|
|
|
|
|
"\x{205A}", # TWO DOT PUNCTUATION |
210
|
|
|
|
|
|
|
"\x{2236}", # RATIO |
211
|
|
|
|
|
|
|
"\x{2806}", # BRAILLE PATTERN DOTS-23 |
212
|
|
|
|
|
|
|
"\x{FE13}", # PRESENTATION FORM FOR VERTICAL COLON |
213
|
|
|
|
|
|
|
"\x{FE55}", # SMALL COLON |
214
|
|
|
|
|
|
|
"\x{FF1A}", # FULLWIDTH COLON |
215
|
|
|
|
|
|
|
], |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
";" => [ |
218
|
|
|
|
|
|
|
"\x{003B}", # ; # SEMICOLON |
219
|
|
|
|
|
|
|
"\x{037E}", # GREEK QUESTION MARK |
220
|
|
|
|
|
|
|
"\x{FE14}", # PRESENTATION FORM FOR VERTICAL SEMICOLON |
221
|
|
|
|
|
|
|
"\x{FE54}", # SMALL SEMICOLON |
222
|
|
|
|
|
|
|
"\x{FF1B}", # FULLWIDTH SEMICOLON |
223
|
|
|
|
|
|
|
], |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
"<" => [ |
226
|
|
|
|
|
|
|
"\x{003C}", # < # LESS-THAN SIGN |
227
|
|
|
|
|
|
|
"\x{02C2}", # MODIFIER LETTER LEFT ARROWHEAD |
228
|
|
|
|
|
|
|
"\x{2039}", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK |
229
|
|
|
|
|
|
|
"\x{227A}", # PRECEDES |
230
|
|
|
|
|
|
|
"\x{276E}", # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT |
231
|
|
|
|
|
|
|
"\x{2D66}", # TIFINAGH LETTER YE |
232
|
|
|
|
|
|
|
"\x{FE64}", # SMALL LESS-THAN SIGN |
233
|
|
|
|
|
|
|
"\x{FF1C}", # FULLWIDTH LESS-THAN SIGN |
234
|
|
|
|
|
|
|
], |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
"=" => [ |
237
|
|
|
|
|
|
|
"\x{003D}", # = # EQUALS SIGN |
238
|
|
|
|
|
|
|
"\x{2550}", # BOX DRAWINGS DOUBLE HORIZONTAL |
239
|
|
|
|
|
|
|
"\x{268C}", # DIGRAM FOR GREATER YANG |
240
|
|
|
|
|
|
|
"\x{FE66}", # SMALL EQUALS SIGN |
241
|
|
|
|
|
|
|
"\x{FF1D}", # FULLWIDTH EQUALS SIGN |
242
|
|
|
|
|
|
|
], |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
">" => [ |
245
|
|
|
|
|
|
|
"\x{003E}", # > # GREATER-THAN SIGN |
246
|
|
|
|
|
|
|
"\x{02C3}", # MODIFIER LETTER RIGHT ARROWHEAD |
247
|
|
|
|
|
|
|
"\x{203A}", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK |
248
|
|
|
|
|
|
|
"\x{227B}", # SUCCEEDS |
249
|
|
|
|
|
|
|
"\x{276F}", # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT |
250
|
|
|
|
|
|
|
"\x{FE65}", # SMALL GREATER-THAN SIGN |
251
|
|
|
|
|
|
|
"\x{FF1E}", # FULLWIDTH GREATER-THAN SIGN |
252
|
|
|
|
|
|
|
], |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
"?" => [ |
255
|
|
|
|
|
|
|
"\x{003F}", # ? # QUESTION MARK |
256
|
|
|
|
|
|
|
"\x{FE16}", # PRESENTATION FORM FOR VERTICAL QUESTION MARK |
257
|
|
|
|
|
|
|
"\x{FE56}", # SMALL QUESTION MARK |
258
|
|
|
|
|
|
|
"\x{FF1F}", # FULLWIDTH QUESTION MARK |
259
|
|
|
|
|
|
|
], |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
"\@" => [ |
262
|
|
|
|
|
|
|
"\x{0040}", # @ # COMMERCIAL AT |
263
|
|
|
|
|
|
|
"\x{FE6B}", # SMALL COMMERCIAL AT |
264
|
|
|
|
|
|
|
"\x{FF20}", # FULLWIDTH COMMERCIAL AT |
265
|
|
|
|
|
|
|
], |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
"A" => [ |
268
|
|
|
|
|
|
|
"\x{0041}", # A # LATIN CAPITAL LETTER A |
269
|
|
|
|
|
|
|
"\x{0391}", # GREEK CAPITAL LETTER ALPHA |
270
|
|
|
|
|
|
|
"\x{0410}", # CYRILLIC CAPITAL LETTER A |
271
|
|
|
|
|
|
|
"\x{13AA}", # CHEROKEE LETTER GO |
272
|
|
|
|
|
|
|
], |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
"B" => [ |
275
|
|
|
|
|
|
|
"\x{0042}", # B # LATIN CAPITAL LETTER B |
276
|
|
|
|
|
|
|
"\x{0392}", # GREEK CAPITAL LETTER BETA |
277
|
|
|
|
|
|
|
"\x{0412}", # CYRILLIC CAPITAL LETTER VE |
278
|
|
|
|
|
|
|
"\x{13F4}", # CHEROKEE LETTER YV |
279
|
|
|
|
|
|
|
"\x{15F7}", # CANADIAN SYLLABICS CARRIER KHE |
280
|
|
|
|
|
|
|
"\x{2C82}", # COPTIC CAPITAL LETTER VIDA |
281
|
|
|
|
|
|
|
], |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
"C" => [ |
284
|
|
|
|
|
|
|
"\x{0043}", # C # LATIN CAPITAL LETTER C |
285
|
|
|
|
|
|
|
"\x{03F9}", # GREEK CAPITAL LUNATE SIGMA SYMBOL |
286
|
|
|
|
|
|
|
"\x{0421}", # CYRILLIC CAPITAL LETTER ES |
287
|
|
|
|
|
|
|
"\x{13DF}", # CHEROKEE LETTER TLI |
288
|
|
|
|
|
|
|
"\x{216D}", # ROMAN NUMERAL ONE HUNDRED |
289
|
|
|
|
|
|
|
"\x{2CA4}", # COPTIC CAPITAL LETTER SIMA |
290
|
|
|
|
|
|
|
], |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
"D" => [ |
293
|
|
|
|
|
|
|
"\x{0044}", # D # LATIN CAPITAL LETTER D |
294
|
|
|
|
|
|
|
"\x{13A0}", # CHEROKEE LETTER A |
295
|
|
|
|
|
|
|
"\x{15EA}", # CANADIAN SYLLABICS CARRIER PE |
296
|
|
|
|
|
|
|
"\x{216E}", # ROMAN NUMERAL FIVE HUNDRED |
297
|
|
|
|
|
|
|
], |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
"E" => [ |
300
|
|
|
|
|
|
|
"\x{0045}", # E # LATIN CAPITAL LETTER E |
301
|
|
|
|
|
|
|
"\x{0395}", # GREEK CAPITAL LETTER EPSILON |
302
|
|
|
|
|
|
|
"\x{0415}", # CYRILLIC CAPITAL LETTER IE |
303
|
|
|
|
|
|
|
"\x{13AC}", # CHEROKEE LETTER GV |
304
|
|
|
|
|
|
|
], |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
"F" => [ |
307
|
|
|
|
|
|
|
"\x{0046}", # F # LATIN CAPITAL LETTER F |
308
|
|
|
|
|
|
|
"\x{15B4}", # CANADIAN SYLLABICS BLACKFOOT WE |
309
|
|
|
|
|
|
|
], |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
"G" => [ |
312
|
|
|
|
|
|
|
"\x{0047}", # G # LATIN CAPITAL LETTER G |
313
|
|
|
|
|
|
|
"\x{050C}", # CYRILLIC CAPITAL LETTER KOMI SJE |
314
|
|
|
|
|
|
|
"\x{13C0}", # CHEROKEE LETTER NAH |
315
|
|
|
|
|
|
|
], |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
"H" => [ |
318
|
|
|
|
|
|
|
"\x{0048}", # H # LATIN CAPITAL LETTER H |
319
|
|
|
|
|
|
|
"\x{0397}", # GREEK CAPITAL LETTER ETA |
320
|
|
|
|
|
|
|
"\x{041D}", # CYRILLIC CAPITAL LETTER EN |
321
|
|
|
|
|
|
|
"\x{12D8}", # ETHIOPIC SYLLABLE ZA |
322
|
|
|
|
|
|
|
"\x{13BB}", # CHEROKEE LETTER MI |
323
|
|
|
|
|
|
|
"\x{157C}", # CANADIAN SYLLABICS NUNAVUT H |
324
|
|
|
|
|
|
|
"\x{2C8E}", # COPTIC CAPITAL LETTER HATE |
325
|
|
|
|
|
|
|
], |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
"I" => [ |
328
|
|
|
|
|
|
|
"\x{0049}", # I # LATIN CAPITAL LETTER I |
329
|
|
|
|
|
|
|
"\x{0399}", # GREEK CAPITAL LETTER IOTA |
330
|
|
|
|
|
|
|
"\x{0406}", # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I |
331
|
|
|
|
|
|
|
"\x{2160}", # ROMAN NUMERAL ONE |
332
|
|
|
|
|
|
|
], |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
"J" => [ |
335
|
|
|
|
|
|
|
"\x{004A}", # J # LATIN CAPITAL LETTER J |
336
|
|
|
|
|
|
|
"\x{0408}", # CYRILLIC CAPITAL LETTER JE |
337
|
|
|
|
|
|
|
"\x{13AB}", # CHEROKEE LETTER GU |
338
|
|
|
|
|
|
|
"\x{148D}", # CANADIAN SYLLABICS CO |
339
|
|
|
|
|
|
|
], |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
"K" => [ |
342
|
|
|
|
|
|
|
"\x{004B}", # K # LATIN CAPITAL LETTER K |
343
|
|
|
|
|
|
|
"\x{039A}", # GREEK CAPITAL LETTER KAPPA |
344
|
|
|
|
|
|
|
"\x{13E6}", # CHEROKEE LETTER TSO |
345
|
|
|
|
|
|
|
"\x{16D5}", # RUNIC LETTER OPEN-P |
346
|
|
|
|
|
|
|
"\x{212A}", # KELVIN SIGN |
347
|
|
|
|
|
|
|
"\x{2C94}", # COPTIC CAPITAL LETTER KAPA |
348
|
|
|
|
|
|
|
], |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
"L" => [ |
351
|
|
|
|
|
|
|
"\x{004C}", # L # LATIN CAPITAL LETTER L |
352
|
|
|
|
|
|
|
"\x{13DE}", # CHEROKEE LETTER TLE |
353
|
|
|
|
|
|
|
"\x{14AA}", # CANADIAN SYLLABICS MA |
354
|
|
|
|
|
|
|
"\x{216C}", # ROMAN NUMERAL FIFTY |
355
|
|
|
|
|
|
|
], |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
"M" => [ |
358
|
|
|
|
|
|
|
"\x{004D}", # M # LATIN CAPITAL LETTER M |
359
|
|
|
|
|
|
|
"\x{039C}", # GREEK CAPITAL LETTER MU |
360
|
|
|
|
|
|
|
"\x{03FA}", # GREEK CAPITAL LETTER SAN |
361
|
|
|
|
|
|
|
"\x{041C}", # CYRILLIC CAPITAL LETTER EM |
362
|
|
|
|
|
|
|
"\x{13B7}", # CHEROKEE LETTER LU |
363
|
|
|
|
|
|
|
"\x{216F}", # ROMAN NUMERAL ONE THOUSAND |
364
|
|
|
|
|
|
|
], |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
"N" => [ |
367
|
|
|
|
|
|
|
"\x{004E}", # N # LATIN CAPITAL LETTER N |
368
|
|
|
|
|
|
|
"\x{039D}", # GREEK CAPITAL LETTER NU |
369
|
|
|
|
|
|
|
"\x{2C9A}", # COPTIC CAPITAL LETTER NI |
370
|
|
|
|
|
|
|
], |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
"O" => [ |
373
|
|
|
|
|
|
|
"\x{004F}", # O # LATIN CAPITAL LETTER O |
374
|
|
|
|
|
|
|
"\x{039F}", # GREEK CAPITAL LETTER OMICRON |
375
|
|
|
|
|
|
|
"\x{041E}", # CYRILLIC CAPITAL LETTER O |
376
|
|
|
|
|
|
|
"\x{2C9E}", # COPTIC CAPITAL LETTER O |
377
|
|
|
|
|
|
|
], |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
"P" => [ |
380
|
|
|
|
|
|
|
"\x{0050}", # P # LATIN CAPITAL LETTER P |
381
|
|
|
|
|
|
|
"\x{03A1}", # GREEK CAPITAL LETTER RHO |
382
|
|
|
|
|
|
|
"\x{0420}", # CYRILLIC CAPITAL LETTER ER |
383
|
|
|
|
|
|
|
"\x{13E2}", # CHEROKEE LETTER TLV |
384
|
|
|
|
|
|
|
"\x{2CA2}", # COPTIC CAPITAL LETTER RO |
385
|
|
|
|
|
|
|
], |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
"Q" => [ |
388
|
|
|
|
|
|
|
"\x{0051}", # Q # LATIN CAPITAL LETTER Q |
389
|
|
|
|
|
|
|
"\x{051A}", # CYRILLIC CAPITAL LETTER QA |
390
|
|
|
|
|
|
|
"\x{2D55}", # TIFINAGH LETTER YARR |
391
|
|
|
|
|
|
|
], |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
"R" => [ |
394
|
|
|
|
|
|
|
"\x{0052}", # R # LATIN CAPITAL LETTER R |
395
|
|
|
|
|
|
|
"\x{13A1}", # CHEROKEE LETTER E |
396
|
|
|
|
|
|
|
"\x{13D2}", # CHEROKEE LETTER SV |
397
|
|
|
|
|
|
|
"\x{1587}", # CANADIAN SYLLABICS TLHI |
398
|
|
|
|
|
|
|
], |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
"S" => [ |
401
|
|
|
|
|
|
|
"\x{0053}", # S # LATIN CAPITAL LETTER S |
402
|
|
|
|
|
|
|
"\x{0405}", # CYRILLIC CAPITAL LETTER DZE |
403
|
|
|
|
|
|
|
"\x{13DA}", # CHEROKEE LETTER DU |
404
|
|
|
|
|
|
|
], |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
"T" => [ |
407
|
|
|
|
|
|
|
"\x{0054}", # T # LATIN CAPITAL LETTER T |
408
|
|
|
|
|
|
|
"\x{03A4}", # GREEK CAPITAL LETTER TAU |
409
|
|
|
|
|
|
|
"\x{0422}", # CYRILLIC CAPITAL LETTER TE |
410
|
|
|
|
|
|
|
"\x{13A2}", # CHEROKEE LETTER I |
411
|
|
|
|
|
|
|
], |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
"V" => [ |
414
|
|
|
|
|
|
|
"\x{0056}", # V # LATIN CAPITAL LETTER V |
415
|
|
|
|
|
|
|
"\x{13D9}", # CHEROKEE LETTER DO |
416
|
|
|
|
|
|
|
"\x{2164}", # ROMAN NUMERAL FIVE |
417
|
|
|
|
|
|
|
], |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
"W" => [ |
420
|
|
|
|
|
|
|
"\x{0057}", # W # LATIN CAPITAL LETTER W |
421
|
|
|
|
|
|
|
"\x{13B3}", # CHEROKEE LETTER LA |
422
|
|
|
|
|
|
|
"\x{13D4}", # CHEROKEE LETTER TA |
423
|
|
|
|
|
|
|
], |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
"X" => [ |
426
|
|
|
|
|
|
|
"\x{0058}", # X # LATIN CAPITAL LETTER X |
427
|
|
|
|
|
|
|
"\x{03A7}", # GREEK CAPITAL LETTER CHI |
428
|
|
|
|
|
|
|
"\x{0425}", # CYRILLIC CAPITAL LETTER HA |
429
|
|
|
|
|
|
|
"\x{2169}", # ROMAN NUMERAL TEN |
430
|
|
|
|
|
|
|
"\x{2CAC}", # COPTIC CAPITAL LETTER KHI |
431
|
|
|
|
|
|
|
], |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
"Y" => [ |
434
|
|
|
|
|
|
|
"\x{0059}", # Y # LATIN CAPITAL LETTER Y |
435
|
|
|
|
|
|
|
"\x{03A5}", # GREEK CAPITAL LETTER UPSILON |
436
|
|
|
|
|
|
|
"\x{2CA8}", # COPTIC CAPITAL LETTER UA |
437
|
|
|
|
|
|
|
], |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
"Z" => [ |
440
|
|
|
|
|
|
|
"\x{005A}", # Z # LATIN CAPITAL LETTER Z |
441
|
|
|
|
|
|
|
"\x{0396}", # GREEK CAPITAL LETTER ZETA |
442
|
|
|
|
|
|
|
"\x{13C3}", # CHEROKEE LETTER NO |
443
|
|
|
|
|
|
|
], |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
"[" => [ |
446
|
|
|
|
|
|
|
"\x{005B}", # [ # LEFT SQUARE BRACKET |
447
|
|
|
|
|
|
|
"\x{FF3B}", # FULLWIDTH LEFT SQUARE BRACKET |
448
|
|
|
|
|
|
|
], |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
"\\" => [ |
451
|
|
|
|
|
|
|
"\x{005C}", # \ # REVERSE SOLIDUS |
452
|
|
|
|
|
|
|
"\x{2216}", # SET MINUS |
453
|
|
|
|
|
|
|
"\x{29F5}", # REVERSE SOLIDUS OPERATOR |
454
|
|
|
|
|
|
|
"\x{29F9}", # BIG REVERSE SOLIDUS |
455
|
|
|
|
|
|
|
"\x{FE68}", # SMALL REVERSE SOLIDUS |
456
|
|
|
|
|
|
|
"\x{FF3C}", # FULLWIDTH REVERSE SOLIDUS |
457
|
|
|
|
|
|
|
], |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
"]" => [ |
460
|
|
|
|
|
|
|
"\x{005D}", # ] # RIGHT SQUARE BRACKET |
461
|
|
|
|
|
|
|
"\x{FF3D}", # FULLWIDTH RIGHT SQUARE BRACKET |
462
|
|
|
|
|
|
|
], |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
"^" => [ |
465
|
|
|
|
|
|
|
"\x{005E}", # ^ # CIRCUMFLEX ACCENT |
466
|
|
|
|
|
|
|
"\x{02C4}", # MODIFIER LETTER UP ARROWHEAD |
467
|
|
|
|
|
|
|
"\x{02C6}", # MODIFIER LETTER CIRCUMFLEX ACCENT |
468
|
|
|
|
|
|
|
"\x{1DBA}", # MODIFIER LETTER SMALL TURNED V |
469
|
|
|
|
|
|
|
"\x{2303}", # UP ARROWHEAD |
470
|
|
|
|
|
|
|
"\x{FF3E}", # FULLWIDTH CIRCUMFLEX ACCENT |
471
|
|
|
|
|
|
|
], |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
"_" => [ |
474
|
|
|
|
|
|
|
"\x{005F}", # _ # LOW LINE |
475
|
|
|
|
|
|
|
"\x{02CD}", # MODIFIER LETTER LOW MACRON |
476
|
|
|
|
|
|
|
"\x{268A}", # MONOGRAM FOR YANG |
477
|
|
|
|
|
|
|
"\x{FF3F}", # FULLWIDTH LOW LINE |
478
|
|
|
|
|
|
|
], |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
"`" => [ |
481
|
|
|
|
|
|
|
"\x{0060}", # ` # GRAVE ACCENT |
482
|
|
|
|
|
|
|
"\x{02CB}", # MODIFIER LETTER GRAVE ACCENT |
483
|
|
|
|
|
|
|
"\x{1FEF}", # GREEK VARIA |
484
|
|
|
|
|
|
|
"\x{2035}", # REVERSED PRIME |
485
|
|
|
|
|
|
|
"\x{FF40}", # FULLWIDTH GRAVE ACCENT |
486
|
|
|
|
|
|
|
], |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
"a" => [ |
489
|
|
|
|
|
|
|
"\x{0061}", # a # LATIN SMALL LETTER A |
490
|
|
|
|
|
|
|
"\x{0251}", # LATIN SMALL LETTER ALPHA |
491
|
|
|
|
|
|
|
"\x{0430}", # CYRILLIC SMALL LETTER A |
492
|
|
|
|
|
|
|
], |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
"c" => [ |
495
|
|
|
|
|
|
|
"\x{0063}", # c # LATIN SMALL LETTER C |
496
|
|
|
|
|
|
|
"\x{03F2}", # GREEK LUNATE SIGMA SYMBOL |
497
|
|
|
|
|
|
|
"\x{0441}", # CYRILLIC SMALL LETTER ES |
498
|
|
|
|
|
|
|
"\x{217D}", # SMALL ROMAN NUMERAL ONE HUNDRED |
499
|
|
|
|
|
|
|
], |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
"d" => [ |
502
|
|
|
|
|
|
|
"\x{0064}", # d # LATIN SMALL LETTER D |
503
|
|
|
|
|
|
|
"\x{0501}", # CYRILLIC SMALL LETTER KOMI DE |
504
|
|
|
|
|
|
|
"\x{217E}", # SMALL ROMAN NUMERAL FIVE HUNDRED |
505
|
|
|
|
|
|
|
], |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
"e" => [ |
508
|
|
|
|
|
|
|
"\x{0065}", # e # LATIN SMALL LETTER E |
509
|
|
|
|
|
|
|
"\x{0435}", # CYRILLIC SMALL LETTER IE |
510
|
|
|
|
|
|
|
"\x{1971}", # TAI LE LETTER TONE-3 |
511
|
|
|
|
|
|
|
], |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
"g" => [ |
514
|
|
|
|
|
|
|
"\x{0067}", # g # LATIN SMALL LETTER G |
515
|
|
|
|
|
|
|
"\x{0261}", # LATIN SMALL LETTER SCRIPT G |
516
|
|
|
|
|
|
|
], |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
"h" => [ |
519
|
|
|
|
|
|
|
"\x{0068}", # h # LATIN SMALL LETTER H |
520
|
|
|
|
|
|
|
"\x{04BB}", # CYRILLIC SMALL LETTER SHHA |
521
|
|
|
|
|
|
|
], |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
"i" => [ |
524
|
|
|
|
|
|
|
"\x{0069}", # i # LATIN SMALL LETTER I |
525
|
|
|
|
|
|
|
"\x{0456}", # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
526
|
|
|
|
|
|
|
"\x{2170}", # SMALL ROMAN NUMERAL ONE |
527
|
|
|
|
|
|
|
], |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
"j" => [ |
530
|
|
|
|
|
|
|
"\x{006A}", # j # LATIN SMALL LETTER J |
531
|
|
|
|
|
|
|
"\x{03F3}", # GREEK LETTER YOT |
532
|
|
|
|
|
|
|
"\x{0458}", # CYRILLIC SMALL LETTER JE |
533
|
|
|
|
|
|
|
], |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
"l" => [ |
536
|
|
|
|
|
|
|
"\x{006C}", # l # LATIN SMALL LETTER L |
537
|
|
|
|
|
|
|
"\x{217C}", # SMALL ROMAN NUMERAL FIFTY |
538
|
|
|
|
|
|
|
], |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
"m" => [ |
541
|
|
|
|
|
|
|
"\x{006D}", # m # LATIN SMALL LETTER M |
542
|
|
|
|
|
|
|
"\x{217F}", # SMALL ROMAN NUMERAL ONE THOUSAND |
543
|
|
|
|
|
|
|
], |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
"n" => [ |
546
|
|
|
|
|
|
|
"\x{006E}", # n # LATIN SMALL LETTER N |
547
|
|
|
|
|
|
|
"\x{1952}", # TAI LE LETTER NGA |
548
|
|
|
|
|
|
|
], |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
"o" => [ |
551
|
|
|
|
|
|
|
"\x{006F}", # o # LATIN SMALL LETTER O |
552
|
|
|
|
|
|
|
"\x{03BF}", # GREEK SMALL LETTER OMICRON |
553
|
|
|
|
|
|
|
"\x{043E}", # CYRILLIC SMALL LETTER O |
554
|
|
|
|
|
|
|
"\x{0D20}", # MALAYALAM LETTER TTHA |
555
|
|
|
|
|
|
|
"\x{2C9F}", # COPTIC SMALL LETTER O |
556
|
|
|
|
|
|
|
], |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
"p" => [ |
559
|
|
|
|
|
|
|
"\x{0070}", # p # LATIN SMALL LETTER P |
560
|
|
|
|
|
|
|
"\x{0440}", # CYRILLIC SMALL LETTER ER |
561
|
|
|
|
|
|
|
"\x{2CA3}", # COPTIC SMALL LETTER RO |
562
|
|
|
|
|
|
|
], |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
"s" => [ |
565
|
|
|
|
|
|
|
"\x{0073}", # s # LATIN SMALL LETTER S |
566
|
|
|
|
|
|
|
"\x{0073}", # s # LATIN SMALL LETTER S |
567
|
|
|
|
|
|
|
"\x{0455}", # CYRILLIC SMALL LETTER DZE |
568
|
|
|
|
|
|
|
], |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
"u" => [ |
571
|
|
|
|
|
|
|
"\x{0075}", # u # LATIN SMALL LETTER U |
572
|
|
|
|
|
|
|
"\x{1959}", # TAI LE LETTER PA |
573
|
|
|
|
|
|
|
"\x{222A}", # UNION |
574
|
|
|
|
|
|
|
], |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
"v" => [ |
577
|
|
|
|
|
|
|
"\x{0076}", # v # LATIN SMALL LETTER V |
578
|
|
|
|
|
|
|
"\x{1D20}", # LATIN LETTER SMALL CAPITAL V |
579
|
|
|
|
|
|
|
"\x{2174}", # SMALL ROMAN NUMERAL FIVE |
580
|
|
|
|
|
|
|
"\x{2228}", # LOGICAL OR |
581
|
|
|
|
|
|
|
"\x{22C1}", # N-ARY LOGICAL OR |
582
|
|
|
|
|
|
|
], |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
"w" => [ |
585
|
|
|
|
|
|
|
"\x{0077}", # w # LATIN SMALL LETTER W |
586
|
|
|
|
|
|
|
"\x{1D21}", # LATIN LETTER SMALL CAPITAL W |
587
|
|
|
|
|
|
|
], |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
"x" => [ |
591
|
|
|
|
|
|
|
"\x{0078}", # x # LATIN SMALL LETTER X |
592
|
|
|
|
|
|
|
"\x{0445}", # CYRILLIC SMALL LETTER HA |
593
|
|
|
|
|
|
|
"\x{2179}", # SMALL ROMAN NUMERAL TEN |
594
|
|
|
|
|
|
|
"\x{2CAD}", # COPTIC SMALL LETTER KHI |
595
|
|
|
|
|
|
|
], |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
"y" => [ |
598
|
|
|
|
|
|
|
"\x{0079}", # y # LATIN SMALL LETTER Y |
599
|
|
|
|
|
|
|
"\x{0443}", # CYRILLIC SMALL LETTER U |
600
|
|
|
|
|
|
|
"\x{1EFF}", # LATIN SMALL LETTER Y WITH LOOP |
601
|
|
|
|
|
|
|
], |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
"z" => [ |
604
|
|
|
|
|
|
|
"\x{007A}", # z # LATIN SMALL LETTER Z |
605
|
|
|
|
|
|
|
"\x{1D22}", # LATIN LETTER SMALL CAPITAL Z |
606
|
|
|
|
|
|
|
], |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
"{" => [ |
609
|
|
|
|
|
|
|
"\x{007B}", # { # LEFT CURLY BRACKET |
610
|
|
|
|
|
|
|
"\x{FE5B}", # SMALL LEFT CURLY BRACKET |
611
|
|
|
|
|
|
|
"\x{FF5B}", # FULLWIDTH LEFT CURLY BRACKET |
612
|
|
|
|
|
|
|
], |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
"|" => [ |
615
|
|
|
|
|
|
|
"\x{007C}", # | # VERTICAL LINE |
616
|
|
|
|
|
|
|
"\x{01C0}", # LATIN LETTER DENTAL CLICK |
617
|
|
|
|
|
|
|
"\x{16C1}", # RUNIC LETTER ISAZ IS ISS I |
618
|
|
|
|
|
|
|
"\x{239C}", # LEFT PARENTHESIS EXTENSION |
619
|
|
|
|
|
|
|
"\x{239F}", # RIGHT PARENTHESIS EXTENSION |
620
|
|
|
|
|
|
|
"\x{23A2}", # LEFT SQUARE BRACKET EXTENSION |
621
|
|
|
|
|
|
|
"\x{23A5}", # RIGHT SQUARE BRACKET EXTENSION |
622
|
|
|
|
|
|
|
"\x{23AA}", # CURLY BRACKET EXTENSION |
623
|
|
|
|
|
|
|
"\x{23AE}", # INTEGRAL EXTENSION |
624
|
|
|
|
|
|
|
"\x{FF5C}", # FULLWIDTH VERTICAL LINE |
625
|
|
|
|
|
|
|
"\x{FFE8}", # HALFWIDTH FORMS LIGHT VERTICAL |
626
|
|
|
|
|
|
|
], |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
"}" => [ |
629
|
|
|
|
|
|
|
"\x{007D}", # } # RIGHT CURLY BRACKET |
630
|
|
|
|
|
|
|
"\x{FE5C}", # SMALL RIGHT CURLY BRACKET |
631
|
|
|
|
|
|
|
"\x{FF5D}", # FULLWIDTH RIGHT CURLY BRACKET |
632
|
|
|
|
|
|
|
], |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
"~" => [ |
635
|
|
|
|
|
|
|
"\x{007E}", # ~ # TILDE |
636
|
|
|
|
|
|
|
"\x{02DC}", # SMALL TILDE |
637
|
|
|
|
|
|
|
"\x{2053}", # SWUNG DASH |
638
|
|
|
|
|
|
|
"\x{223C}", # TILDE OPERATOR |
639
|
|
|
|
|
|
|
"\x{FF5E}", # FULLWIDTH TILDE |
640
|
|
|
|
|
|
|
], |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
); |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
my %replace_map; |
646
|
|
|
|
|
|
|
# Fill %replace_map with the inverse of %homoglyphs: every look-alike
# character listed for an ASCII character becomes a key whose value is that
# ASCII character.  Called for its side effect only.
sub _build_replace_map {
    for my $plain (keys %homoglyphs) {
        my $lookalikes = $homoglyphs{$plain};

        # Hash-slice assignment: point every entry of this list at $plain.
        @replace_map{ @$lookalikes } = ($plain) x @$lookalikes;
    }
}
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Replace every known homoglyph in a string with the ASCII character it
# imitates.
#
# Takes one string and returns a copy in which each character that has an
# entry in the reverse lookup table (%replace_map) is swapped for the ASCII
# character stored there; any other character passes through unchanged.
# An empty string yields an empty string, and an undefined argument is
# handed back as undef.
#
# TODO: this would probably be much more efficient if we build up a tr///
# transliteration, I suspect.
sub replace_homoglyphs {
    my ($input) = @_;

    # Nothing to translate; also avoids an "uninitialized" warning from split.
    return $input unless defined $input;

    # The reverse lookup table is built lazily, on first use.
    _build_replace_map() unless keys %replace_map;

    # exists + ternary rather than the defined-or operator (//): this module
    # declares "use 5.008", and // only exists from Perl 5.10 onwards.
    # join() also guarantees a defined (possibly empty) result.
    return join '',
        map { exists $replace_map{$_} ? $replace_map{$_} : $_ }
        split //, $input;
}
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# Mostly for testing: take a string and, for each character we have a list of
# homoglyphs for, pick one entry from that list at random and use it instead.
# Characters with no (or an empty) list in %homoglyphs are copied unchanged.
#
# The lists in this file start with the ASCII character itself, so a
# character may also come out as it went in.  The pick is uniform over list
# *entries*, so a code point that appears twice in a list is twice as likely
# to be chosen.
#
# Returns the disguised string.  An empty string yields an empty string, and
# an undefined argument is handed back as undef.
sub disguise {
    my ($input) = @_;

    # Nothing to disguise; also avoids an "uninitialized" warning from split.
    return $input unless defined $input;

    # Start from '' so that empty input gives '' rather than undef.
    my $result = '';
    for my $char (split //, $input) {
        my $candidates = $homoglyphs{$char};

        # Guard against an empty list as well as a missing one: indexing an
        # empty array would append undef.
        $result .= ($candidates && @$candidates)
            ? $candidates->[ int rand @$candidates ]
            : $char;
    }
    return $result;
}
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=head1 AUTHOR |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
David Precious, C<< >> |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=head1 BUGS |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
693
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
694
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=head1 SUPPORT |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
perldoc Unicode::Homoglyph::Replace |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
You can also look for information at: |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=over 4 |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
L |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
L |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=item * CPAN Ratings |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
L |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=item * Search CPAN |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
L |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=back |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
=head1 SEE ALSO |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
L<Unicode::Homoglyph>, where the list of homoglyphs came from. |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
Copyright 2018 David Precious. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
742
|
|
|
|
|
|
|
under the terms of the Artistic License (2.0). You may obtain a |
743
|
|
|
|
|
|
|
copy of the full license at: |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
L |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified |
748
|
|
|
|
|
|
|
Versions is governed by this Artistic License. By using, modifying or |
749
|
|
|
|
|
|
|
distributing the Package, you accept this license. Do not use, modify, |
750
|
|
|
|
|
|
|
or distribute the Package, if you do not accept this license. |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made |
753
|
|
|
|
|
|
|
by someone other than you, you are nevertheless required to ensure that |
754
|
|
|
|
|
|
|
your Modified Version complies with the requirements of this license. |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service |
757
|
|
|
|
|
|
|
mark, tradename, or logo of the Copyright Holder. |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge |
760
|
|
|
|
|
|
|
patent license to make, have made, use, offer to sell, sell, import and |
761
|
|
|
|
|
|
|
otherwise transfer the Package with respect to any patent claims |
762
|
|
|
|
|
|
|
licensable by the Copyright Holder that are necessarily infringed by the |
763
|
|
|
|
|
|
|
Package. If you institute patent litigation (including a cross-claim or |
764
|
|
|
|
|
|
|
counterclaim) against any party alleging that the Package constitutes |
765
|
|
|
|
|
|
|
direct or contributory patent infringement, then this Artistic License |
766
|
|
|
|
|
|
|
to you shall terminate on the date that such litigation is filed. |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER |
769
|
|
|
|
|
|
|
AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. |
770
|
|
|
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
771
|
|
|
|
|
|
|
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY |
772
|
|
|
|
|
|
|
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR |
773
|
|
|
|
|
|
|
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR |
774
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, |
775
|
|
|
|
|
|
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=cut |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
1; # End of Unicode::Homoglyph::Replace |