line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Unicode::Security; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
149999
|
use 5.008; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
216
|
|
4
|
7
|
|
|
7
|
|
26
|
use strict; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
215
|
|
5
|
7
|
|
|
7
|
|
29
|
use warnings; |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
199
|
|
6
|
7
|
|
|
7
|
|
27
|
use Exporter qw(import); |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
216
|
|
7
|
|
|
|
|
|
|
|
8
|
7
|
|
|
7
|
|
7357
|
use Unicode::Security::Confusables; |
|
7
|
|
|
|
|
27
|
|
|
7
|
|
|
|
|
2399
|
|
9
|
7
|
|
|
7
|
|
156123
|
use Unicode::Normalize qw(NFD); |
|
7
|
|
|
|
|
23568
|
|
|
7
|
|
|
|
|
633
|
|
10
|
7
|
|
|
7
|
|
6721
|
use Unicode::UCD qw(charinfo charscript); |
|
7
|
|
|
|
|
604287
|
|
|
7
|
|
|
|
|
1127
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
13
|
|
|
|
|
|
|
$VERSION = eval $VERSION; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
16
|
|
|
|
|
|
|
skeleton confusable soss restriction_level mixed_script |
17
|
|
|
|
|
|
|
mixed_number mixed_num |
18
|
|
|
|
|
|
|
whole_script_confusable mixed_script_confusable |
19
|
|
|
|
|
|
|
ws_confusable ms_confusable |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our (%MA, %WS); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
use constant { |
25
|
7
|
|
|
|
|
5199
|
UNRESTRICTED => 0, |
26
|
|
|
|
|
|
|
ASCII_ONLY => 1, |
27
|
|
|
|
|
|
|
SINGLE_SCRIPT => 2, |
28
|
|
|
|
|
|
|
HIGHLY_RESTRICTIVE => 3, |
29
|
|
|
|
|
|
|
MODERATELY_RESTRICTIVE => 4, |
30
|
|
|
|
|
|
|
MINIMALLY_RESTRICTIVE => 5, |
31
|
7
|
|
|
7
|
|
69
|
}; |
|
7
|
|
|
|
|
16
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my %recommended_script = map { $_ => \1 } qw( |
34
|
|
|
|
|
|
|
Common Inherited Arabic Armenian Bengali Bopomofo Cyrillic Devanagari |
35
|
|
|
|
|
|
|
Ethiopic Georgian Greek Gujarati Gurmukhi Han Hangul Hebrew Hiragana |
36
|
|
|
|
|
|
|
Kannada Katakana Khmer Lao Latin Malayalam Myanmar Oriya Sinhala Tamil |
37
|
|
|
|
|
|
|
Telugu Thaana Thai Tibetan |
38
|
|
|
|
|
|
|
); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my %aspirational_script = map { $_ => \1 } qw( |
41
|
|
|
|
|
|
|
Canadian_Aboriginal Miao Mongolian Tifinagh Yi |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my %highly_restrictive = map { $_ => \1 } ( |
45
|
|
|
|
|
|
|
'', 'Hiragana', 'Katakana', 'Hiragana, Katakana', 'Bopomofo', 'Hangul', |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub skeleton { |
50
|
10
|
|
|
10
|
1
|
46
|
my $str = NFD shift; |
51
|
10
|
100
|
|
|
|
34
|
my $m = $str =~ s{(.)}{ my $c = $MA{$1}; defined $c ? $c : $1 }eg; |
|
49
|
|
|
|
|
64
|
|
|
49
|
|
|
|
|
93
|
|
52
|
10
|
50
|
|
|
|
53
|
return $m ? NFD $str : $str; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub confusable { |
57
|
5
|
|
|
5
|
1
|
18
|
return skeleton($_[0]) eq skeleton($_[1]); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Algorithm described here: |
62
|
|
|
|
|
|
|
# http://www.unicode.org/reports/tr39/#Whole_Script_Confusables |
63
|
|
|
|
|
|
|
sub whole_script_confusable { |
64
|
3
|
|
|
3
|
1
|
12
|
my ($target, $str) = @_; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Canonicalize the script name to match the format used in %WS. |
67
|
3
|
|
|
|
|
8
|
$target = ucfirst lc $target; |
68
|
|
|
|
|
|
|
|
69
|
3
|
|
|
|
|
37
|
my %soss = soss(NFD $str); |
70
|
3
|
|
|
|
|
6
|
delete @soss{qw(Common Inherited)}; |
71
|
|
|
|
|
|
|
|
72
|
3
|
50
|
|
|
|
9
|
my $count = keys %soss or return ''; |
73
|
3
|
50
|
|
|
|
9
|
return if 1 < $count; |
74
|
3
|
|
|
|
|
5
|
my ($source) = keys %soss; |
75
|
|
|
|
|
|
|
|
76
|
3
|
|
|
|
|
9
|
my $chars = $WS{$source}{$target}; |
77
|
3
|
100
|
|
|
|
1
|
do { return 1 if $chars->{$_} } for keys %{ $soss{$source} }; |
|
3
|
|
|
|
|
47
|
|
|
7
|
|
|
|
|
31
|
|
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
*ws_confusable = *ws_confusable = \&whole_script_confusable; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Algorithm described here: |
83
|
|
|
|
|
|
|
# http://www.unicode.org/reports/tr39/#Mixed_Script_Confusables |
84
|
|
|
|
|
|
|
sub mixed_script_confusable { |
85
|
3
|
|
|
3
|
1
|
50
|
my %soss = soss(NFD $_[0]); |
86
|
3
|
|
|
|
|
8
|
delete @soss{qw(Common Inherited)}; |
87
|
|
|
|
|
|
|
|
88
|
3
|
|
|
|
|
6
|
my @soss = keys %soss; |
89
|
3
|
|
|
|
|
7
|
for my $source (@soss) { |
90
|
5
|
|
|
|
|
4
|
my $sum = 0; |
91
|
5
|
|
|
|
|
5
|
for my $target (@soss) { |
92
|
11
|
100
|
|
|
|
23
|
next if $target eq $source; |
93
|
|
|
|
|
|
|
|
94
|
7
|
|
|
|
|
6
|
my $nok = 0; |
95
|
7
|
|
|
|
|
14
|
my $chars = $WS{$target}{$source}; |
96
|
7
|
|
|
|
|
8
|
for my $char (keys %{ $soss{$target} }) { |
|
7
|
|
|
|
|
13
|
|
97
|
10
|
100
|
|
|
|
27
|
$nok = 1, last unless $chars->{$char}; |
98
|
|
|
|
|
|
|
} |
99
|
7
|
100
|
|
|
|
12
|
last if $nok; |
100
|
4
|
|
|
|
|
5
|
$sum++; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
5
|
100
|
|
|
|
24
|
return 1 if 1 == @soss - $sum; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
1
|
|
|
|
|
6
|
return ''; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
*ms_confusable = *ms_confusable = \&mixed_script_confusable; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub soss { |
112
|
24
|
|
|
24
|
1
|
30
|
my %soss; |
113
|
24
|
|
|
|
|
92
|
for my $char (split //, $_[0]) { |
114
|
146
|
|
|
|
|
285
|
my $script = charscript(ord($char)); |
115
|
146
|
50
|
|
|
|
42614
|
$script = 'Unknown' unless defined $script; |
116
|
146
|
|
|
|
|
329
|
$soss{$script}{$char} = \1; |
117
|
|
|
|
|
|
|
} |
118
|
24
|
|
|
|
|
106
|
return %soss; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub mixed_script { |
123
|
4
|
|
|
4
|
1
|
1114
|
my %soss = soss($_[0]); |
124
|
4
|
|
|
|
|
13
|
delete @soss{qw(Common Inherited)}; |
125
|
4
|
|
|
|
|
20
|
return 1 < keys %soss; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub mixed_number { |
130
|
7
|
|
|
7
|
1
|
2170
|
my %z; |
131
|
7
|
|
|
|
|
29
|
for my $char (split //, $_[0]) { |
132
|
36
|
50
|
|
|
|
75
|
my $info = charinfo(ord $char) or next; |
133
|
|
|
|
|
|
|
|
134
|
36
|
|
|
|
|
176752
|
my $num = $info->{decimal}; |
135
|
36
|
100
|
|
|
|
81
|
next unless length $num; |
136
|
|
|
|
|
|
|
|
137
|
28
|
|
|
|
|
103
|
$z{ ord($char) - $num } = \1; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
7
|
|
|
|
|
35
|
return 1 < keys %z; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
*mixed_num = *mixed_num = \&mixed_number; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Algorithm described here: |
146
|
|
|
|
|
|
|
# http://www.unicode.org/reports/tr39/#Restriction_Level_Detection |
147
|
|
|
|
|
|
|
sub restriction_level { |
148
|
17
|
|
|
17
|
1
|
5314
|
my ($str, $non_id_regex) = @_; |
149
|
|
|
|
|
|
|
|
150
|
17
|
100
|
|
|
|
71
|
$non_id_regex = qr/\P{ID_Continue}/ unless defined $non_id_regex; |
151
|
|
|
|
|
|
|
|
152
|
17
|
100
|
|
|
|
132
|
return UNRESTRICTED if $str =~ /$non_id_regex/; |
153
|
16
|
100
|
|
|
|
59
|
return ASCII_ONLY if $str !~ /\P{ASCII}/; |
154
|
|
|
|
|
|
|
|
155
|
14
|
|
|
|
|
29
|
my %soss = soss($str); |
156
|
14
|
|
|
|
|
29
|
delete @soss{qw(Common Inherited)}; |
157
|
14
|
100
|
|
|
|
34
|
return SINGLE_SCRIPT if 1 == keys %soss; |
158
|
|
|
|
|
|
|
|
159
|
13
|
|
|
|
|
21
|
delete $soss{Latin}; |
160
|
13
|
|
|
|
|
25
|
my %copy = %soss; |
161
|
13
|
|
|
|
|
10
|
delete $copy{Han}; |
162
|
13
|
|
|
|
|
37
|
my $soss = join ', ', sort keys %copy; |
163
|
13
|
100
|
|
|
|
51
|
return HIGHLY_RESTRICTIVE if $highly_restrictive{$soss}; |
164
|
|
|
|
|
|
|
|
165
|
6
|
50
|
|
|
|
14
|
if (1 == keys %soss) { |
166
|
6
|
|
|
|
|
7
|
my ($script) = keys %soss; |
167
|
6
|
100
|
66
|
|
|
61
|
return MODERATELY_RESTRICTIVE |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
168
|
|
|
|
|
|
|
if ($recommended_script{$script} or $aspirational_script{$script}) |
169
|
|
|
|
|
|
|
and not ($soss{Cyrillic} or $soss{Greek}); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
4
|
|
|
|
|
15
|
return MINIMALLY_RESTRICTIVE; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
1; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
__END__ |