| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Unicode::Security; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 7 |  |  | 7 |  | 403718 | use 5.008; | 
|  | 7 |  |  |  |  | 21 |  | 
|  | 7 |  |  |  |  | 262 |  | 
| 4 | 7 |  |  | 7 |  | 34 | use strict; | 
|  | 7 |  |  |  |  | 9 |  | 
|  | 7 |  |  |  |  | 238 |  | 
| 5 | 7 |  |  | 7 |  | 28 | use warnings; | 
|  | 7 |  |  |  |  | 25 |  | 
|  | 7 |  |  |  |  | 217 |  | 
| 6 | 7 |  |  | 7 |  | 30 | use Exporter qw(import); | 
|  | 7 |  |  |  |  | 9 |  | 
|  | 7 |  |  |  |  | 232 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 7 |  |  | 7 |  | 8442 | use Unicode::Security::Confusables; | 
|  | 7 |  |  |  |  | 31 |  | 
|  | 7 |  |  |  |  | 2717 |  | 
| 9 | 7 |  |  | 7 |  | 6843 | use Unicode::Normalize qw(NFD); | 
|  | 7 |  |  |  |  | 15335 |  | 
|  | 7 |  |  |  |  | 688 |  | 
| 10 | 7 |  |  | 7 |  | 7373 | use Unicode::UCD qw(charinfo charscript); | 
|  | 7 |  |  |  |  | 345980 |  | 
|  | 7 |  |  |  |  | 1292 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our $VERSION = '0.07'; | 
| 13 |  |  |  |  |  |  | $VERSION = eval $VERSION; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 16 |  |  |  |  |  |  | skeleton confusable soss restriction_level mixed_script | 
| 17 |  |  |  |  |  |  | mixed_number mixed_num | 
| 18 |  |  |  |  |  |  | whole_script_confusable mixed_script_confusable | 
| 19 |  |  |  |  |  |  | ws_confusable ms_confusable | 
| 20 |  |  |  |  |  |  | ); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | our (%MA, %WS); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | use constant { | 
| 25 | 7 |  |  |  |  | 6492 | UNRESTRICTED           => 0, | 
| 26 |  |  |  |  |  |  | ASCII_ONLY             => 1, | 
| 27 |  |  |  |  |  |  | SINGLE_SCRIPT          => 2, | 
| 28 |  |  |  |  |  |  | HIGHLY_RESTRICTIVE     => 3, | 
| 29 |  |  |  |  |  |  | MODERATELY_RESTRICTIVE => 4, | 
| 30 |  |  |  |  |  |  | MINIMALLY_RESTRICTIVE  => 5, | 
| 31 | 7 |  |  | 7 |  | 73 | }; | 
|  | 7 |  |  |  |  | 10 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | my %recommended_script = map { $_ => \1 } qw( | 
| 34 |  |  |  |  |  |  | Common Inherited Arabic Armenian Bengali Bopomofo Cyrillic Devanagari | 
| 35 |  |  |  |  |  |  | Ethiopic Georgian Greek Gujarati Gurmukhi Han Hangul Hebrew Hiragana | 
| 36 |  |  |  |  |  |  | Kannada Katakana Khmer Lao Latin Malayalam Myanmar Oriya Sinhala Tamil | 
| 37 |  |  |  |  |  |  | Telugu Thaana Thai Tibetan | 
| 38 |  |  |  |  |  |  | ); | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | my %aspirational_script = map { $_ => \1 } qw( | 
| 41 |  |  |  |  |  |  | Canadian_Aboriginal Miao Mongolian Tifinagh Yi | 
| 42 |  |  |  |  |  |  | ); | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | my %highly_restrictive = map { $_ => \1 } ( | 
| 45 |  |  |  |  |  |  | '', 'Hiragana', 'Katakana', 'Hiragana, Katakana', 'Bopomofo', 'Hangul', | 
| 46 |  |  |  |  |  |  | ); | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub skeleton { | 
| 50 | 10 |  |  | 10 | 1 | 56 | my $str = NFD shift; | 
| 51 | 10 | 100 |  |  |  | 44 | my $m = $str =~ s{(.)}{ my $c = $MA{$1}; defined $c ? $c : $1 }eg; | 
|  | 49 |  |  |  |  | 88 |  | 
|  | 49 |  |  |  |  | 138 |  | 
| 52 | 10 | 50 |  |  |  | 70 | return $m ? NFD $str : $str; | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub confusable { | 
| 57 | 5 |  |  | 5 | 1 | 21 | return skeleton($_[0]) eq skeleton($_[1]); | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | # Algorithm described here: | 
| 62 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Whole_Script_Confusables | 
| 63 |  |  |  |  |  |  | sub whole_script_confusable { | 
| 64 | 3 |  |  | 3 | 1 | 12 | my ($target, $str) = @_; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | # Canonicalize the script name to match the format used in %WS. | 
| 67 | 3 |  |  |  |  | 9 | $target = ucfirst lc $target; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 3 |  |  |  |  | 33 | my %soss = soss(NFD $str); | 
| 70 | 3 |  |  |  |  | 6 | delete @soss{qw(Common Inherited)}; | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 3 | 50 |  |  |  | 8 | my $count = keys %soss or return ''; | 
| 73 | 3 | 50 |  |  |  | 8 | return if 1 < $count; | 
| 74 | 3 |  |  |  |  | 5 | my ($source) = keys %soss; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 3 |  |  |  |  | 7 | my $chars = $WS{$source}{$target}; | 
| 77 | 3 | 100 |  |  |  | 3 | do { return 1 if $chars->{$_} } for keys %{ $soss{$source} }; | 
|  | 3 |  |  |  |  | 16 |  | 
|  | 7 |  |  |  |  | 29 |  | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | *ws_confusable = *ws_confusable = \&whole_script_confusable; | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | # Algorithm described here: | 
| 83 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Mixed_Script_Confusables | 
| 84 |  |  |  |  |  |  | sub mixed_script_confusable { | 
| 85 | 3 |  |  | 3 | 1 | 43 | my %soss = soss(NFD $_[0]); | 
| 86 | 3 |  |  |  |  | 8 | delete @soss{qw(Common Inherited)}; | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 3 |  |  |  |  | 7 | my @soss = keys %soss; | 
| 89 | 3 |  |  |  |  | 5 | for my $source (@soss) { | 
| 90 | 5 |  |  |  |  | 6 | my $sum = 0; | 
| 91 | 5 |  |  |  |  | 5 | for my $target (@soss) { | 
| 92 | 11 | 100 |  |  |  | 18 | next if $target eq $source; | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 7 |  |  |  |  | 7 | my $nok = 0; | 
| 95 | 7 |  |  |  |  | 12 | my $chars = $WS{$target}{$source}; | 
| 96 | 7 |  |  |  |  | 5 | for my $char (keys %{ $soss{$target} }) { | 
|  | 7 |  |  |  |  | 13 |  | 
| 97 | 10 | 100 |  |  |  | 28 | $nok = 1, last unless $chars->{$char}; | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 7 | 100 |  |  |  | 13 | last if $nok; | 
| 100 | 4 |  |  |  |  | 6 | $sum++; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 5 | 100 |  |  |  | 26 | return 1 if 1 == @soss - $sum; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 1 |  |  |  |  | 5 | return ''; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  | *ms_confusable = *ms_confusable = \&mixed_script_confusable; | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | sub soss { | 
| 112 | 24 |  |  | 24 | 1 | 27 | my %soss; | 
| 113 | 24 |  |  |  |  | 99 | for my $char (split //, $_[0]) { | 
| 114 | 146 |  |  |  |  | 310 | my $script = charscript(ord($char)); | 
| 115 | 146 | 50 |  |  |  | 49933 | $script = 'Unknown' unless defined $script; | 
| 116 | 146 |  |  |  |  | 377 | $soss{$script}{$char} = \1; | 
| 117 |  |  |  |  |  |  | } | 
| 118 | 24 |  |  |  |  | 123 | return %soss; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub mixed_script { | 
| 123 | 4 |  |  | 4 | 1 | 1936 | my %soss = soss($_[0]); | 
| 124 | 4 |  |  |  |  | 17 | delete @soss{qw(Common Inherited)}; | 
| 125 | 4 |  |  |  |  | 31 | return 1 < keys %soss; | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub mixed_number { | 
| 130 | 7 |  |  | 7 | 1 | 2568 | my %z; | 
| 131 | 7 |  |  |  |  | 31 | for my $char (split //, $_[0]) { | 
| 132 | 36 | 50 |  |  |  | 100 | my $info = charinfo(ord $char) or next; | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 36 |  |  |  |  | 208872 | my $num = $info->{decimal}; | 
| 135 | 36 | 100 |  |  |  | 104 | next unless length $num; | 
| 136 |  |  |  |  |  |  |  | 
| 137 | 28 |  |  |  |  | 150 | $z{ ord($char) - $num } = \1; | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  |  | 
| 140 | 7 |  |  |  |  | 45 | return 1 < keys %z; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | *mixed_num = *mixed_num = \&mixed_number; | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | # Algorithm described here: | 
| 146 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Restriction_Level_Detection | 
| 147 |  |  |  |  |  |  | sub restriction_level { | 
| 148 | 17 |  |  | 17 | 1 | 5380 | my ($str, $non_id_regex) = @_; | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 17 | 100 |  |  |  | 81 | $non_id_regex = qr/\P{ID_Continue}/ unless defined $non_id_regex; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 17 | 100 |  |  |  | 156 | return UNRESTRICTED if $str =~ /$non_id_regex/; | 
| 153 | 16 | 100 |  |  |  | 60 | return ASCII_ONLY   if $str !~ /\P{ASCII}/; | 
| 154 |  |  |  |  |  |  |  | 
| 155 | 14 |  |  |  |  | 26 | my %soss = soss($str); | 
| 156 | 14 |  |  |  |  | 25 | delete @soss{qw(Common Inherited)}; | 
| 157 | 14 | 100 |  |  |  | 33 | return SINGLE_SCRIPT if 1 == keys %soss; | 
| 158 |  |  |  |  |  |  |  | 
| 159 | 13 |  |  |  |  | 23 | delete $soss{Latin}; | 
| 160 | 13 |  |  |  |  | 20 | my %copy = %soss; | 
| 161 | 13 |  |  |  |  | 13 | delete $copy{Han}; | 
| 162 | 13 |  |  |  |  | 39 | my $soss = join ', ', sort keys %copy; | 
| 163 | 13 | 100 |  |  |  | 65 | return HIGHLY_RESTRICTIVE if $highly_restrictive{$soss}; | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 6 | 50 |  |  |  | 11 | if (1 == keys %soss) { | 
| 166 | 6 |  |  |  |  | 9 | my ($script) = keys %soss; | 
| 167 | 6 | 100 | 66 |  |  | 58 | return MODERATELY_RESTRICTIVE | 
|  |  |  | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 168 |  |  |  |  |  |  | if ($recommended_script{$script} or $aspirational_script{$script}) | 
| 169 |  |  |  |  |  |  | and not ($soss{Cyrillic} or $soss{Greek}); | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 4 |  |  |  |  | 17 | return MINIMALLY_RESTRICTIVE; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | 1; | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | __END__ |