| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Unicode::Stringprep; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | require 5.008_003; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 11 |  |  | 11 |  | 1147337 | use strict; | 
|  | 11 |  |  |  |  | 29 |  | 
|  | 11 |  |  |  |  | 434 |  | 
| 6 | 11 |  |  | 11 |  | 3229 | use utf8; | 
|  | 11 |  |  |  |  | 46 |  | 
|  | 11 |  |  |  |  | 76 |  | 
| 7 | 11 |  |  | 11 |  | 302 | use warnings; | 
|  | 11 |  |  |  |  | 28 |  | 
|  | 11 |  |  |  |  | 1294 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | our $VERSION = "1.105"; | 
| 10 |  |  |  |  |  |  | $VERSION = eval $VERSION; | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | require Exporter; | 
| 13 |  |  |  |  |  |  | our @ISA    = qw(Exporter); | 
| 14 |  |  |  |  |  |  | our @EXPORT = qw(stringprep); | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 11 |  |  | 11 |  | 65 | use Carp; | 
|  | 11 |  |  |  |  | 27 |  | 
|  | 11 |  |  |  |  | 961 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 11 |  |  | 11 |  | 12420 | use Unicode::Normalize(); | 
|  | 11 |  |  |  |  | 31724 |  | 
|  | 11 |  |  |  |  | 414 |  | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 11 |  |  | 11 |  | 8531 | use Unicode::Stringprep::Unassigned; | 
|  | 11 |  |  |  |  | 42 |  | 
|  | 11 |  |  |  |  | 604 |  | 
| 21 | 11 |  |  | 11 |  | 18855 | use Unicode::Stringprep::Mapping; | 
|  | 11 |  |  |  |  | 56 |  | 
|  | 11 |  |  |  |  | 867 |  | 
| 22 | 11 |  |  | 11 |  | 8309 | use Unicode::Stringprep::Prohibited; | 
|  | 11 |  |  |  |  | 33 |  | 
|  | 11 |  |  |  |  | 380 |  | 
| 23 | 11 |  |  | 11 |  | 7129 | use Unicode::Stringprep::BiDi; | 
|  | 11 |  |  |  |  | 40 |  | 
|  | 11 |  |  |  |  | 13829 |  | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | sub new { | 
| 26 | 10 |  |  | 10 | 1 | 5676 | my $self  = shift; | 
| 27 | 10 |  | 33 |  |  | 166 | my $class = ref($self) || $self; | 
| 28 | 10 |  |  |  |  | 56 | return bless _compile(@_), $class; | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | ## Here be eval dragons | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | sub _compile { | 
| 34 | 10 |  |  | 10 |  | 26 | my $unicode_version = shift; | 
| 35 | 10 |  |  |  |  | 23 | my $mapping_tables = shift; | 
| 36 | 10 |  |  |  |  | 39 | my $unicode_normalization = uc shift; | 
| 37 | 10 |  |  |  |  | 24 | my $prohibited_tables = shift; | 
| 38 | 10 |  |  |  |  | 22 | my $bidi_check = shift; | 
| 39 | 10 |  |  |  |  | 18 | my $unassigned_check = shift; | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 10 | 50 |  |  |  | 54 | croak 'Unsupported Unicode version '.$unicode_version.'.' | 
| 42 |  |  |  |  |  |  | if $unicode_version != 3.2; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 10 |  |  |  |  | 510 | my $mapping_sub = _compile_mapping($mapping_tables); | 
| 45 | 10 |  |  |  |  | 1496 | my $normalization_sub = _compile_normalization($unicode_normalization); | 
| 46 | 10 |  |  |  |  | 47 | my $prohibited_sub = _compile_prohibited($prohibited_tables); | 
| 47 | 10 | 100 |  |  |  | 48 | my $bidi_sub = $bidi_check ? '_check_bidi($string)' : undef; | 
| 48 | 10 | 100 |  |  |  | 42 | my $unassigned_sub = $unassigned_check ? '_check_unassigned($string)' : undef; | 
| 49 | 10 | 100 |  |  |  | 36 | my $pr29_sub = (defined $normalization_sub) ? '_check_pr29($string)' : undef; | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 10 |  |  |  |  | 22 | my $code = "sub { no warnings 'utf8';". | 
| 52 |  |  |  |  |  |  | 'my $string = shift;'; | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 10 | 50 |  |  |  | 46 | $code .= '$string .= pack("U0");' if $] < 5.008; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 35 | 100 |  |  |  | 1083 | $code .= join('', map { $_ ? "{$_}\n" : ''} | 
|  | 60 |  |  |  |  | 103 |  | 
| 57 | 10 |  |  |  |  | 29 | grep { defined $_ } | 
| 58 |  |  |  |  |  |  | $mapping_sub, | 
| 59 |  |  |  |  |  |  | $normalization_sub, | 
| 60 |  |  |  |  |  |  | $prohibited_sub, | 
| 61 |  |  |  |  |  |  | $bidi_sub, | 
| 62 |  |  |  |  |  |  | $unassigned_sub, | 
| 63 |  |  |  |  |  |  | $pr29_sub ). | 
| 64 |  |  |  |  |  |  | 'return $string;'. | 
| 65 |  |  |  |  |  |  | '}'; | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 10 |  | 50 | 10 |  | 1369 | return eval $code || die $@; | 
|  | 10 |  |  |  |  | 91 |  | 
|  | 10 |  |  |  |  | 19 |  | 
|  | 10 |  |  |  |  | 39010 |  | 
| 68 |  |  |  |  |  |  | } | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | ## generic compilation functions for matching/mapping characters | 
| 71 |  |  |  |  |  |  | ## | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | sub _compile_mapping { | 
| 74 | 132 |  |  | 132 |  | 50769 | my %map = (); | 
| 75 |  |  |  |  |  |  | sub _mapping_tables { | 
| 76 | 153 |  |  | 33 |  | 511 | my $map = shift; | 
| 77 | 68 |  |  |  |  | 494 | while(@_) { | 
| 78 | 4366 |  |  |  |  | 4317 | my $data = shift; | 
| 79 | 4366 | 100 |  |  |  | 10667 | if(ref($data) eq 'HASH') { %{$map} = (%{$map},%{$data}) } | 
|  | 80 | 100 |  |  |  | 109 |  | 
|  | 80 | 100 |  |  |  | 264 |  | 
|  | 79 |  |  |  |  | 264 |  | 
|  | 27 |  |  |  |  | 84 |  | 
| 80 | 48 |  |  |  |  | 122 | elsif(ref($data) eq 'ARRAY') { _mapping_tables($map,@{$data}) } | 
|  | 23 |  |  |  |  | 511 |  | 
| 81 | 4255 |  |  |  |  | 13562 | elsif(defined $data){ $map->{$data} = shift }; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  | } | 
| 84 | 132 |  |  |  |  | 225 | _mapping_tables(\%map,@_); | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 130 | 100 |  |  |  | 657 | return '' if !%map; | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | sub _compile_mapping_r { | 
| 89 | 1564 |  |  | 1564 |  | 1633 | my $map = shift; | 
| 90 | 1564 | 100 |  |  |  | 2458 | if($#_ <= 7) { | 
| 91 | 785 |  |  |  |  | 970 | return (join '', (map { '$char == '.$_. | 
|  | 4287 |  |  |  |  | 17850 |  | 
| 92 | 4287 |  |  |  |  | 7163 | ' ? "'.(join '', map { quotemeta($_); } ( $$map{$_} )).'"'. | 
| 93 |  |  |  |  |  |  | '   : ' } @_)).' die'; | 
| 94 |  |  |  |  |  |  | } else { | 
| 95 | 779 |  |  |  |  | 2852 | my @a = splice @_, 0, int($#_/2); | 
| 96 | 779 |  |  |  |  | 1799 | return '$char < '.$_[0].' ? ('. | 
| 97 |  |  |  |  |  |  | _compile_mapping_r($map,@a). | 
| 98 |  |  |  |  |  |  | ') : ('. | 
| 99 |  |  |  |  |  |  | _compile_mapping_r($map,@_). | 
| 100 |  |  |  |  |  |  | ')'; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  | }; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 101 |  |  |  |  | 1099 | my @from = sort { $a <=> $b } keys %map; | 
|  | 39221 |  |  |  |  | 35814 |  | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 126 | 100 |  |  |  | 637 | return undef if !@from; | 
| 107 |  |  |  |  |  |  |  | 
| 108 | 126 |  |  |  |  | 396 | return '$string =~ s/('._compile_set( map { $_ => $_ } @from).')/my $char = ord($1); '. | 
|  | 4407 |  |  |  |  | 5917 |  | 
| 109 |  |  |  |  |  |  | _compile_mapping_r(\%map, @from).'/ge;', | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | sub _compile_set { | 
| 113 | 71 |  |  | 71 |  | 162 | my @collect = (); | 
| 114 |  |  |  |  |  |  | sub _set_tables { | 
| 115 | 122 |  |  | 122 |  | 168 | my $set = shift; | 
| 116 | 122 |  |  |  |  | 314 | while(@_) { | 
| 117 | 19613 |  |  |  |  | 21167 | my $data = shift; | 
| 118 | 19613 | 100 |  |  |  | 51270 | if(ref($data) eq 'HASH') { _set_tables($set, %{$data}); } | 
|  | 0 | 100 |  |  |  | 0 |  | 
|  | 0 | 100 |  |  |  | 0 |  | 
| 119 | 51 |  |  |  |  | 54 | elsif(ref($data) eq 'ARRAY') { _set_tables($set, @{$data}); } | 
|  | 51 |  |  |  |  | 147 |  | 
| 120 | 19561 |  | 100 |  |  | 22798 | elsif(defined $data){ push @{$set}, [$data,shift || $data] }; | 
|  | 19561 |  |  |  |  | 77382 |  | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  | } | 
| 123 | 71 |  |  |  |  | 763 | _set_tables(\@collect,@_); | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | # NB: This destroys @collect as it modifies the anonymous ARRAYs | 
| 126 |  |  |  |  |  |  | # referenced in @collect. | 
| 127 |  |  |  |  |  |  | # This is harmless as it only modifies ARRAYs after they've been | 
| 128 |  |  |  |  |  |  | # inspected. | 
| 129 |  |  |  |  |  |  |  | 
| 130 | 71 |  |  |  |  | 140 | my @set = (); | 
| 131 | 71 |  |  |  |  | 1147 | foreach my $d (sort { $a->[0]<=>$b->[0] } @collect) { | 
|  | 22092 |  |  |  |  | 22911 |  | 
| 132 | 19561 | 100 | 100 |  |  | 71793 | if(!@set || $set[$#set]->[1]+1 < $d->[0]) { | 
|  |  | 100 |  |  |  |  |  | 
| 133 | 15437 |  |  |  |  | 21427 | push @set, $d; | 
| 134 |  |  |  |  |  |  | } elsif($set[$#set]->[1] < $d->[1]) { | 
| 135 | 4074 |  |  |  |  | 6841 | $set[$#set]->[1] = $d->[1]; | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 71 | 100 |  |  |  | 221 | return undef if !@set; | 
| 140 |  |  |  |  |  |  |  | 
| 141 | 6944 |  |  |  |  | 18892 | return '['.join('', map { | 
| 142 | 68 |  |  |  |  | 181 | $_->[0] >= $_->[1] | 
| 143 |  |  |  |  |  |  | ? sprintf("\\x{%X}", $_->[0]) | 
| 144 | 15437 | 100 |  |  |  | 35220 | : sprintf("\\x{%X}-\\x{%X}", @{$_}[0,1]) | 
| 145 |  |  |  |  |  |  | } @set ).']'; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | ## specific functions for individual stringprep steps | 
| 149 |  |  |  |  |  |  | ## | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub _compile_normalization { | 
| 152 | 10 |  |  | 10 |  | 33 | my $unicode_normalization = uc shift; | 
| 153 | 10 |  |  |  |  | 46 | $unicode_normalization =~ s/^NF//; | 
| 154 |  |  |  |  |  |  |  | 
| 155 | 10 | 100 |  |  |  | 58 | return '$string = _NFKC_3_2($string)' if $unicode_normalization eq 'KC'; | 
| 156 | 4 | 50 |  |  |  | 32 | return undef if !$unicode_normalization; | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 0 |  |  |  |  | 0 | croak 'Unsupported Unicode normalization (NF)'.$unicode_normalization.'.'; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | my $is_Unassigned = _compile_set(@Unicode::Stringprep::Unassigned::A1); | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | sub _NFKC_3_2 { | 
| 164 | 136 |  |  | 136 |  | 214 | my $string = shift; | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | ## pre-map characters corrected in Corrigendum #4 | 
| 167 |  |  |  |  |  |  | ## | 
| 168 | 11 |  |  | 11 |  | 96 | no warnings 'utf8'; | 
|  | 11 |  |  |  |  | 22 |  | 
|  | 11 |  |  |  |  | 980 |  | 
| 169 | 11 |  |  | 11 |  | 56 | $string =~ tr/\x{2F868}\x{2F874}\x{2F91F}\x{2F95F}\x{2F9BF}/\x{2136A}\x{5F33}\x{43AB}\x{7AAE}\x{4D57}/; | 
|  | 11 |  |  |  |  | 20 |  | 
|  | 11 |  |  |  |  | 161 |  | 
|  | 136 |  |  |  |  | 791 |  | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | ## only normalize runs of assigned characters | 
| 172 |  |  |  |  |  |  | ## | 
| 173 | 136 |  |  |  |  | 1729 | my @s = split m/($is_Unassigned+)/o, $string; | 
| 174 |  |  |  |  |  |  |  | 
| 175 | 136 |  |  |  |  | 1656 | for( my $i = 0; $i <= $#s ; $i+=2 ) { # skips delimiters == is_Unassigned | 
| 176 | 11 |  |  | 11 |  | 70935 | no warnings 'utf8'; | 
|  | 11 |  |  |  |  | 28 |  | 
|  | 11 |  |  |  |  | 8732 |  | 
| 177 | 133 |  |  |  |  | 1460 | $s[$i] = Unicode::Normalize::NFKC($s[$i]); | 
| 178 |  |  |  |  |  |  | } | 
| 179 | 136 |  |  |  |  | 5261 | return join '', @s; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | sub _check_unassigned { | 
| 183 | 26 | 100 |  | 26 |  | 5170 | if( shift =~ m/($is_Unassigned)/os ) { | 
| 184 | 1 |  |  |  |  | 11 | die sprintf("unassigned character U+%04X",ord($1)); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | sub _compile_prohibited { | 
| 189 | 10 |  |  | 10 |  | 37 | my $prohibited = _compile_set(@_); | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 10 | 100 |  |  |  | 69 | if($prohibited) { | 
| 192 |  |  |  |  |  |  | return | 
| 193 | 7 |  |  |  |  | 55 | 'if($string =~ m/('.$prohibited.')/os) {'. | 
| 194 |  |  |  |  |  |  | 'die sprintf("prohibited character U+%04X",ord($1))'. | 
| 195 |  |  |  |  |  |  | '}'; | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | my $is_RandAL = _compile_set(@Unicode::Stringprep::BiDi::D1); | 
| 200 |  |  |  |  |  |  | my $is_L = _compile_set(@Unicode::Stringprep::BiDi::D2); | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | sub _check_bidi { | 
| 203 | 85 |  |  | 85 |  | 127 | my $string = shift; | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 85 | 100 |  |  |  | 3130 | if($string =~ m/$is_RandAL/os) { | 
| 206 | 9 | 100 |  |  |  | 728 | if($string =~ m/$is_L/os) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 207 | 4 |  |  |  |  | 32 | die "string contains both RandALCat and LCat characters" | 
| 208 |  |  |  |  |  |  | } elsif($string !~ m/^(?:$is_RandAL)/os) { | 
| 209 | 0 |  |  |  |  | 0 | die "string contains RandALCat character but does not start with one" | 
| 210 |  |  |  |  |  |  | } elsif($string !~ m/(?:$is_RandAL)$/os) { | 
| 211 | 3 |  |  |  |  | 32 | die "string contains RandALCat character but does not end with one" | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | my $is_Combining = _compile_set(  0x0300,0x0314, 0x0316,0x0319, 0x031C,0x0320, | 
| 217 |  |  |  |  |  |  | 0x0321,0x0322, 0x0323,0x0326, 0x0327,0x0328, 0x0329,0x0333, 0x0334,0x0338, | 
| 218 |  |  |  |  |  |  | 0x0339,0x033C, 0x033D,0x0344, 0x0347,0x0349, 0x034A,0x034C, 0x034D,0x034E, | 
| 219 |  |  |  |  |  |  | 0x0360,0x0361, 0x0363,0x036F, 0x0483,0x0486, 0x0592,0x0595, 0x0597,0x0599, | 
| 220 |  |  |  |  |  |  | 0x059C,0x05A1, 0x05A3,0x05A7, 0x05A8,0x05A9, 0x05AB,0x05AC, 0x0653,0x0654, | 
| 221 |  |  |  |  |  |  | 0x06D6,0x06DC, 0x06DF,0x06E2, 0x06E7,0x06E8, 0x06EB,0x06EC, 0x0732,0x0733, | 
| 222 |  |  |  |  |  |  | 0x0735,0x0736, 0x0737,0x0739, 0x073B,0x073C, 0x073F,0x0741, 0x0749,0x074A, | 
| 223 |  |  |  |  |  |  | 0x0953,0x0954, 0x0E38,0x0E39, 0x0E48,0x0E4B, 0x0EB8,0x0EB9, 0x0EC8,0x0ECB, | 
| 224 |  |  |  |  |  |  | 0x0F18,0x0F19, 0x0F7A,0x0F7D, 0x0F82,0x0F83, 0x0F86,0x0F87, 0x20D0,0x20D1, | 
| 225 |  |  |  |  |  |  | 0x20D2,0x20D3, 0x20D4,0x20D7, 0x20D8,0x20DA, 0x20DB,0x20DC, 0x20E5,0x20E6, | 
| 226 |  |  |  |  |  |  | 0x302E,0x302F, 0x3099,0x309A, 0xFE20,0xFE23, | 
| 227 |  |  |  |  |  |  | 0x1D165,0x1D166, 0x1D167,0x1D169, 0x1D16E,0x1D172, 0x1D17B,0x1D182, | 
| 228 |  |  |  |  |  |  | 0x1D185,0x1D189, 0x1D18A,0x1D18B, 0x1D1AA,0x1D1AD, | 
| 229 |  |  |  |  |  |  | map { ($_,$_) } 0x0315, 0x031A, 0x031B, 0x0345, 0x0346, 0x0362, 0x0591, | 
| 230 |  |  |  |  |  |  | 0x0596, 0x059A, 0x059B, 0x05AA, 0x05AD, 0x05AE, 0x05AF, 0x05B0, 0x05B1, | 
| 231 |  |  |  |  |  |  | 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB, | 
| 232 |  |  |  |  |  |  | 0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x064B, 0x064C, 0x064D, | 
| 233 |  |  |  |  |  |  | 0x064E, 0x064F, 0x0650, 0x0651, 0x0652, 0x0655, 0x0670, 0x06E3, 0x06E4, | 
| 234 |  |  |  |  |  |  | 0x06EA, 0x06ED, 0x0711, 0x0730, 0x0731, 0x0734, 0x073A, 0x073D, 0x073E, | 
| 235 |  |  |  |  |  |  | 0x0742, 0x0743, 0x0744, 0x0745, 0x0746, 0x0747, 0x0748, 0x093C, 0x094D, | 
| 236 |  |  |  |  |  |  | 0x0951, 0x0952, 0x09BC, 0x09CD, 0x0A3C, 0x0A4D, 0x0ABC, 0x0ACD, 0x0B3C, | 
| 237 |  |  |  |  |  |  | 0x0B4D, 0x0BCD, 0x0C4D, 0x0C55, 0x0C56, 0x0CCD, 0x0D4D, 0x0DCA, 0x0E3A, | 
| 238 |  |  |  |  |  |  | 0x0F35, 0x0F37, 0x0F39, 0x0F71, 0x0F72, 0x0F74, 0x0F80, 0x0F84, 0x0FC6, | 
| 239 |  |  |  |  |  |  | 0x1037, 0x1039, 0x1714, 0x1734, 0x17D2, 0x18A9, 0x20E1, 0x20E7, 0x20E8, | 
| 240 |  |  |  |  |  |  | 0x20E9, 0x20EA, 0x302A, 0x302B, 0x302C, 0x302D, 0xFB1E, 0x1D16D,         ); | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | my $is_HangulLV = _compile_set( map { ($_,$_) }     0xAC00, 0xAC1C, 0xAC38, | 
| 243 |  |  |  |  |  |  | 0xAC54, 0xAC70, 0xAC8C, 0xACA8, 0xACC4, 0xACE0, 0xACFC, 0xAD18, 0xAD34, | 
| 244 |  |  |  |  |  |  | 0xAD50, 0xAD6C, 0xAD88, 0xADA4, 0xADC0, 0xADDC, 0xADF8, 0xAE14, 0xAE30, | 
| 245 |  |  |  |  |  |  | 0xAE4C, 0xAE68, 0xAE84, 0xAEA0, 0xAEBC, 0xAED8, 0xAEF4, 0xAF10, 0xAF2C, | 
| 246 |  |  |  |  |  |  | 0xAF48, 0xAF64, 0xAF80, 0xAF9C, 0xAFB8, 0xAFD4, 0xAFF0, 0xB00C, 0xB028, | 
| 247 |  |  |  |  |  |  | 0xB044, 0xB060, 0xB07C, 0xB098, 0xB0B4, 0xB0D0, 0xB0EC, 0xB108, 0xB124, | 
| 248 |  |  |  |  |  |  | 0xB140, 0xB15C, 0xB178, 0xB194, 0xB1B0, 0xB1CC, 0xB1E8, 0xB204, 0xB220, | 
| 249 |  |  |  |  |  |  | 0xB23C, 0xB258, 0xB274, 0xB290, 0xB2AC, 0xB2C8, 0xB2E4, 0xB300, 0xB31C, | 
| 250 |  |  |  |  |  |  | 0xB338, 0xB354, 0xB370, 0xB38C, 0xB3A8, 0xB3C4, 0xB3E0, 0xB3FC, 0xB418, | 
| 251 |  |  |  |  |  |  | 0xB434, 0xB450, 0xB46C, 0xB488, 0xB4A4, 0xB4C0, 0xB4DC, 0xB4F8, 0xB514, | 
| 252 |  |  |  |  |  |  | 0xB530, 0xB54C, 0xB568, 0xB584, 0xB5A0, 0xB5BC, 0xB5D8, 0xB5F4, 0xB610, | 
| 253 |  |  |  |  |  |  | 0xB62C, 0xB648, 0xB664, 0xB680, 0xB69C, 0xB6B8, 0xB6D4, 0xB6F0, 0xB70C, | 
| 254 |  |  |  |  |  |  | 0xB728, 0xB744, 0xB760, 0xB77C, 0xB798, 0xB7B4, 0xB7D0, 0xB7EC, 0xB808, | 
| 255 |  |  |  |  |  |  | 0xB824, 0xB840, 0xB85C, 0xB878, 0xB894, 0xB8B0, 0xB8CC, 0xB8E8, 0xB904, | 
| 256 |  |  |  |  |  |  | 0xB920, 0xB93C, 0xB958, 0xB974, 0xB990, 0xB9AC, 0xB9C8, 0xB9E4, 0xBA00, | 
| 257 |  |  |  |  |  |  | 0xBA1C, 0xBA38, 0xBA54, 0xBA70, 0xBA8C, 0xBAA8, 0xBAC4, 0xBAE0, 0xBAFC, | 
| 258 |  |  |  |  |  |  | 0xBB18, 0xBB34, 0xBB50, 0xBB6C, 0xBB88, 0xBBA4, 0xBBC0, 0xBBDC, 0xBBF8, | 
| 259 |  |  |  |  |  |  | 0xBC14, 0xBC30, 0xBC4C, 0xBC68, 0xBC84, 0xBCA0, 0xBCBC, 0xBCD8, 0xBCF4, | 
| 260 |  |  |  |  |  |  | 0xBD10, 0xBD2C, 0xBD48, 0xBD64, 0xBD80, 0xBD9C, 0xBDB8, 0xBDD4, 0xBDF0, | 
| 261 |  |  |  |  |  |  | 0xBE0C, 0xBE28, 0xBE44, 0xBE60, 0xBE7C, 0xBE98, 0xBEB4, 0xBED0, 0xBEEC, | 
| 262 |  |  |  |  |  |  | 0xBF08, 0xBF24, 0xBF40, 0xBF5C, 0xBF78, 0xBF94, 0xBFB0, 0xBFCC, 0xBFE8, | 
| 263 |  |  |  |  |  |  | 0xC004, 0xC020, 0xC03C, 0xC058, 0xC074, 0xC090, 0xC0AC, 0xC0C8, 0xC0E4, | 
| 264 |  |  |  |  |  |  | 0xC100, 0xC11C, 0xC138, 0xC154, 0xC170, 0xC18C, 0xC1A8, 0xC1C4, 0xC1E0, | 
| 265 |  |  |  |  |  |  | 0xC1FC, 0xC218, 0xC234, 0xC250, 0xC26C, 0xC288, 0xC2A4, 0xC2C0, 0xC2DC, | 
| 266 |  |  |  |  |  |  | 0xC2F8, 0xC314, 0xC330, 0xC34C, 0xC368, 0xC384, 0xC3A0, 0xC3BC, 0xC3D8, | 
| 267 |  |  |  |  |  |  | 0xC3F4, 0xC410, 0xC42C, 0xC448, 0xC464, 0xC480, 0xC49C, 0xC4B8, 0xC4D4, | 
| 268 |  |  |  |  |  |  | 0xC4F0, 0xC50C, 0xC528, 0xC544, 0xC560, 0xC57C, 0xC598, 0xC5B4, 0xC5D0, | 
| 269 |  |  |  |  |  |  | 0xC5EC, 0xC608, 0xC624, 0xC640, 0xC65C, 0xC678, 0xC694, 0xC6B0, 0xC6CC, | 
| 270 |  |  |  |  |  |  | 0xC6E8, 0xC704, 0xC720, 0xC73C, 0xC758, 0xC774, 0xC790, 0xC7AC, 0xC7C8, | 
| 271 |  |  |  |  |  |  | 0xC7E4, 0xC800, 0xC81C, 0xC838, 0xC854, 0xC870, 0xC88C, 0xC8A8, 0xC8C4, | 
| 272 |  |  |  |  |  |  | 0xC8E0, 0xC8FC, 0xC918, 0xC934, 0xC950, 0xC96C, 0xC988, 0xC9A4, 0xC9C0, | 
| 273 |  |  |  |  |  |  | 0xC9DC, 0xC9F8, 0xCA14, 0xCA30, 0xCA4C, 0xCA68, 0xCA84, 0xCAA0, 0xCABC, | 
| 274 |  |  |  |  |  |  | 0xCAD8, 0xCAF4, 0xCB10, 0xCB2C, 0xCB48, 0xCB64, 0xCB80, 0xCB9C, 0xCBB8, | 
| 275 |  |  |  |  |  |  | 0xCBD4, 0xCBF0, 0xCC0C, 0xCC28, 0xCC44, 0xCC60, 0xCC7C, 0xCC98, 0xCCB4, | 
| 276 |  |  |  |  |  |  | 0xCCD0, 0xCCEC, 0xCD08, 0xCD24, 0xCD40, 0xCD5C, 0xCD78, 0xCD94, 0xCDB0, | 
| 277 |  |  |  |  |  |  | 0xCDCC, 0xCDE8, 0xCE04, 0xCE20, 0xCE3C, 0xCE58, 0xCE74, 0xCE90, 0xCEAC, | 
| 278 |  |  |  |  |  |  | 0xCEC8, 0xCEE4, 0xCF00, 0xCF1C, 0xCF38, 0xCF54, 0xCF70, 0xCF8C, 0xCFA8, | 
| 279 |  |  |  |  |  |  | 0xCFC4, 0xCFE0, 0xCFFC, 0xD018, 0xD034, 0xD050, 0xD06C, 0xD088, 0xD0A4, | 
| 280 |  |  |  |  |  |  | 0xD0C0, 0xD0DC, 0xD0F8, 0xD114, 0xD130, 0xD14C, 0xD168, 0xD184, 0xD1A0, | 
| 281 |  |  |  |  |  |  | 0xD1BC, 0xD1D8, 0xD1F4, 0xD210, 0xD22C, 0xD248, 0xD264, 0xD280, 0xD29C, | 
| 282 |  |  |  |  |  |  | 0xD2B8, 0xD2D4, 0xD2F0, 0xD30C, 0xD328, 0xD344, 0xD360, 0xD37C, 0xD398, | 
| 283 |  |  |  |  |  |  | 0xD3B4, 0xD3D0, 0xD3EC, 0xD408, 0xD424, 0xD440, 0xD45C, 0xD478, 0xD494, | 
| 284 |  |  |  |  |  |  | 0xD4B0, 0xD4CC, 0xD4E8, 0xD504, 0xD520, 0xD53C, 0xD558, 0xD574, 0xD590, | 
| 285 |  |  |  |  |  |  | 0xD5AC, 0xD5C8, 0xD5E4, 0xD600, 0xD61C, 0xD638, 0xD654, 0xD670, 0xD68C, | 
| 286 |  |  |  |  |  |  | 0xD6A8, 0xD6C4, 0xD6E0, 0xD6FC, 0xD718, 0xD734, 0xD750, 0xD76C, 0xD788, ); | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | sub _check_pr29 { | 
| 289 | 93 | 100 |  | 93 |  | 7131 | die "String contains Unicode Corrigendum #5 problem sequences" if shift =~ m/ | 
| 290 |  |  |  |  |  |  | \x{09C7}$is_Combining+[\x{09BE}\x{09D7}]		| # BENGALI VOWEL SIGN E | 
| 291 |  |  |  |  |  |  | \x{0B47}$is_Combining+[\x{0B3E}\x{0B56}\x{0B57}]	| # ORIYA VOWEL SIGN E | 
| 292 |  |  |  |  |  |  | \x{0BC6}$is_Combining+[\x{0BBE}\x{0BD7}]		| # TAMIL VOWEL SIGN E | 
| 293 |  |  |  |  |  |  | \x{0BC7}$is_Combining+\x{0BBE}			| # TAMIL VOWEL SIGN EE | 
| 294 |  |  |  |  |  |  | \x{0B92}$is_Combining+\x{0BD7}			| # TAMIL LETTER O | 
| 295 |  |  |  |  |  |  | \x{0CC6}$is_Combining+[\x{0CC2}\x{0CD5}\x{0CD6}]	| # KANNADA VOWEL SIGN E | 
| 296 |  |  |  |  |  |  | [\x{0CBF}\x{0CCA}]$is_Combining\x{0CD5}		| # KANNADA VOWEL SIGN I or KANNADA VOWEL SIGN O | 
| 297 |  |  |  |  |  |  | \x{0D47}$is_Combining+\x{0D3E}			| # MALAYALAM VOWEL SIGN EE | 
| 298 |  |  |  |  |  |  | \x{0D46}$is_Combining+[\x{0D3E}\x{0D57}]		| # MALAYALAM VOWEL SIGN E | 
| 299 |  |  |  |  |  |  | \x{1025}$is_Combining+\x{102E}			| # MYANMAR LETTER U | 
| 300 |  |  |  |  |  |  | \x{0DD9}$is_Combining+[\x{0DCF}\x{0DDF}]		| # SINHALA VOWEL SIGN KOMBUVA | 
| 301 |  |  |  |  |  |  | [\x{1100}-\x{1112}]$is_Combining[\x{1161}-\x{1175} ] | # HANGUL CHOSEONG KIYEOK..HIEUH | 
| 302 |  |  |  |  |  |  | ($is_HangulLV|[\x{1100}-\x{1112}][\x{1161}-\x{1175}])($is_Combining)([\x{11A8}-\x{11C2}]) # HANGUL SyllableType=LV | 
| 303 |  |  |  |  |  |  | /osx; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | 1; | 
| 307 |  |  |  |  |  |  | __END__ |