| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package UTF8::R2; | 
| 2 |  |  |  |  |  |  | ###################################################################### | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # UTF8::R2 - makes UTF-8 scripting easy for enterprise use | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # http://search.cpan.org/dist/UTF8-R2/ | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # Copyright (c) 2019, 2020, 2021, 2022, 2023 INABA Hitoshi  in a CPAN | 
| 9 |  |  |  |  |  |  | ###################################################################### | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 60 |  |  | 60 |  | 222466 | use 5.00503;    # Universal Consensus 1998 for primetools | 
|  | 60 |  |  |  |  | 584 |  | 
| 12 |  |  |  |  |  |  | # use 5.008001; # Lancaster Consensus 2013 for toolchains | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | $VERSION = '0.27'; | 
| 15 |  |  |  |  |  |  | $VERSION = $VERSION; | 
| 16 |  |  |  |  |  |  |  | 
| 17 | 60 |  |  | 60 |  | 344 | use strict; | 
|  | 60 |  |  |  |  | 142 |  | 
|  | 60 |  |  |  |  | 2188 |  | 
| 18 | 60 | 50 |  | 60 |  | 1504 | BEGIN { $INC{'warnings.pm'} = '' if $] < 5.006 } use warnings; local $^W=1; | 
|  | 60 |  |  | 60 |  | 400 |  | 
|  | 60 |  |  |  |  | 149 |  | 
|  | 60 |  |  |  |  | 2787 |  | 
| 19 | 60 |  |  | 60 |  | 27341 | use Symbol (); | 
|  | 60 |  |  |  |  | 51196 |  | 
|  | 60 |  |  |  |  | 25357 |  | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | my %utf8_codepoint = ( | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | # beautiful concept in young days, however disabled 5-6 octets for safety | 
| 24 |  |  |  |  |  |  | # https://www.ietf.org/rfc/rfc2279.txt | 
| 25 |  |  |  |  |  |  | 'RFC2279' => qr{(?>@{[join('', qw( | 
| 26 |  |  |  |  |  |  | [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       | | 
| 27 |  |  |  |  |  |  | [\xC2-\xDF][\x80-\xBF]                       | | 
| 28 |  |  |  |  |  |  | [\xE0-\xEF][\x80-\xBF][\x80-\xBF]            | | 
| 29 |  |  |  |  |  |  | [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 30 |  |  |  |  |  |  | [\x00-\xFF] | 
| 31 |  |  |  |  |  |  | ))]})}x, | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | # https://tools.ietf.org/rfc/rfc3629.txt | 
| 34 |  |  |  |  |  |  | 'RFC3629' => qr{(?>@{[join('', qw( | 
| 35 |  |  |  |  |  |  | [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       | | 
| 36 |  |  |  |  |  |  | [\xC2-\xDF][\x80-\xBF]                       | | 
| 37 |  |  |  |  |  |  | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            | | 
| 38 |  |  |  |  |  |  | [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            | | 
| 39 |  |  |  |  |  |  | [\xED-\xED][\x80-\x9F][\x80-\xBF]            | | 
| 40 |  |  |  |  |  |  | [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            | | 
| 41 |  |  |  |  |  |  | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 42 |  |  |  |  |  |  | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 43 |  |  |  |  |  |  | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | | 
| 44 |  |  |  |  |  |  | [\x00-\xFF] | 
| 45 |  |  |  |  |  |  | ))]})}x, | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | # http://simonsapin.github.io/wtf-8/ | 
| 48 |  |  |  |  |  |  | 'WTF8' => qr{(?>@{[join('', qw( | 
| 49 |  |  |  |  |  |  | [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       | | 
| 50 |  |  |  |  |  |  | [\xC2-\xDF][\x80-\xBF]                       | | 
| 51 |  |  |  |  |  |  | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            | | 
| 52 |  |  |  |  |  |  | [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            | | 
| 53 |  |  |  |  |  |  | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 54 |  |  |  |  |  |  | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 55 |  |  |  |  |  |  | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | | 
| 56 |  |  |  |  |  |  | [\x00-\xFF] | 
| 57 | 60 |  |  | 60 |  | 598 | ))]})}x, | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | # optimized RFC3629 for ja_JP | 
| 60 | 60 | 50 | 66 |  |  | 463 | 'RFC3629.ja_JP' => qr{(?>@{[join('', qw( | 
| 61 | 0 | 0 |  |  |  | 0 | [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       | | 
| 62 | 0 |  |  |  |  | 0 | [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            | | 
| 63 | 0 |  |  |  |  | 0 | [\xC2-\xDF][\x80-\xBF]                       | | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 64 |  |  |  |  |  |  | [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            | | 
| 65 | 0 |  |  |  |  | 0 | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 66 |  |  |  |  |  |  | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            | | 
| 67 |  |  |  |  |  |  | [\xED-\xED][\x80-\x9F][\x80-\xBF]            | | 
| 68 | 60 |  |  |  |  | 177 | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | | 
| 69 |  |  |  |  |  |  | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | | 
| 70 |  |  |  |  |  |  | [\x00-\xFF] | 
| 71 | 25 | 100 |  |  |  | 101 | ))]})}x, | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 72 | 60 |  |  | 60 |  | 470 |  | 
|  | 60 |  |  |  |  | 122 |  | 
|  | 60 |  |  |  |  | 21773 |  | 
| 73 |  |  |  |  |  |  | # optimized WTF-8 for ja_JP | 
| 74 |  |  |  |  |  |  | 'WTF8.ja_JP' => qr{(?>@{[join('', qw( | 
| 75 | 17 |  |  |  |  | 84 | [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       | | 
| 76 | 17 |  |  |  |  | 34 | [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            | | 
|  | 17 |  |  |  |  | 100 |  | 
| 77 |  |  |  |  |  |  | [\xC2-\xDF][\x80-\xBF]                       | | 
| 78 |  |  |  |  |  |  | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            | | 
| 79 | 17 |  |  |  |  | 36 | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | | 
|  | 17 |  |  |  |  | 136 |  | 
| 80 | 17 |  |  |  |  | 32 | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | | 
|  | 17 |  |  |  |  | 64 |  | 
| 81 | 17 |  |  |  |  | 33 | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | | 
|  | 17 |  |  |  |  | 70 |  | 
| 82 | 17 |  |  |  |  | 40 | [\x00-\xFF] | 
|  | 17 |  |  |  |  | 61 |  | 
| 83 | 17 |  |  |  |  | 38 | ))]})}x, | 
|  | 17 |  |  |  |  | 55 |  | 
| 84 | 17 |  |  |  |  | 34 | ); | 
|  | 17 |  |  |  |  | 57 |  | 
| 85 | 17 |  |  |  |  | 33 |  | 
|  | 17 |  |  |  |  | 52 |  | 
| 86 | 17 |  |  |  |  | 35 | # supports /./ | 
|  | 17 |  |  |  |  | 66 |  | 
| 87 | 17 |  |  |  |  | 31 | my $x = | 
|  | 17 |  |  |  |  | 62 |  | 
| 88 | 17 |  |  |  |  | 32 | ($^X =~ /jperl(\.exe)?\z/i) && (`$^X -v` =~ /SJIS version/) ? | 
|  | 17 |  |  |  |  | 53 |  | 
| 89 | 17 |  |  |  |  | 197 | q{(?>[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]|[\x00-\xFF])} : # debug tool using JPerl(SJIS version) | 
|  | 17 |  |  |  |  | 70 |  | 
| 90 | 17 |  |  |  |  | 36 | $utf8_codepoint{'RFC3629'}; | 
|  | 17 |  |  |  |  | 63 |  | 
| 91 | 17 |  |  |  |  | 33 |  | 
|  | 17 |  |  |  |  | 58 |  | 
| 92 | 17 |  |  |  |  | 31 | # supports [\b] \d \h \s \v \w | 
|  | 17 |  |  |  |  | 63 |  | 
| 93 | 17 |  |  |  |  | 32 | my $bare_backspace = '\x08'; | 
|  | 17 |  |  |  |  | 59 |  | 
| 94 | 17 |  |  |  |  | 34 | my $bare_d = '0123456789'; | 
|  | 17 |  |  |  |  | 93 |  | 
| 95 | 17 |  |  |  |  | 34 | my $bare_h = '\x09\x20'; | 
|  | 17 |  |  |  |  | 52 |  | 
| 96 | 17 |  |  |  |  | 37 | my $bare_s = '\t\n\f\r\x20'; | 
|  | 17 |  |  |  |  | 88 |  | 
| 97 |  |  |  |  |  |  | my $bare_v = '\x0A\x0B\x0C\x0D'; | 
| 98 |  |  |  |  |  |  | my $bare_w = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 101 | 60 |  |  | 60 |  | 437 | # exports mb package | 
|  | 60 |  |  |  |  | 122 |  | 
|  | 60 |  |  |  |  | 389740 |  | 
| 102 |  |  |  |  |  |  | sub import { | 
| 103 |  |  |  |  |  |  | my $self = shift @_; | 
| 104 | 5 |  |  |  |  | 26 |  | 
| 105 | 5 |  |  |  |  | 10 | # confirm version | 
|  | 5 |  |  |  |  | 36 |  | 
| 106 |  |  |  |  |  |  | if (defined($_[0]) and ($_[0] =~ /\A [0-9] /xms)) { | 
| 107 |  |  |  |  |  |  | if ($_[0] ne $UTF8::R2::VERSION) { | 
| 108 |  |  |  |  |  |  | my($package,$filename,$line) = caller; | 
| 109 |  |  |  |  |  |  | die "$filename requires @{[__PACKAGE__]} $_[0], however @{[__FILE__]} am only $UTF8::R2::VERSION, stopped at $filename line $line.\n"; | 
| 110 | 3 |  |  |  |  | 8 | } | 
| 111 |  |  |  |  |  |  | shift @_; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | for (@_) { | 
| 115 | 60 |  |  |  |  | 140 |  | 
| 116 | 60 |  |  |  |  | 105 | # export *mb | 
| 117 |  |  |  |  |  |  | if ($_ eq '*mb') { | 
| 118 |  |  |  |  |  |  | no strict qw(refs); | 
| 119 | 60 |  |  |  |  | 158 |  | 
| 120 | 60 |  |  |  |  | 7836 | # tie my %mb, __PACKAGE__; # makes: Parentheses missing around "my" list | 
| 121 |  |  |  |  |  |  | tie my %mb, 'UTF8::R2'; | 
| 122 |  |  |  |  |  |  | *{caller().'::mb'} = \%mb; | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | # supports mb package | 
| 125 |  |  |  |  |  |  | *{caller().'::mb::ORIG_PROGRAM_NAME'} = \$UTF8::R2::ORIG_PROGRAM_NAME; | 
| 126 | 0 |  |  | 0 | 0 | 0 | *{caller().'::mb::PERL'}              = \$UTF8::R2::PERL; | 
| 127 | 0 |  |  |  |  | 0 | *{caller().'::mb::chop'}              = \&UTF8::R2::chop; | 
| 128 | 0 |  |  |  |  | 0 | *{caller().'::mb::chr'}               = \&UTF8::R2::chr; | 
| 129 | 0 |  |  |  |  | 0 | *{caller().'::mb::do'}                = \&UTF8::R2::do; | 
| 130 | 0 |  |  |  |  | 0 | *{caller().'::mb::eval'}              = \&UTF8::R2::eval; | 
| 131 |  |  |  |  |  |  | *{caller().'::mb::getc'}              = \&UTF8::R2::getc; | 
| 132 | 0 |  |  |  |  | 0 | *{caller().'::mb::index'}             = \&UTF8::R2::index; | 
| 133 | 0 |  |  |  |  | 0 | *{caller().'::mb::index_byte'}        = \&UTF8::R2::index_byte; | 
| 134 | 0 |  |  |  |  | 0 | *{caller().'::mb::length'}            = \&UTF8::R2::length; | 
| 135 |  |  |  |  |  |  | *{caller().'::mb::ord'}               = \&UTF8::R2::ord; | 
| 136 |  |  |  |  |  |  | *{caller().'::mb::require'}           = \&UTF8::R2::require; | 
| 137 |  |  |  |  |  |  | *{caller().'::mb::reverse'}           = \&UTF8::R2::reverse; | 
| 138 |  |  |  |  |  |  | *{caller().'::mb::rindex'}            = \&UTF8::R2::rindex; | 
| 139 |  |  |  |  |  |  | *{caller().'::mb::rindex_byte'}       = \&UTF8::R2::rindex_byte; | 
| 140 | 36 |  |  | 36 | 0 | 1363 | *{caller().'::mb::split'}             = \&UTF8::R2::split; | 
| 141 | 36 | 100 |  |  |  | 83 | *{caller().'::mb::substr'}            = \&UTF8::R2::substr; | 
| 142 | 52 | 100 |  |  |  | 482 | *{caller().'::mb::tr'}                = \&UTF8::R2::tr; | 
| 143 | 40 |  |  |  |  | 74 | } | 
| 144 | 40 |  |  |  |  | 100 |  | 
| 145 |  |  |  |  |  |  | # export %mb | 
| 146 |  |  |  |  |  |  | elsif ($_ eq '%mb') { | 
| 147 | 36 |  |  |  |  | 75 | no strict qw(refs); | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | # tie my %mb, __PACKAGE__; # makes: Parentheses missing around "my" list | 
| 150 |  |  |  |  |  |  | tie my %mb, 'UTF8::R2'; | 
| 151 |  |  |  |  |  |  | *{caller().'::mb'} = \%mb; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 88 | 100 |  | 88 | 0 | 1120 |  | 
| 154 |  |  |  |  |  |  | # set script encoding | 
| 155 |  |  |  |  |  |  | elsif (defined $utf8_codepoint{$_}) { | 
| 156 |  |  |  |  |  |  | $x = $utf8_codepoint{$_}; | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  | } | 
| 159 | 88 |  |  |  |  | 139 |  | 
| 160 | 88 |  |  |  |  | 117 | # $^X($EXECUTABLE_NAME) for execute MBCS Perl script | 
| 161 | 168 |  |  |  |  | 307 | $UTF8::R2::PERL = $^X; | 
| 162 | 168 |  |  |  |  | 459 | $UTF8::R2::PERL = $UTF8::R2::PERL; # to avoid: Name "UTF8::R2::PERL" used only once: possible typo at ... | 
| 163 |  |  |  |  |  |  |  | 
| 164 | 88 |  |  |  |  | 489 | # original $0($PROGRAM_NAME) | 
| 165 |  |  |  |  |  |  | $UTF8::R2::ORIG_PROGRAM_NAME = $0; | 
| 166 |  |  |  |  |  |  | $UTF8::R2::ORIG_PROGRAM_NAME = $UTF8::R2::ORIG_PROGRAM_NAME; # to avoid: Name "UTF8::R2::ORIG_PROGRAM_NAME" used only once: possible typo at ... | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 170 |  |  |  |  |  |  | # confess() for this module | 
| 171 |  |  |  |  |  |  | sub confess { | 
| 172 | 10 |  |  | 10 | 0 | 4527 | my $i = 0; | 
| 173 |  |  |  |  |  |  | my @confess = (); | 
| 174 |  |  |  |  |  |  | while (my($package,$filename,$line,$subroutine) = caller($i)) { | 
| 175 |  |  |  |  |  |  | push @confess, "[$i] $filename($line) $subroutine\n"; | 
| 176 |  |  |  |  |  |  | $i++; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  | print STDERR "\n", @_, "\n"; | 
| 179 |  |  |  |  |  |  | print STDERR CORE::reverse @confess; | 
| 180 |  |  |  |  |  |  | die; | 
| 181 |  |  |  |  |  |  | } | 
| 182 | 2 | 50 |  | 2 | 0 | 152 |  | 
| 183 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 184 |  |  |  |  |  |  | # chop() for UTF-8 codepoint string | 
| 185 | 2 |  |  |  |  | 101 | sub UTF8::R2::chop (@) { | 
| 186 |  |  |  |  |  |  | my $chop = ''; | 
| 187 |  |  |  |  |  |  | for (@_ ? @_ : $_) { | 
| 188 |  |  |  |  |  |  | if (my @x = /\G$x/g) { | 
| 189 |  |  |  |  |  |  | $chop = pop @x; | 
| 190 |  |  |  |  |  |  | $_ = join '', @x; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | return $chop; | 
| 194 |  |  |  |  |  |  | } | 
| 195 | 8 | 50 |  | 8 | 0 | 488 |  | 
| 196 | 8 |  |  |  |  | 244 | #--------------------------------------------------------------------- | 
| 197 | 8 | 100 |  |  |  | 65 | # chr() for UTF-8 codepoint string | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | sub UTF8::R2::chr (;$) { | 
| 199 |  |  |  |  |  |  | my $number = @_ ? $_[0] : $_; | 
| 200 | 2 |  |  |  |  | 6 |  | 
| 201 |  |  |  |  |  |  | # Negative values give the Unicode replacement character (chr(0xfffd)), | 
| 202 |  |  |  |  |  |  | # except under the bytes pragma, where the low eight bits of the value | 
| 203 | 2 |  |  |  |  | 6 | # (truncated to an integer) are used. | 
| 204 | 2 |  |  |  |  | 4 |  | 
| 205 |  |  |  |  |  |  | my @octet = (); | 
| 206 |  |  |  |  |  |  | CORE::do { | 
| 207 | 2 |  |  |  |  | 8 | unshift @octet, ($number % 0x100); | 
| 208 | 2 |  |  |  |  | 6 | $number = int($number / 0x100); | 
| 209 | 2 |  |  |  |  | 14 | } while ($number > 0); | 
| 210 |  |  |  |  |  |  | return pack 'C*', @octet; | 
| 211 | 8 |  |  |  |  | 31 | } | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 214 |  |  |  |  |  |  | # mb::do() like do(), mb.pm compatible | 
| 215 |  |  |  |  |  |  | sub UTF8::R2::do ($) { | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 16 |  |  | 16 | 0 | 766 | # run as Perl script | 
| 218 | 16 | 100 |  |  |  | 41 | return CORE::eval sprintf(<<'END', (caller)[0,2,1]); | 
| 219 | 8 |  |  |  |  | 190 | package %s; | 
| 220 |  |  |  |  |  |  | #line %s "%s" | 
| 221 |  |  |  |  |  |  | CORE::do "$_[0]"; | 
| 222 | 8 |  |  |  |  | 18 | END | 
| 223 |  |  |  |  |  |  | } | 
| 224 | 16 | 100 |  |  |  | 36 |  | 
| 225 | 8 |  |  |  |  | 18 | #--------------------------------------------------------------------- | 
| 226 |  |  |  |  |  |  | # mb::eval() like eval(), mb.pm compatible | 
| 227 |  |  |  |  |  |  | sub UTF8::R2::eval (;$) { | 
| 228 | 8 |  |  |  |  | 26 | local $_ = @_ ? $_[0] : $_; | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | # run as Perl script in caller package | 
| 231 |  |  |  |  |  |  | return CORE::eval sprintf(<<'END', (caller)[0,2,1], $_); | 
| 232 |  |  |  |  |  |  | package %s; | 
| 233 |  |  |  |  |  |  | #line %s "%s" | 
| 234 |  |  |  |  |  |  | %s | 
| 235 | 16 | 100 |  | 16 | 0 | 756 | END | 
| 236 | 8 |  |  |  |  | 198 | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 239 | 8 |  |  |  |  | 25 | # getc() for UTF-8 codepoint string | 
| 240 |  |  |  |  |  |  | sub UTF8::R2::getc (;*) { | 
| 241 |  |  |  |  |  |  | my $fh = @_ ? Symbol::qualify_to_ref($_[0],caller()) : \*STDIN; | 
| 242 |  |  |  |  |  |  | my $getc = CORE::getc $fh; | 
| 243 |  |  |  |  |  |  | if ($getc =~ /\A [\x00-\x7F\x80-\xC1\xF5-\xFF] \z/xms) { | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  | elsif ($getc =~ /\A [\xC2-\xDF] \z/xms) { | 
| 246 | 6 | 100 |  | 6 | 0 | 317 | $getc .= CORE::getc $fh; | 
| 247 |  |  |  |  |  |  | } | 
| 248 | 6 | 100 |  |  |  | 310 | elsif ($getc =~ /\A [\xE0-\xEF] \z/xms) { | 
|  | 109 |  |  |  |  | 1150 |  | 
| 249 |  |  |  |  |  |  | $getc .= CORE::getc $fh; | 
| 250 |  |  |  |  |  |  | $getc .= CORE::getc $fh; | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  | elsif ($getc =~ /\A [\xF0-\xF4] \z/xms) { | 
| 253 |  |  |  |  |  |  | $getc .= CORE::getc $fh; | 
| 254 |  |  |  |  |  |  | $getc .= CORE::getc $fh; | 
| 255 | 2 | 100 |  | 2 | 0 | 199 | $getc .= CORE::getc $fh; | 
| 256 | 2 | 50 |  |  |  | 95 | } | 
| 257 | 2 |  |  |  |  | 6 | return $getc; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  | 0 | #--------------------------------------------------------------------- | 
| 261 |  |  |  |  |  |  | # index() for UTF-8 codepoint string | 
| 262 |  |  |  |  |  |  | sub UTF8::R2::index ($$;$) { | 
| 263 |  |  |  |  |  |  | my $index = 0; | 
| 264 |  |  |  |  |  |  | if (@_ == 3) { | 
| 265 |  |  |  |  |  |  | $index = CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2])); | 
| 266 |  |  |  |  |  |  | } | 
| 267 | 32 | 100 |  | 32 | 0 | 737 | else { | 
| 268 | 32 |  |  |  |  | 817 | $index = CORE::index $_[0], $_[1]; | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  | if ($index == -1) { | 
| 271 |  |  |  |  |  |  | return -1; | 
| 272 |  |  |  |  |  |  | } | 
| 273 |  |  |  |  |  |  | else { | 
| 274 | 30 | 100 |  | 30 | 0 | 1568 | return UTF8::R2::length(CORE::substr $_[0], 0, $index); | 
| 275 | 30 |  |  |  |  | 64 | } | 
| 276 | 30 | 50 |  |  |  | 597 | } | 
| 277 | 30 |  |  |  |  | 116 |  | 
| 278 | 70 |  |  |  |  | 110 | #--------------------------------------------------------------------- | 
| 279 |  |  |  |  |  |  | # JPerl like index() for UTF-8 codepoint string | 
| 280 |  |  |  |  |  |  | sub UTF8::R2::index_byte ($$;$) { | 
| 281 | 30 |  |  |  |  | 94 | if (@_ == 3) { | 
| 282 |  |  |  |  |  |  | return CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2])); | 
| 283 |  |  |  |  |  |  | } | 
| 284 |  |  |  |  |  |  | else { | 
| 285 |  |  |  |  |  |  | return CORE::index $_[0], $_[1]; | 
| 286 |  |  |  |  |  |  | } | 
| 287 | 200526 |  |  | 200526 | 0 | 322154 | } | 
| 288 | 200526 |  |  |  |  | 549056 |  | 
| 289 | 200526 |  |  |  |  | 334360 | #--------------------------------------------------------------------- | 
| 290 |  |  |  |  |  |  | # universal lc() for UTF-8 codepoint string | 
| 291 | 200526 | 100 |  |  |  | 568066 | sub UTF8::R2::lc (;$) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 292 | 0 |  |  |  |  | 0 | local $_ = @_ ? $_[0] : $_; | 
| 293 | 11414 | 100 |  |  |  | 31778 | #                          A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 294 | 0 |  |  |  |  | 0 | return join '', map { {qw( A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z )}->{$_}||$_ } /\G$x/g; | 
| 295 |  |  |  |  |  |  | #                          A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z | 
| 296 | 374 | 50 |  |  |  | 1979 | } | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 299 |  |  |  |  |  |  | # universal lcfirst() for UTF-8 codepoint string | 
| 300 |  |  |  |  |  |  | sub UTF8::R2::lcfirst (;$) { | 
| 301 |  |  |  |  |  |  | local $_ = @_ ? $_[0] : $_; | 
| 302 | 960 | 100 |  |  |  | 6543 | if (/\A($x)(.*)\z/s) { | 
| 303 |  |  |  |  |  |  | return UTF8::R2::lc($1) . $2; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  | else { | 
| 306 |  |  |  |  |  |  | return ''; | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  | } | 
| 309 | 5088 | 100 |  |  |  | 42557 |  | 
|  |  | 100 |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 311 |  |  |  |  |  |  | # length() for UTF-8 codepoint string | 
| 312 |  |  |  |  |  |  | sub UTF8::R2::length (;$) { | 
| 313 |  |  |  |  |  |  | local $_ = @_ ? $_[0] : $_; | 
| 314 |  |  |  |  |  |  | return scalar(() = /\G$x/g); | 
| 315 |  |  |  |  |  |  | } | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 318 | 4992 | 100 |  |  |  | 46887 | # ord() for UTF-8 codepoint string | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | sub UTF8::R2::ord (;$) { | 
| 320 |  |  |  |  |  |  | local $_ = @_ ? $_[0] : $_; | 
| 321 |  |  |  |  |  |  | my $ord = 0; | 
| 322 |  |  |  |  |  |  | if (/\A($x)/) { | 
| 323 |  |  |  |  |  |  | for my $octet (unpack 'C*', $1) { | 
| 324 |  |  |  |  |  |  | $ord = $ord * 0x100 + $octet; | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  | return $ord; | 
| 328 |  |  |  |  |  |  | } | 
| 329 | 22058 | 100 |  |  |  | 51274 |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 330 | 0 |  |  |  |  | 0 | #--------------------------------------------------------------------- | 
| 331 | 1226 | 100 |  |  |  | 5798 | # qr/ [A-Z] / for UTF-8 codepoint string | 
| 332 |  |  |  |  |  |  | sub list_all_by_hyphen_utf8_like { | 
| 333 |  |  |  |  |  |  | my($a, $b) = @_; | 
| 334 |  |  |  |  |  |  | my @a = (undef, unpack 'C*', $a); | 
| 335 | 1226 | 100 |  |  |  | 3914 | my @b = (undef, unpack 'C*', $b); | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | if (0) { } | 
| 338 |  |  |  |  |  |  | elsif (CORE::length($a) == 1) { | 
| 339 | 1226 |  |  |  |  | 4305 | if (0) { } | 
| 340 |  |  |  |  |  |  | elsif (CORE::length($b) == 1) { | 
| 341 |  |  |  |  |  |  | return ( | 
| 342 |  |  |  |  |  |  | $a[1]<=$b[1] ?  sprintf(join('', qw( [\x%02x-\x%02x]                                         )), $a[1], | 
| 343 | 10464 | 100 |  |  |  | 88084 | $b[1]) : (), | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | ); | 
| 345 |  |  |  |  |  |  | } | 
| 346 |  |  |  |  |  |  | elsif (CORE::length($b) == 2) { | 
| 347 |  |  |  |  |  |  | return ( | 
| 348 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x  [\x80-\x%02x]                             )), $b[1], $b[2]), | 
| 349 |  |  |  |  |  |  | 0xC2 < $b[1] ?  sprintf(join('', qw( [\xC2-\x%02x] [\x80-\xBF  ]                             )), $b[1]-1     ) : (), | 
| 350 |  |  |  |  |  |  | sprintf(join('', qw( [\x%02x-\x7F]                                           )), $a[1]       ), | 
| 351 |  |  |  |  |  |  | ); | 
| 352 | 10368 | 100 |  |  |  | 102798 | } | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | elsif (CORE::length($b) == 3) { | 
| 354 |  |  |  |  |  |  | return ( | 
| 355 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x]               )), $b[1], $b[2], $b[3]), | 
| 356 |  |  |  |  |  |  | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ]               )), $b[1], $b[2]-1     ) : (), | 
| 357 |  |  |  |  |  |  | 0xE0 < $b[1] ?  sprintf(join('', qw( [\xE0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ]               )), $b[1]-1            ) : (), | 
| 358 |  |  |  |  |  |  | sprintf(join('', qw( [\xC2-\xDF  ] [\x80-\xBF  ]                             )),                    ), | 
| 359 |  |  |  |  |  |  | sprintf(join('', qw( [\x%02x-\x7F]                                           )), $a[1]              ), | 
| 360 |  |  |  |  |  |  | ); | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  | elsif (CORE::length($b) == 4) { | 
| 363 | 107566 | 100 |  |  |  | 232159 | return ( | 
|  |  | 50 |  |  |  |  |  | 
| 364 | 0 |  |  |  |  | 0 | sprintf(join('', qw(       \x%02x        \x%02x        \x%02x  [\x80-\x%02x] )), $b[1], $b[2], $b[3], $b[4]), | 
| 365 | 34606 | 100 |  |  |  | 203358 | 0x80 < $b[3] ?  sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x] [\x80-\xBF  ] )), $b[1], $b[2], $b[3]-1     ) : (), | 
|  |  | 100 |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1], $b[2]-1            ) : (), | 
| 367 |  |  |  |  |  |  | 0xF0 < $b[1] ?  sprintf(join('', qw( [\xF0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1]-1                   ) : (), | 
| 368 |  |  |  |  |  |  | sprintf(join('', qw( [\xE0-\xEF  ] [\x80-\xBF  ] [\x80-\xBF  ]               )),                           ), | 
| 369 |  |  |  |  |  |  | sprintf(join('', qw( [\xC2-\xDF  ] [\x80-\xBF  ]                             )),                           ), | 
| 370 | 34606 | 100 |  |  |  | 143639 | sprintf(join('', qw( [\x%02x-\x7F]                                           )), $a[1]                     ), | 
|  |  | 100 |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | ); | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  | } | 
| 374 |  |  |  |  |  |  | elsif (CORE::length($a) == 2) { | 
| 375 | 34606 |  |  |  |  | 123893 | if (0) { } | 
| 376 |  |  |  |  |  |  | elsif (CORE::length($b) == 2) { | 
| 377 |  |  |  |  |  |  | my $lower_limit = join('|', | 
| 378 |  |  |  |  |  |  | $a[1] < 0xDF ?  sprintf(join('', qw( [\x%02x-\xDF] [\x80-\xBF  ]                             )), $a[1]+1     ) : (), | 
| 379 | 72960 | 100 |  |  |  | 777754 | sprintf(join('', qw(  \x%02x       [\x%02x-\xBF]                             )), $a[1], $a[2]), | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | ); | 
| 381 |  |  |  |  |  |  | my $upper_limit = join('|', | 
| 382 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x  [\x80-\x%02x]                             )), $b[1], $b[2]), | 
| 383 |  |  |  |  |  |  | 0xC2 < $b[1] ?  sprintf(join('', qw( [\xC2-\x%02x] [\x80-\xBF  ]                             )), $b[1]-1     ) : (), | 
| 384 |  |  |  |  |  |  | ); | 
| 385 |  |  |  |  |  |  | return qq{(?=$lower_limit)(?=$upper_limit)}; | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  | elsif (CORE::length($b) == 3) { | 
| 388 |  |  |  |  |  |  | return ( | 
| 389 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x]               )), $b[1], $b[2], $b[3] ), | 
| 390 | 59488 | 50 |  |  |  | 104698 | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ]               )), $b[1], $b[2]-1      ) : (), | 
| 391 | 0 |  |  |  |  | 0 | 0xE0 < $b[1] ?  sprintf(join('', qw( [\xE0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ]               )), $b[1]-1             ) : (), | 
| 392 | 59488 | 100 |  |  |  | 393403 | $a[1] < 0xDF ?  sprintf(join('', qw( [\x%02x-\xDF] [\x80-\xBF  ]                             )), $a[1]+1             ) : (), | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 393 |  |  |  |  |  |  | sprintf(join('', qw(  \x%02x       [\x%02x-\xBF]                             )), $a[1], $a[2]        ), | 
| 394 |  |  |  |  |  |  | ); | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  | elsif (CORE::length($b) == 4) { | 
| 397 |  |  |  |  |  |  | return ( | 
| 398 | 59488 | 100 |  |  |  | 309513 | sprintf(join('', qw(       \x%02x        \x%02x        \x%02x  [\x80-\x%02x] )), $b[1], $b[2], $b[3], $b[4]), | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | 0x80 < $b[3] ?  sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x] [\x80-\xBF  ] )), $b[1], $b[2], $b[3]-1     ) : (), | 
| 400 |  |  |  |  |  |  | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1], $b[2]-1            ) : (), | 
| 401 |  |  |  |  |  |  | 0xF0 < $b[1] ?  sprintf(join('', qw( [\xF0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1]-1                   ) : (), | 
| 402 |  |  |  |  |  |  | sprintf(join('', qw( [\xE0-\xEF  ] [\x80-\xBF  ] [\x80-\xBF  ]               )),                           ), | 
| 403 |  |  |  |  |  |  | $a[1] < 0xDF ?  sprintf(join('', qw( [\x%02x-\xDF] [\x80-\xBF  ]                             )), $a[1]+1                   ) : (), | 
| 404 | 59488 |  |  |  |  | 226186 | sprintf(join('', qw(  \x%02x       [\x%02x-\xBF]                             )), $a[1], $a[2]              ), | 
| 405 |  |  |  |  |  |  | ); | 
| 406 |  |  |  |  |  |  | } | 
| 407 |  |  |  |  |  |  | } | 
| 408 |  |  |  |  |  |  | elsif (CORE::length($a) == 3) { | 
| 409 | 0 |  |  |  |  | 0 | if (0) { } | 
|  | 0 |  |  |  |  | 0 |  | 
| 410 |  |  |  |  |  |  | elsif (CORE::length($b) == 3) { | 
| 411 |  |  |  |  |  |  | my $lower_limit = join('|', | 
| 412 |  |  |  |  |  |  | $a[1] < 0xEF ?  sprintf(join('', qw( [\x%02x-\xEF] [\x80-\xBF  ] [\x80-\xBF  ]               )), $a[1]+1            ) : (), | 
| 413 |  |  |  |  |  |  | $a[2] < 0xBF ?  sprintf(join('', qw(  \x%02x       [\x%02x-\xBF] [\x80-\xBF  ]               )), $a[1], $a[2]+1     ) : (), | 
| 414 |  |  |  |  |  |  | sprintf(join('', qw(  \x%02x        \x%02x       [\x%02x-\xBF]               )), $a[1], $a[2], $a[3]), | 
| 415 |  |  |  |  |  |  | ); | 
| 416 | 209731 |  |  | 209731 | 0 | 415156 | my $upper_limit = join('|', | 
| 417 | 209731 | 50 |  |  |  | 1255462 | sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x]               )), $b[1], $b[2], $b[3]), | 
| 418 | 209731 |  |  |  |  | 382602 | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ]               )), $b[1], $b[2]-1     ) : (), | 
| 419 |  |  |  |  |  |  | 0xE0 < $b[1] ?  sprintf(join('', qw( [\xE0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ]               )), $b[1]-1            ) : (), | 
| 420 |  |  |  |  |  |  | ); | 
| 421 | 209731 |  |  |  |  | 325339 | return qq{(?=$lower_limit)(?=$upper_limit)}; | 
| 422 | 209731 |  |  |  |  | 2604218 | } | 
| 423 |  |  |  |  |  |  | elsif (CORE::length($b) == 4) { | 
| 424 |  |  |  |  |  |  | return ( | 
| 425 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x        \x%02x        \x%02x  [\x80-\x%02x] )), $b[1], $b[2], $b[3], $b[4]), | 
| 426 | 1261625 |  |  |  |  | 2699954 | 0x80 < $b[3] ?  sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x] [\x80-\xBF  ] )), $b[1], $b[2], $b[3]-1     ) : (), | 
| 427 |  |  |  |  |  |  | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1], $b[2]-1            ) : (), | 
| 428 |  |  |  |  |  |  | 0xF0 < $b[1] ?  sprintf(join('', qw( [\xF0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1]-1                   ) : (), | 
| 429 | 1261625 | 100 |  |  |  | 10826340 | $a[1] < 0xEF ?  sprintf(join('', qw( [\x%02x-\xEF] [\x80-\xBF  ] [\x80-\xBF  ]               )), $a[1]+1                   ) : (), | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 430 | 208402 |  |  |  |  | 1418155 | $a[2] < 0xBF ?  sprintf(join('', qw(  \x%02x       [\x%02x-\xBF] [\x80-\xBF  ]               )), $a[1], $a[2]+1            ) : (), | 
| 431 | 208402 |  |  |  |  | 330722 | sprintf(join('', qw(  \x%02x        \x%02x       [\x%02x-\xBF]               )), $a[1], $a[2], $a[3]       ), | 
| 432 | 208402 |  |  |  |  | 243350 | ); | 
| 433 |  |  |  |  |  |  | } | 
| 434 | 208402 |  |  |  |  | 485570 | } | 
| 435 | 208450 |  |  |  |  | 339609 | elsif (CORE::length($a) == 4) { | 
| 436 |  |  |  |  |  |  | if (0) { } | 
| 437 |  |  |  |  |  |  | elsif (CORE::length($b) == 4) { | 
| 438 | 208450 | 100 | 100 |  |  | 775251 | my $lower_limit = join('|', | 
| 439 | 200526 | 100 |  |  |  | 406083 | $a[1] < 0xF4 ?  sprintf(join('', qw( [\x%02x-\xF4] [\x80-\xBF  ] [\x80-\xBF  ] [\x80-\xBF  ] )), $a[1]+1                   ) : (), | 
| 440 | 200526 | 100 |  |  |  | 323669 | $a[2] < 0xBF ?  sprintf(join('', qw(  \x%02x       [\x%02x-\xBF] [\x80-\xBF  ] [\x80-\xBF  ] )), $a[1], $a[2]+1            ) : (), | 
| 441 | 200526 |  |  |  |  | 391394 | $a[3] < 0xBF ?  sprintf(join('', qw(  \x%02x        \x%02x       [\x%02x-\xBF] [\x80-\xBF  ] )), $a[1], $a[2], $a[3]+1     ) : (), | 
| 442 | 200526 |  |  |  |  | 541082 | sprintf(join('', qw(  \x%02x        \x%02x        \x%02x       [\x%02x-\xBF] )), $a[1], $a[2], $a[3], $a[4]), | 
| 443 |  |  |  |  |  |  | ); | 
| 444 |  |  |  |  |  |  | my $upper_limit = join('|', | 
| 445 |  |  |  |  |  |  | sprintf(join('', qw(       \x%02x        \x%02x        \x%02x  [\x80-\x%02x] )), $b[1], $b[2], $b[3], $b[4]), | 
| 446 |  |  |  |  |  |  | 0x80 < $b[3] ?  sprintf(join('', qw(       \x%02x        \x%02x  [\x80-\x%02x] [\x80-\xBF  ] )), $b[1], $b[2], $b[3]-1     ) : (), | 
| 447 |  |  |  |  |  |  | 0x80 < $b[2] ?  sprintf(join('', qw(       \x%02x  [\x80-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1], $b[2]-1            ) : (), | 
| 448 |  |  |  |  |  |  | 0xF0 < $b[1] ?  sprintf(join('', qw( [\xF0-\x%02x] [\x80-\xBF  ] [\x80-\xBF  ] [\x80-\xBF  ] )), $b[1]-1                   ) : (), | 
| 449 | 7924 | 100 |  |  |  | 74629 | ); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 450 | 18 |  |  |  |  | 93 | return qq{(?=$lower_limit)(?=$upper_limit)}; | 
| 451 |  |  |  |  |  |  | } | 
| 452 |  |  |  |  |  |  | } | 
| 453 |  |  |  |  |  |  |  | 
| 454 | 33 |  |  |  |  | 99 | # over range of codepoint | 
| 455 | 24 |  |  |  |  | 73 | confess sprintf(qq{@{[__FILE__]}: codepoint class [$_[0]-$_[1]] is not 1 to 4 octets (%d-%d)}, CORE::length($a), CORE::length($b)); | 
| 456 |  |  |  |  |  |  | } | 
| 457 |  |  |  |  |  |  |  | 
| 458 | 24 |  |  |  |  | 66 | #--------------------------------------------------------------------- | 
| 459 | 24 |  |  |  |  | 82 | # qr// for UTF-8 codepoint string | 
| 460 | 192 |  |  |  |  | 512 | sub UTF8::R2::qr ($) { | 
| 461 | 3 |  |  |  |  | 15 |  | 
| 462 | 33 |  |  |  |  | 63 | my $modifiers = ''; | 
| 463 | 24 |  |  |  |  | 62 | if (($modifiers) = $_[0] =~ /\A \( \? \^? (.*?) : /x) { | 
| 464 | 24 |  |  |  |  | 51 | $modifiers =~ s/-.*//; | 
| 465 | 24 |  |  |  |  | 50 | } | 
| 466 | 192 |  |  |  |  | 326 |  | 
| 467 |  |  |  |  |  |  | my @after = (); | 
| 468 |  |  |  |  |  |  | while ($_[0] =~ s! \A ( | 
| 469 | 256 |  |  |  |  | 388 | (?> \[ (?: \[:[^:]+?:\] | \\x\{[0123456789ABCDEFabcdef]+\} | \\c[\x00-\xFF] | (?>\\$x) | $x )+? \] ) | | 
| 470 | 256 |  |  |  |  | 417 | \\x\{[0123456789ABCDEFabcdef]+\} | \\c[\x00-\xFF] | (?>\\$x) | $x | 
| 471 | 256 |  |  |  |  | 374 | ) !!x) { | 
| 472 | 256 |  |  |  |  | 403 | my $before = $1; | 
| 473 | 256 |  |  |  |  | 424 |  | 
| 474 | 256 |  |  |  |  | 425 | # [^...] or [...] | 
| 475 | 256 |  |  |  |  | 411 | if (my($negative,$class) = $before =~ /\A \[ (\^?) ((?>\\$x|$x)+?) \] \z/x) { | 
| 476 | 256 |  |  |  |  | 472 | my @classmate = $class =~ /\G (?: \[:.+?:\] | \\x\{[0123456789ABCDEFabcdef]+\} | (?>\\$x) | $x ) /xg; | 
| 477 | 256 |  |  |  |  | 443 | my @sbcs = (); | 
| 478 | 256 |  |  |  |  | 427 | my @xbcs = (); | 
| 479 | 256 |  |  |  |  | 435 |  | 
| 480 | 256 |  |  |  |  | 426 | for (my $i=0; $i <= $#classmate; ) { | 
| 481 | 256 |  |  |  |  | 411 | my $classmate = $classmate[$i]; | 
| 482 | 256 |  |  |  |  | 419 |  | 
| 483 |  |  |  |  |  |  | # hyphen of [A-Z] or [^A-Z] | 
| 484 |  |  |  |  |  |  | if (($i < $#classmate) and ($classmate[$i+1] eq '-')) { | 
| 485 | 256 |  |  |  |  | 679 | my $a = ($classmate[$i+0] =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) ? UTF8::R2::chr(hex $1) : $classmate[$i+0]; | 
| 486 | 256 |  |  |  |  | 634 | my $b = ($classmate[$i+2] =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) ? UTF8::R2::chr(hex $1) : $classmate[$i+2]; | 
| 487 | 256 |  |  |  |  | 661 | push @xbcs, list_all_by_hyphen_utf8_like($a, $b); | 
| 488 | 256 |  |  |  |  | 662 | $i += 3; | 
| 489 | 256 |  |  |  |  | 641 | } | 
| 490 | 256 |  |  |  |  | 650 |  | 
| 491 | 256 |  |  |  |  | 649 | # any "one" | 
| 492 | 256 |  |  |  |  | 639 | else { | 
| 493 | 256 |  |  |  |  | 665 |  | 
| 494 | 256 |  |  |  |  | 631 | # \x{UTF8hex} | 
| 495 | 256 |  |  |  |  | 653 | if ($classmate =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) { | 
| 496 | 256 |  |  |  |  | 637 | push @xbcs, UTF8::R2::chr(hex $1); | 
| 497 | 256 |  |  |  |  | 656 | } | 
| 498 | 256 |  |  |  |  | 669 |  | 
| 499 |  |  |  |  |  |  | # \any | 
| 500 |  |  |  |  |  |  | elsif ($classmate eq '\D'         ) { push @xbcs, "(?:(?![$bare_d])$x)"  } | 
| 501 | 21 |  |  |  |  | 45 | elsif ($classmate eq '\H'         ) { push @xbcs, "(?:(?![$bare_h])$x)"  } | 
| 502 | 120 |  |  |  |  | 215 | #                   elsif ($classmate eq '\N'         ) { push @xbcs, "(?:(?!\\n)$x)"        } # \N in a character class must be a named character: \N{...} in regex | 
| 503 | 7924 |  |  |  |  | 17555 | #                   elsif ($classmate eq '\R'         ) { push @xbcs, "(?>\\r\\n|[$bare_v])" } # Unrecognized escape \R in character class passed through in regex | 
| 504 |  |  |  |  |  |  | elsif ($classmate eq '\S'         ) { push @xbcs, "(?:(?![$bare_s])$x)"  } | 
| 505 |  |  |  |  |  |  | elsif ($classmate eq '\V'         ) { push @xbcs, "(?:(?![$bare_v])$x)"  } | 
| 506 |  |  |  |  |  |  | elsif ($classmate eq '\W'         ) { push @xbcs, "(?:(?![$bare_w])$x)"  } | 
| 507 |  |  |  |  |  |  | elsif ($classmate eq '\b'         ) { push @sbcs, $bare_backspace        } | 
| 508 | 208402 | 100 |  |  |  | 484009 | elsif ($classmate eq '\d'         ) { push @sbcs, $bare_d                } | 
|  |  | 50 |  |  |  |  |  | 
| 509 | 79449 | 0 | 33 |  |  | 1117953 | elsif ($classmate eq '\h'         ) { push @sbcs, $bare_h                } | 
|  |  | 50 | 33 |  |  |  |  | 
|  |  | 50 | 0 |  |  |  |  | 
| 510 |  |  |  |  |  |  | elsif ($classmate eq '\s'         ) { push @sbcs, $bare_s                } | 
| 511 |  |  |  |  |  |  | elsif ($classmate eq '\v'         ) { push @sbcs, $bare_v                } | 
| 512 |  |  |  |  |  |  | elsif ($classmate eq '\w'         ) { push @sbcs, $bare_w                } | 
| 513 |  |  |  |  |  |  |  | 
| 514 |  |  |  |  |  |  | # [:POSIX:] | 
| 515 |  |  |  |  |  |  | elsif ($classmate eq '[:alnum:]'  ) { push @sbcs, '\x30-\x39\x41-\x5A\x61-\x7A';                  } | 
| 516 |  |  |  |  |  |  | elsif ($classmate eq '[:alpha:]'  ) { push @sbcs, '\x41-\x5A\x61-\x7A';                           } | 
| 517 |  |  |  |  |  |  | elsif ($classmate eq '[:ascii:]'  ) { push @sbcs, '\x00-\x7F';                                    } | 
| 518 | 0 | 0 | 0 |  |  | 0 | elsif ($classmate eq '[:blank:]'  ) { push @sbcs, '\x09\x20';                                     } | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
| 519 |  |  |  |  |  |  | elsif ($classmate eq '[:cntrl:]'  ) { push @sbcs, '\x00-\x1F\x7F';                                } | 
| 520 |  |  |  |  |  |  | elsif ($classmate eq '[:digit:]'  ) { push @sbcs, '\x30-\x39';                                    } | 
| 521 |  |  |  |  |  |  | elsif ($classmate eq '[:graph:]'  ) { push @sbcs, '\x21-\x7F';                                    } | 
| 522 |  |  |  |  |  |  | elsif ($classmate eq '[:lower:]'  ) { push @sbcs, '\x61-\x7A';                                    } # /i modifier requires 'a' to 'z' literally | 
| 523 |  |  |  |  |  |  | elsif ($classmate eq '[:print:]'  ) { push @sbcs, '\x20-\x7F';                                    } | 
| 524 |  |  |  |  |  |  | elsif ($classmate eq '[:punct:]'  ) { push @sbcs, '\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E'; } | 
| 525 |  |  |  |  |  |  | elsif ($classmate eq '[:space:]'  ) { push @sbcs, '\s\x0B';                                       } # "\s" and vertical tab ("\cK") | 
| 526 |  |  |  |  |  |  | elsif ($classmate eq '[:upper:]'  ) { push @sbcs, '\x41-\x5A';                                    } # /i modifier requires 'A' to 'Z' literally | 
| 527 | 128953 | 50 | 66 |  |  | 1819604 | elsif ($classmate eq '[:word:]'   ) { push @sbcs, '\x30-\x39\x41-\x5A\x5F\x61-\x7A';              } | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 50 | 33 |  |  |  |  | 
| 528 |  |  |  |  |  |  | elsif ($classmate eq '[:xdigit:]' ) { push @sbcs, '\x30-\x39\x41-\x46\x61-\x66';                  } | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | # [:^POSIX:] | 
| 531 |  |  |  |  |  |  | elsif ($classmate eq '[:^alnum:]' ) { push @xbcs, "(?:(?![\\x30-\\x39\\x41-\\x5A\\x61-\\x7A])$x)";                      } | 
| 532 |  |  |  |  |  |  | elsif ($classmate eq '[:^alpha:]' ) { push @xbcs, "(?:(?![\\x41-\\x5A\\x61-\\x7A])$x)";                                 } | 
| 533 |  |  |  |  |  |  | elsif ($classmate eq '[:^ascii:]' ) { push @xbcs, "(?:(?![\\x00-\\x7F])$x)";                                            } | 
| 534 |  |  |  |  |  |  | elsif ($classmate eq '[:^blank:]' ) { push @xbcs, "(?:(?![\\x09\\x20])$x)";                                             } | 
| 535 |  |  |  |  |  |  | elsif ($classmate eq '[:^cntrl:]' ) { push @xbcs, "(?:(?![\\x00-\\x1F\\x7F])$x)";                                       } | 
| 536 | 498 | 100 |  |  |  | 3996 | elsif ($classmate eq '[:^digit:]' ) { push @xbcs, "(?:(?![\\x30-\\x39])$x)";                                            } | 
| 537 | 18 |  |  |  |  | 146 | elsif ($classmate eq '[:^graph:]' ) { push @xbcs, "(?:(?![\\x21-\\x7F])$x)";                                            } | 
| 538 | 33 |  |  |  |  | 254 | elsif ($classmate eq '[:^lower:]' ) { push @xbcs, "(?:(?![\\x61-\\x7A])$x)";                                            } # /i modifier requires 'a' to 'z' literally | 
| 539 | 24 |  |  |  |  | 190 | elsif ($classmate eq '[:^print:]' ) { push @xbcs, "(?:(?![\\x20-\\x7F])$x)";                                            } | 
| 540 | 18 |  |  |  |  | 147 | elsif ($classmate eq '[:^punct:]' ) { push @xbcs, "(?:(?![\\x21-\\x2F\\x3A-\\x3F\\x40\\x5B-\\x5F\\x60\\x7B-\\x7E])$x)"; } | 
| 541 | 39 |  |  |  |  | 322 | elsif ($classmate eq '[:^space:]' ) { push @xbcs, "(?:(?![\\s\\x0B])$x)";                                               } # "\s" and vertical tab ("\cK") | 
| 542 | 24 |  |  |  |  | 222 | elsif ($classmate eq '[:^upper:]' ) { push @xbcs, "(?:(?![\\x41-\\x5A])$x)";                                            } # /i modifier requires 'A' to 'Z' literally | 
| 543 | 24 |  |  |  |  | 181 | elsif ($classmate eq '[:^word:]'  ) { push @xbcs, "(?:(?![\\x30-\\x39\\x41-\\x5A\\x5F\\x61-\\x7A])$x)";                 } | 
| 544 | 192 |  |  |  |  | 1445 | elsif ($classmate eq '[:^xdigit:]') { push @xbcs, "(?:(?![\\x30-\\x39\\x41-\\x46\\x61-\\x66])$x)";                      } | 
| 545 | 18 |  |  |  |  | 149 |  | 
| 546 | 33 |  |  |  |  | 264 | # other all | 
| 547 | 24 |  |  |  |  | 181 | elsif (CORE::length($classmate)==1) { push @sbcs, $classmate } | 
| 548 | 24 |  |  |  |  | 171 | else                                { push @xbcs, $classmate } | 
| 549 | 24 |  |  |  |  | 171 | $i += 1; | 
| 550 | 192 |  |  |  |  | 1345 | } | 
| 551 |  |  |  |  |  |  | } | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | # [^...] | 
| 554 | 210172 | 100 |  |  |  | 626059 | if ($negative eq q[^]) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 555 | 0 |  |  |  |  | 0 | push @after, | 
| 556 |  |  |  |  |  |  | ( @sbcs and  @xbcs) ? '(?:(?!' . join('|', @xbcs, '['.join('',@sbcs).']') . ")$x)" : | 
| 557 |  |  |  |  |  |  | (!@sbcs and  @xbcs) ? '(?:(?!' . join('|', @xbcs                        ) . ")$x)" : | 
| 558 |  |  |  |  |  |  | ( @sbcs and !@xbcs) ? '(?:(?!' .                  '['.join('',@sbcs).']'  . ")$x)" : | 
| 559 |  |  |  |  |  |  | ''; | 
| 560 | 3 |  |  |  |  | 10 | } | 
| 561 |  |  |  |  |  |  |  | 
| 562 | 210172 |  |  |  |  | 1519254 | # [...] on Perl 5.006 | 
| 563 |  |  |  |  |  |  | elsif ($] =~ /\A5\.006/) { | 
| 564 |  |  |  |  |  |  | push @after, | 
| 565 |  |  |  |  |  |  | ( @sbcs and  @xbcs) ? '(?:'    . join('|', @xbcs, '['.join('',@sbcs).']') .    ')' : | 
| 566 |  |  |  |  |  |  | (!@sbcs and  @xbcs) ? '(?:'    . join('|', @xbcs                        ) .    ')' : | 
| 567 | 18 |  |  |  |  | 82 | ( @sbcs and !@xbcs) ?                             '['.join('',@sbcs).']'           : | 
| 568 |  |  |  |  |  |  | ''; | 
| 569 |  |  |  |  |  |  | } | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | # [...] | 
| 572 | 841848 |  |  |  |  | 6379441 | else { | 
| 573 |  |  |  |  |  |  | push @after, | 
| 574 |  |  |  |  |  |  | ( @sbcs and  @xbcs) ? '(?:(?=' . join('|', @xbcs, '['.join('',@sbcs).']') . ")$x)" : | 
| 575 |  |  |  |  |  |  | (!@sbcs and  @xbcs) ? '(?:(?=' . join('|', @xbcs                        ) . ")$x)" : | 
| 576 | 209731 |  |  |  |  | 497440 | ( @sbcs and !@xbcs) ?                             '['.join('',@sbcs).']'           : | 
| 577 | 209731 |  |  |  |  | 15967515 | ''; | 
| 578 |  |  |  |  |  |  | } | 
| 579 |  |  |  |  |  |  | } | 
| 580 |  |  |  |  |  |  |  | 
| 581 |  |  |  |  |  |  | # \any or /./ | 
| 582 |  |  |  |  |  |  | elsif ($before eq '.' ) { push @after, ($modifiers =~ /s/) ? $x : "(?:(?!\\n)$x)"                    } | 
| 583 | 10 | 50 |  | 10 | 0 | 2400 | elsif ($before eq '\B') { push @after, "(?:(? | 
| 584 |  |  |  |  |  |  | elsif ($before eq '\D') { push @after, "(?:(?![$bare_d])$x)"                                         } | 
| 585 |  |  |  |  |  |  | elsif ($before eq '\H') { push @after, "(?:(?![$bare_h])$x)"                                         } | 
| 586 | 10 | 50 |  |  |  | 42 | elsif ($before eq '\N') { push @after, "(?:(?!\\n)$x)"                                               } | 
| 587 | 0 | 0 |  |  |  | 0 | elsif ($before eq '\R') { push @after, "(?>\\r\\n|[$bare_v])"                                        } | 
| 588 | 0 |  |  |  |  | 0 | elsif ($before eq '\S') { push @after, "(?:(?![$bare_s])$x)"                                         } | 
| 589 |  |  |  |  |  |  | elsif ($before eq '\V') { push @after, "(?:(?![$bare_v])$x)"                                         } | 
| 590 |  |  |  |  |  |  | elsif ($before eq '\W') { push @after, "(?:(?![$bare_w])$x)"                                         } | 
| 591 | 0 |  |  |  |  | 0 | elsif ($before eq '\b') { push @after, "(?:(? | 
| 592 | 0 |  |  |  |  | 0 | elsif ($before eq '\d') { push @after, "[$bare_d]"                                                   } | 
| 593 |  |  |  |  |  |  | elsif ($before eq '\h') { push @after, "[$bare_h]"                                                   } | 
| 594 |  |  |  |  |  |  | elsif ($before eq '\s') { push @after, "[$bare_s]"                                                   } | 
| 595 |  |  |  |  |  |  | elsif ($before eq '\v') { push @after, "[$bare_v]"                                                   } | 
| 596 |  |  |  |  |  |  | elsif ($before eq '\w') { push @after, "[$bare_w]"                                                   } | 
| 597 |  |  |  |  |  |  |  | 
| 598 |  |  |  |  |  |  | # quantifiers ? + * {n} {n,} {n,m} | 
| 599 |  |  |  |  |  |  | elsif ($before =~ /\A[?+*{]\z/) { | 
| 600 | 10 |  |  |  |  | 16 | if    (0)                                             { } | 
| 601 | 10 | 50 | 33 |  |  | 81 | elsif ($after[-1] =~ /\A \\c [\x00-\xFF]        \z/x) { } # \c) \c} \c] \cX | 
| 602 | 0 |  |  |  |  | 0 | elsif ($after[-1] =~ /\A \\  [\x00-\xFF]        \z/x) { } # \) \} \] \" \0 \1 \D \E \F \G \H \K \L \N \Q \R \S \U \V \W \\ \a \d \e \f \h \l \n \r \s \t \u \v \w | 
| 603 |  |  |  |  |  |  | elsif ($after[-1] =~ /\A     [\x00-\xFF]        \z/x) { } # (a) a{1} [a] a . \012 \x12 \o{12} \g{1} | 
| 604 | 10 | 100 |  |  |  | 34 | elsif ($after[-1] =~ /       [\x00-\xFF] [)}\]] \z/x) { } # (any) any{1} [any] | 
| 605 | 2 |  |  |  |  | 4 | else {                                                    # XBCS | 
| 606 | 2 | 50 |  |  |  | 15 | $after[-1] = '(?:' . $after[-1] . ')'; | 
| 607 | 0 |  |  |  |  | 0 | } | 
| 608 |  |  |  |  |  |  | push @after, $before; | 
| 609 | 8 |  |  |  |  | 20 | } | 
|  | 88 |  |  |  |  | 184 |  | 
| 610 | 8 | 50 |  |  |  | 114 |  | 
| 611 | 8 |  |  |  |  | 28 | # \x{UTF8hex} | 
| 612 |  |  |  |  |  |  | elsif ($before =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) { | 
| 613 |  |  |  |  |  |  | push @after, UTF8::R2::chr(hex $1); | 
| 614 |  |  |  |  |  |  | } | 
| 615 | 8 |  |  |  |  | 13 |  | 
| 616 | 8 |  |  |  |  | 338 | # else | 
| 617 |  |  |  |  |  |  | else { | 
| 618 |  |  |  |  |  |  | push @after, $before; | 
| 619 |  |  |  |  |  |  | } | 
| 620 |  |  |  |  |  |  | } | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | my $after = join '', @after; | 
| 623 | 8 | 50 |  |  |  | 2468 | return qr/$after/; | 
|  |  | 50 |  |  |  |  |  | 
| 624 | 0 |  |  |  |  | 0 | } | 
| 625 | 0 |  |  |  |  | 0 |  | 
| 626 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 627 |  |  |  |  |  |  | # mb::require() like require(), mb.pm compatible | 
| 628 | 0 |  |  |  |  | 0 | sub UTF8::R2::require (;$) { | 
| 629 | 0 |  |  |  |  | 0 | local $_ = @_ ? $_[0] : $_; | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | # require perl version | 
| 632 | 8 |  |  |  |  | 44 | if (/^[0-9]/) { | 
| 633 |  |  |  |  |  |  | if ($] < $_) { | 
| 634 |  |  |  |  |  |  | confess "Perl $_ required--this is only version $], stopped"; | 
| 635 |  |  |  |  |  |  | } | 
| 636 | 0 |  |  |  |  | 0 | else { | 
| 637 |  |  |  |  |  |  | undef $@; | 
| 638 |  |  |  |  |  |  | return 1; | 
| 639 |  |  |  |  |  |  | } | 
| 640 |  |  |  |  |  |  | } | 
| 641 |  |  |  |  |  |  |  | 
| 642 |  |  |  |  |  |  | # require expr | 
| 643 |  |  |  |  |  |  | else { | 
| 644 |  |  |  |  |  |  |  | 
| 645 | 20 | 100 |  | 20 | 0 | 855 | # find expr in @INC | 
| 646 |  |  |  |  |  |  | my $file = $_; | 
| 647 |  |  |  |  |  |  | if (($file =~ s{::}{/}g) or ($file !~ m{[\./\\]})) { | 
| 648 | 4 |  |  |  |  | 18 | $file .= '.pm'; | 
| 649 |  |  |  |  |  |  | } | 
| 650 |  |  |  |  |  |  | if (exists $INC{$file}) { | 
| 651 |  |  |  |  |  |  | undef $@; | 
| 652 |  |  |  |  |  |  | return 1 if $INC{$file}; | 
| 653 |  |  |  |  |  |  | confess "Compilation failed in require"; | 
| 654 |  |  |  |  |  |  | } | 
| 655 | 16 | 100 |  |  |  | 827 | for my $prefix_file ($file, map { "$_/$file" } @INC) { | 
| 656 |  |  |  |  |  |  | if (-f $prefix_file) { | 
| 657 |  |  |  |  |  |  | $INC{$_} = $prefix_file; | 
| 658 |  |  |  |  |  |  |  | 
| 659 |  |  |  |  |  |  | # run as Perl script | 
| 660 |  |  |  |  |  |  | # must use CORE::do to use , because CORE::eval cannot do it. | 
| 661 |  |  |  |  |  |  | local $@; | 
| 662 |  |  |  |  |  |  | my $result = CORE::eval sprintf(<<'END', (caller)[0,2,1]); | 
| 663 |  |  |  |  |  |  | package %s; | 
| 664 |  |  |  |  |  |  | #line %s "%s" | 
| 665 |  |  |  |  |  |  | CORE::do "$prefix_file"; | 
| 666 |  |  |  |  |  |  | END | 
| 667 |  |  |  |  |  |  |  | 
| 668 | 16 |  |  | 16 | 0 | 3789 | # return result | 
| 669 | 16 | 100 |  |  |  | 46 | if ($@) { | 
| 670 | 8 |  |  |  |  | 206 | $INC{$_} = undef; | 
| 671 |  |  |  |  |  |  | confess $@; | 
| 672 |  |  |  |  |  |  | } | 
| 673 | 8 |  |  |  |  | 15 | elsif (not $result) { | 
| 674 |  |  |  |  |  |  | delete $INC{$_}; | 
| 675 | 16 | 100 |  |  |  | 36 | confess "$_ did not return true value"; | 
| 676 | 8 |  |  |  |  | 18 | } | 
| 677 |  |  |  |  |  |  | else { | 
| 678 |  |  |  |  |  |  | return $result; | 
| 679 | 8 |  |  |  |  | 26 | } | 
| 680 |  |  |  |  |  |  | } | 
| 681 |  |  |  |  |  |  | } | 
| 682 |  |  |  |  |  |  | confess "Can't find $_ in \@INC"; | 
| 683 |  |  |  |  |  |  | } | 
| 684 |  |  |  |  |  |  | } | 
| 685 |  |  |  |  |  |  |  | 
| 686 | 16 | 100 |  | 16 | 0 | 720 | #--------------------------------------------------------------------- | 
| 687 | 8 |  |  |  |  | 199 | # reverse() for UTF-8 codepoint string | 
| 688 |  |  |  |  |  |  | sub UTF8::R2::reverse (@) { | 
| 689 |  |  |  |  |  |  |  | 
| 690 | 8 |  |  |  |  | 27 | # in list context, | 
| 691 |  |  |  |  |  |  | if (wantarray) { | 
| 692 |  |  |  |  |  |  |  | 
| 693 |  |  |  |  |  |  | # returns a list value consisting of the elements of @_ in the opposite order | 
| 694 |  |  |  |  |  |  | return CORE::reverse @_; | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  |  | 
| 697 | 112 | 100 | 100 | 112 | 1 | 4119 | # in scalar context, | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 698 | 76 | 100 |  |  |  | 749 | else { | 
| 699 | 76 | 100 | 100 |  |  | 276 |  | 
|  |  |  | 100 |  |  |  |  | 
| 700 | 24 |  |  |  |  | 106 | # returns a string value with all characters in the opposite order of | 
| 701 |  |  |  |  |  |  | return (join '', | 
| 702 | 76 | 100 |  |  |  | 137 | CORE::reverse( | 
| 703 | 52 |  |  |  |  | 244 | @_ ? | 
| 704 |  |  |  |  |  |  | join('',@_) =~ /\G$x/g : # concatenates the elements of @_ | 
| 705 |  |  |  |  |  |  | /\G$x/g                  # $_ when without arguments | 
| 706 | 24 | 50 |  |  |  | 50 | ) | 
| 707 | 0 | 0 |  |  |  | 0 | ); | 
| 708 | 0 |  |  |  |  | 0 | } | 
| 709 |  |  |  |  |  |  | } | 
| 710 | 24 |  |  |  |  | 75 |  | 
| 711 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 712 |  |  |  |  |  |  | # rindex() for UTF-8 codepoint string | 
| 713 |  |  |  |  |  |  | sub UTF8::R2::rindex ($$;$) { | 
| 714 | 24 |  |  |  |  | 56 | my $rindex = 0; | 
| 715 |  |  |  |  |  |  | if (@_ == 3) { | 
| 716 |  |  |  |  |  |  | $rindex = CORE::rindex $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2])); | 
| 717 | 12 |  |  |  |  | 34 | } | 
| 718 |  |  |  |  |  |  | else { | 
| 719 |  |  |  |  |  |  | $rindex = CORE::rindex $_[0], $_[1]; | 
| 720 | 0 |  |  |  |  | 0 | } | 
| 721 |  |  |  |  |  |  | if ($rindex == -1) { | 
| 722 |  |  |  |  |  |  | return -1; | 
| 723 | 0 |  |  |  |  | 0 | } | 
| 724 |  |  |  |  |  |  | else { | 
| 725 |  |  |  |  |  |  | return UTF8::R2::length(CORE::substr $_[0], 0, $rindex); | 
| 726 |  |  |  |  |  |  | } | 
| 727 |  |  |  |  |  |  | } | 
| 728 |  |  |  |  |  |  |  | 
| 729 | 116 | 100 | 100 | 0 | 0 | 24341 | #--------------------------------------------------------------------- | 
|  | 116 | 100 |  | 116 |  | 722 |  | 
|  | 4 | 100 |  |  |  | 19 |  | 
|  | 112 | 50 |  |  |  | 336 |  | 
|  | 32 | 100 |  |  |  | 102 |  | 
|  | 32 | 50 |  |  |  | 85 |  | 
|  | 32 | 100 |  |  |  | 122 |  | 
|  | 64 | 100 |  |  |  | 386 |  | 
|  | 64 | 100 |  |  |  | 263 |  | 
|  | 64 |  |  |  |  | 287 |  | 
|  | 64 |  |  |  |  | 640 |  | 
|  | 16 |  |  |  |  | 76 |  | 
|  | 16 |  |  |  |  | 148 |  | 
| 730 |  |  |  |  |  |  | # JPerl like rindex() for UTF-8 codepoint string | 
| 731 |  |  |  |  |  |  | sub UTF8::R2::rindex_byte ($$;$) { | 
| 732 |  |  |  |  |  |  | if (@_ == 3) { | 
| 733 |  |  |  |  |  |  | return CORE::rindex $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2])); | 
| 734 |  |  |  |  |  |  | } | 
| 735 |  |  |  |  |  |  | else { | 
| 736 |  |  |  |  |  |  | return CORE::rindex $_[0], $_[1]; | 
| 737 |  |  |  |  |  |  | } | 
| 738 |  |  |  |  |  |  | } | 
| 739 |  |  |  |  |  |  |  | 
| 740 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 741 |  |  |  |  |  |  | # split() for UTF-8 codepoint string | 
| 742 |  |  |  |  |  |  | sub UTF8::R2::split (;$$$) { | 
| 743 |  |  |  |  |  |  | if (defined($_[0]) and (($_[0] eq '') or ($_[0] =~ /\A \( \? \^? [-a-z]* : \) \z/x))) { | 
| 744 |  |  |  |  |  |  | my @x = (defined($_[1]) ? $_[1] : $_) =~ /\G$x/g; | 
| 745 |  |  |  |  |  |  | if (defined($_[2]) and ($_[2] > 0) and (scalar(@x) > $_[2])) { | 
| 746 |  |  |  |  |  |  | @x = (@x[0..$_[2]-1-1], join('', @x[$_[2]-1..$#x])); | 
| 747 |  |  |  |  |  |  | } | 
| 748 |  |  |  |  |  |  | if (wantarray) { | 
| 749 |  |  |  |  |  |  | return @x; | 
| 750 |  |  |  |  |  |  | } | 
| 751 |  |  |  |  |  |  | else { | 
| 752 |  |  |  |  |  |  | if ($] < 5.012) { | 
| 753 |  |  |  |  |  |  | warn "Use of implicit split to \@_ is deprecated" if $^W; | 
| 754 |  |  |  |  |  |  | @_ = @x; # unlike camel book and perldoc saying, can return only scalar(@_), cannot @_ | 
| 755 |  |  |  |  |  |  | } | 
| 756 |  |  |  |  |  |  | return scalar @x; | 
| 757 |  |  |  |  |  |  | } | 
| 758 |  |  |  |  |  |  | } | 
| 759 |  |  |  |  |  |  | elsif (@_ == 3) { | 
| 760 |  |  |  |  |  |  | return CORE::split UTF8::R2::qr($_[0]), $_[1], $_[2]; | 
| 761 |  |  |  |  |  |  | } | 
| 762 |  |  |  |  |  |  | elsif (@_ == 2) { | 
| 763 |  |  |  |  |  |  | return CORE::split UTF8::R2::qr($_[0]), $_[1]; | 
| 764 |  |  |  |  |  |  | } | 
| 765 |  |  |  |  |  |  | elsif (@_ == 1) { | 
| 766 |  |  |  |  |  |  | return CORE::split UTF8::R2::qr($_[0]); | 
| 767 |  |  |  |  |  |  | } | 
| 768 |  |  |  |  |  |  | else { | 
| 769 |  |  |  |  |  |  | return CORE::split; | 
| 770 |  |  |  |  |  |  | } | 
| 771 |  |  |  |  |  |  | } | 
| 772 |  |  |  |  |  |  |  | 
| 773 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 774 |  |  |  |  |  |  | # substr() for UTF-8 codepoint string | 
| 775 |  |  |  |  |  |  | CORE::eval sprintf <<'END', ($] >= 5.014) ? ':lvalue' : ''; | 
| 776 |  |  |  |  |  |  | #                            vv--------------******* | 
| 777 |  |  |  |  |  |  | sub UTF8::R2::substr ($$;$$) %s { | 
| 778 |  |  |  |  |  |  | my @x = $_[0] =~ /\G$x/g; | 
| 779 |  |  |  |  |  |  |  | 
| 780 |  |  |  |  |  |  | # If the substring is beyond either end of the string, substr() returns the undefined | 
| 781 |  |  |  |  |  |  | # value and produces a warning. When used as an lvalue, specifying a substring that | 
| 782 | 860 |  |  | 860 | 0 | 1740 | # is entirely outside the string raises an exception. | 
| 783 | 860 |  |  |  |  | 1058 | # http://perldoc.perl.org/functions/substr.html | 
| 784 | 860 |  |  |  |  | 1838 |  | 
| 785 | 1884 | 100 | 100 |  |  | 4480 | # A return with no argument returns the scalar value undef in scalar context, | 
|  |  |  | 100 |  |  |  |  | 
| 786 |  |  |  |  |  |  | # an empty list () in list context, and (naturally) nothing at all in void | 
| 787 |  |  |  |  |  |  | # context. | 
| 788 |  |  |  |  |  |  |  | 
| 789 | 8 | 50 |  |  |  | 20 | if (($_[1] < (-1 * scalar(@x))) or (+1 * scalar(@x) < $_[1])) { | 
| 790 | 8 | 50 |  |  |  | 25 | return; | 
| 791 | 8 | 50 |  |  |  | 38 | } | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 792 | 0 |  |  |  |  | 0 |  | 
| 793 | 0 |  |  |  |  | 0 | # substr($string,$offset,$length,$replacement) | 
|  | 0 |  |  |  |  | 0 |  | 
| 794 |  |  |  |  |  |  | if (@_ == 4) { | 
| 795 |  |  |  |  |  |  | my $substr = join '', splice @x, $_[1], $_[2], $_[3]; | 
| 796 | 0 |  |  |  |  | 0 | $_[0] = join '', @x; | 
|  | 0 |  |  |  |  | 0 |  | 
| 797 |  |  |  |  |  |  | $substr; # "return $substr" doesn't work, don't write "return" | 
| 798 |  |  |  |  |  |  | } | 
| 799 | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 800 |  |  |  |  |  |  | # substr($string,$offset,$length) | 
| 801 |  |  |  |  |  |  | elsif (@_ == 3) { | 
| 802 | 8 |  |  |  |  | 18 | local $SIG{__WARN__} = sub {}; # avoid: Use of uninitialized value in join or string at here | 
|  | 24 |  |  |  |  | 49 |  | 
| 803 | 8 |  |  |  |  | 21 | my $octet_offset = | 
| 804 |  |  |  |  |  |  | ($_[1] < 0) ? -1 * CORE::length(join '', @x[$#x+$_[1]+1 .. $#x])     : | 
| 805 |  |  |  |  |  |  | ($_[1] > 0) ?      CORE::length(join '', @x[0           .. $_[1]-1]) : | 
| 806 |  |  |  |  |  |  | 0; | 
| 807 | 1876 | 50 |  |  |  | 2776 | my $octet_length = | 
| 808 | 0 |  |  |  |  | 0 | ($_[2] < 0) ? -1 * CORE::length(join '', @x[$#x+$_[2]+1 .. $#x])           : | 
| 809 |  |  |  |  |  |  | ($_[2] > 0) ?      CORE::length(join '', @x[$_[1]       .. $_[1]+$_[2]-1]) : | 
| 810 |  |  |  |  |  |  | 0; | 
| 811 | 1876 |  |  |  |  | 2791 | CORE::substr($_[0], $octet_offset, $octet_length); | 
| 812 |  |  |  |  |  |  | } | 
| 813 | 1876 |  |  |  |  | 3178 |  | 
| 814 |  |  |  |  |  |  | # substr($string,$offset) | 
| 815 |  |  |  |  |  |  | else { | 
| 816 | 860 |  |  |  |  | 2008 | my $octet_offset = | 
| 817 |  |  |  |  |  |  | ($_[1] < 0) ? -1 * CORE::length(join '', @x[$#x+$_[1]+1 .. $#x])     : | 
| 818 |  |  |  |  |  |  | ($_[1] > 0) ?      CORE::length(join '', @x[0           .. $_[1]-1]) : | 
| 819 |  |  |  |  |  |  | 0; | 
| 820 |  |  |  |  |  |  | CORE::substr($_[0], $octet_offset); | 
| 821 |  |  |  |  |  |  | } | 
| 822 | 430 |  |  | 430 | 1 | 28770 | } | 
| 823 | 430 |  |  |  |  | 2413 | END | 
| 824 | 430 |  |  |  |  | 2416 |  | 
| 825 | 430 | 100 |  |  |  | 1269 | #--------------------------------------------------------------------- | 
|  | 604 |  |  |  |  | 1458 |  | 
| 826 |  |  |  |  |  |  | # tr/A-C/1-3/ for UTF-8 codepoint | 
| 827 | 430 |  |  |  |  | 673 | sub list_all_ASCII_by_hyphen { | 
| 828 | 430 |  |  |  |  | 837 | my @hyphened = @_; | 
| 829 |  |  |  |  |  |  | my @list_all = (); | 
| 830 |  |  |  |  |  |  | for (my $i=0; $i <= $#hyphened; ) { | 
| 831 | 1026 | 100 |  |  |  | 1886 | if ( | 
| 832 |  |  |  |  |  |  | ($i+1 < $#hyphened)      and | 
| 833 |  |  |  |  |  |  | ($hyphened[$i+1] eq '-') and | 
| 834 | 938 | 100 | 66 |  |  | 2530 | 1) { | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 835 | 774 |  |  |  |  | 1944 | $hyphened[$i+0] = ($hyphened[$i+0] eq '\\-') ? '-' : $hyphened[$i+0]; | 
| 836 |  |  |  |  |  |  | $hyphened[$i+2] = ($hyphened[$i+2] eq '\\-') ? '-' : $hyphened[$i+2]; | 
| 837 |  |  |  |  |  |  | if (0) { } | 
| 838 |  |  |  |  |  |  | elsif ($hyphened[$i+0] !~ m/\A [\x00-\x7F] \z/xms) { | 
| 839 |  |  |  |  |  |  | confess sprintf(qq{@{[__FILE__]}: "$hyphened[$i+0]-$hyphened[$i+2]" in tr/// is not US-ASCII}); | 
| 840 | 92 |  |  |  |  | 211 | } | 
| 841 |  |  |  |  |  |  | elsif ($hyphened[$i+2] !~ m/\A [\x00-\x7F] \z/xms) { | 
| 842 |  |  |  |  |  |  | confess sprintf(qq{@{[__FILE__]}: "$hyphened[$i+0]-$hyphened[$i+2]" in tr/// is not US-ASCII}); | 
| 843 |  |  |  |  |  |  | } | 
| 844 |  |  |  |  |  |  | elsif ($hyphened[$i+0] gt $hyphened[$i+2]) { | 
| 845 | 56 |  |  |  |  | 152 | confess sprintf(qq{@{[__FILE__]}: "$hyphened[$i+0]-$hyphened[$i+2]" in tr/// is not "$hyphened[$i+0]" le "$hyphened[$i+2]"}); | 
| 846 |  |  |  |  |  |  | } | 
| 847 |  |  |  |  |  |  | else { | 
| 848 |  |  |  |  |  |  | push @list_all, map { CORE::chr($_) } (CORE::ord($hyphened[$i+0]) .. CORE::ord($hyphened[$i+2])); | 
| 849 |  |  |  |  |  |  | $i += 3; | 
| 850 | 16 |  |  |  |  | 48 | } | 
| 851 |  |  |  |  |  |  | } | 
| 852 |  |  |  |  |  |  | else { | 
| 853 |  |  |  |  |  |  | if ($hyphened[$i] eq '\\-') { | 
| 854 |  |  |  |  |  |  | push @list_all, '-'; | 
| 855 | 430 |  |  |  |  | 585 | } | 
| 856 | 430 |  |  |  |  | 579 | else { | 
| 857 |  |  |  |  |  |  | push @list_all, $hyphened[$i]; | 
| 858 |  |  |  |  |  |  | } | 
| 859 | 430 | 100 |  |  |  | 648 | $i++; | 
| 860 |  |  |  |  |  |  | } | 
| 861 |  |  |  |  |  |  | } | 
| 862 | 156 | 100 |  |  |  | 238 | return @list_all; | 
| 863 | 72 |  |  |  |  | 88 | } | 
| 864 | 72 |  |  |  |  | 151 |  | 
| 865 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 866 |  |  |  |  |  |  | # tr/// for UTF-8 codepoint string | 
| 867 | 648 | 100 |  |  |  | 921 | sub UTF8::R2::tr ($$$;$) { | 
| 868 | 360 |  |  |  |  | 419 | my @x           = $_[0] =~ /\G($x)/xmsg; | 
| 869 | 360 |  |  |  |  | 690 | my @search      = list_all_ASCII_by_hyphen($_[1] =~ /\G(\\-|$x)/xmsg); | 
| 870 |  |  |  |  |  |  | my @replacement = list_all_ASCII_by_hyphen($_[2] =~ /\G(\\-|$x)/xmsg); | 
| 871 |  |  |  |  |  |  | my %modifier    = (defined $_[3]) ? (map { $_ => 1 } CORE::split //, $_[3]) : (); | 
| 872 |  |  |  |  |  |  |  | 
| 873 |  |  |  |  |  |  | my %tr = (); | 
| 874 | 288 | 100 |  |  |  | 434 | for (my $i=0; $i <= $#search; $i++) { | 
|  |  | 50 |  |  |  |  |  | 
| 875 |  |  |  |  |  |  |  | 
| 876 |  |  |  |  |  |  | # tr/AAA/123/ works as tr/A/1/ | 
| 877 |  |  |  |  |  |  | if (not exists $tr{$search[$i]}) { | 
| 878 |  |  |  |  |  |  |  | 
| 879 |  |  |  |  |  |  | # tr/ABC/123/ makes %tr = ('A'=>'1','B'=>'2','C'=>'3',); | 
| 880 | 72 | 50 | 33 |  |  | 130 | if (defined($replacement[$i]) and ($replacement[$i] ne '')) { | 
| 881 |  |  |  |  |  |  | $tr{$search[$i]} = $replacement[$i]; | 
| 882 |  |  |  |  |  |  | } | 
| 883 |  |  |  |  |  |  |  | 
| 884 |  |  |  |  |  |  | # tr/ABC/12/d makes %tr = ('A'=>'1','B'=>'2','C'=>'',); | 
| 885 | 72 |  |  |  |  | 119 | elsif (exists $modifier{d}) { | 
| 886 |  |  |  |  |  |  | $tr{$search[$i]} = ''; | 
| 887 |  |  |  |  |  |  | } | 
| 888 | 288 |  |  |  |  | 532 |  | 
| 889 |  |  |  |  |  |  | # tr/ABC/12/ makes %tr = ('A'=>'1','B'=>'2','C'=>'2',); | 
| 890 |  |  |  |  |  |  | elsif (defined($replacement[-1]) and ($replacement[-1] ne '')) { | 
| 891 |  |  |  |  |  |  | $tr{$search[$i]} = $replacement[-1]; | 
| 892 |  |  |  |  |  |  | } | 
| 893 |  |  |  |  |  |  |  | 
| 894 |  |  |  |  |  |  | # tr/ABC// makes %tr = ('A'=>'A','B'=>'B','C'=>'C',); | 
| 895 | 84 |  |  |  |  | 167 | else { | 
| 896 |  |  |  |  |  |  | $tr{$search[$i]} = $search[$i]; | 
| 897 |  |  |  |  |  |  | } | 
| 898 | 540 | 100 |  |  |  | 739 | } | 
| 899 | 396 |  |  |  |  | 744 | } | 
| 900 |  |  |  |  |  |  |  | 
| 901 |  |  |  |  |  |  | my $tr = 0; | 
| 902 |  |  |  |  |  |  | my $replaced = ''; | 
| 903 |  |  |  |  |  |  |  | 
| 904 | 144 | 100 |  |  |  | 246 | # has /c modifier | 
|  |  | 50 |  |  |  |  |  | 
| 905 |  |  |  |  |  |  | if (exists $modifier{c}) { | 
| 906 |  |  |  |  |  |  |  | 
| 907 |  |  |  |  |  |  | # has /s modifier | 
| 908 |  |  |  |  |  |  | if (exists $modifier{s}) { | 
| 909 | 108 |  |  |  |  | 142 | my $last_transliterated = undef; | 
| 910 |  |  |  |  |  |  | while (defined(my $x = shift @x)) { | 
| 911 | 144 |  |  |  |  | 305 |  | 
| 912 |  |  |  |  |  |  | # /c modifier works here | 
| 913 |  |  |  |  |  |  | if (exists $tr{$x}) { | 
| 914 |  |  |  |  |  |  | $replaced .= $x; | 
| 915 |  |  |  |  |  |  | $last_transliterated = undef; | 
| 916 |  |  |  |  |  |  | } | 
| 917 |  |  |  |  |  |  | else { | 
| 918 |  |  |  |  |  |  |  | 
| 919 |  |  |  |  |  |  | # /d modifier works here | 
| 920 |  |  |  |  |  |  | if (exists $modifier{d}) { | 
| 921 | 274 | 100 |  |  |  | 439 | } | 
| 922 | 144 |  |  |  |  | 190 |  | 
| 923 | 144 |  |  |  |  | 293 | elsif (defined $replacement[-1]) { | 
| 924 | 1008 | 100 |  |  |  | 1467 |  | 
| 925 |  |  |  |  |  |  | # /s modifier works here | 
| 926 |  |  |  |  |  |  | if (defined($last_transliterated) and ($replacement[-1] eq $last_transliterated)) { | 
| 927 | 712 | 100 | 100 |  |  | 1628 | } | 
|  |  | 100 |  |  |  |  |  | 
| 928 |  |  |  |  |  |  |  | 
| 929 |  |  |  |  |  |  | # tr/// works here | 
| 930 |  |  |  |  |  |  | else { | 
| 931 |  |  |  |  |  |  | $replaced .= ($last_transliterated = $replacement[-1]); | 
| 932 |  |  |  |  |  |  | } | 
| 933 |  |  |  |  |  |  | } | 
| 934 |  |  |  |  |  |  | $tr++; | 
| 935 |  |  |  |  |  |  | } | 
| 936 | 276 |  |  |  |  | 418 | } | 
| 937 |  |  |  |  |  |  | } | 
| 938 | 712 |  |  |  |  | 1269 |  | 
| 939 |  |  |  |  |  |  | # has no /s modifier | 
| 940 |  |  |  |  |  |  | else { | 
| 941 | 296 |  |  |  |  | 362 | while (defined(my $x = shift @x)) { | 
| 942 | 296 |  |  |  |  | 537 |  | 
| 943 |  |  |  |  |  |  | # /c modifier works here | 
| 944 |  |  |  |  |  |  | if (exists $tr{$x}) { | 
| 945 |  |  |  |  |  |  | $replaced .= $x; | 
| 946 |  |  |  |  |  |  | } | 
| 947 |  |  |  |  |  |  | else { | 
| 948 |  |  |  |  |  |  |  | 
| 949 | 130 |  |  |  |  | 265 | # /d modifier works here | 
| 950 | 970 | 100 |  |  |  | 1355 | if (exists $modifier{d}) { | 
| 951 | 710 |  |  |  |  | 925 | } | 
| 952 | 710 |  |  |  |  | 1213 |  | 
| 953 |  |  |  |  |  |  | # tr/// works here | 
| 954 |  |  |  |  |  |  | elsif (defined $replacement[-1]) { | 
| 955 | 260 |  |  |  |  | 478 | $replaced .= $replacement[-1]; | 
| 956 |  |  |  |  |  |  | } | 
| 957 |  |  |  |  |  |  | $tr++; | 
| 958 |  |  |  |  |  |  | } | 
| 959 |  |  |  |  |  |  | } | 
| 960 |  |  |  |  |  |  | } | 
| 961 |  |  |  |  |  |  | } | 
| 962 | 430 | 100 |  |  |  | 673 |  | 
| 963 | 104 |  |  |  |  | 471 | # has no /c modifier | 
| 964 |  |  |  |  |  |  | else { | 
| 965 |  |  |  |  |  |  |  | 
| 966 |  |  |  |  |  |  | # has /s modifier | 
| 967 |  |  |  |  |  |  | if (exists $modifier{s}) { | 
| 968 | 326 |  |  |  |  | 497 | my $last_transliterated = undef; | 
| 969 | 326 |  |  |  |  | 1286 | while (defined(my $x = shift @x)) { | 
| 970 |  |  |  |  |  |  | if (exists $tr{$x}) { | 
| 971 |  |  |  |  |  |  |  | 
| 972 |  |  |  |  |  |  | # /d modifier works here | 
| 973 |  |  |  |  |  |  | if ($tr{$x} eq '') { | 
| 974 |  |  |  |  |  |  | } | 
| 975 |  |  |  |  |  |  |  | 
| 976 | 4 | 100 |  | 4 | 0 | 200 | # /s modifier works here | 
| 977 |  |  |  |  |  |  | elsif (defined($last_transliterated) and ($tr{$x} eq $last_transliterated)) { | 
| 978 | 4 | 100 |  |  |  | 230 | } | 
|  | 106 |  |  |  |  | 1048 |  | 
| 979 |  |  |  |  |  |  |  | 
| 980 |  |  |  |  |  |  | # tr/// works here | 
| 981 |  |  |  |  |  |  | else { | 
| 982 |  |  |  |  |  |  | $replaced .= ($last_transliterated = $tr{$x}); | 
| 983 |  |  |  |  |  |  | } | 
| 984 |  |  |  |  |  |  | $tr++; | 
| 985 | 2 | 100 |  | 2 | 0 | 205 | } | 
| 986 | 2 | 50 |  |  |  | 107 | else { | 
| 987 | 2 |  |  |  |  | 8 | $replaced .= $x; | 
| 988 |  |  |  |  |  |  | $last_transliterated = undef; | 
| 989 |  |  |  |  |  |  | } | 
| 990 | 0 |  |  |  |  | 0 | } | 
| 991 |  |  |  |  |  |  | } | 
| 992 |  |  |  |  |  |  |  | 
| 993 |  |  |  |  |  |  | # has no /s modifier | 
| 994 |  |  |  |  |  |  | else { | 
| 995 |  |  |  |  |  |  | while (defined(my $x = shift @x)) { | 
| 996 |  |  |  |  |  |  | if (exists $tr{$x}) { | 
| 997 |  |  |  |  |  |  | $replaced .= $tr{$x}; | 
| 998 |  |  |  |  |  |  | $tr++; | 
| 999 |  |  |  |  |  |  | } | 
| 1000 |  |  |  |  |  |  | else { | 
| 1001 | 30 |  |  | 30 |  | 220 | $replaced .= $x; | 
| 1002 | 209022 |  |  | 209022 |  | 13968888 | } | 
| 1003 |  |  |  | 0 |  |  | } | 
| 1004 |  |  |  | 0 |  |  | } | 
| 1005 |  |  |  | 0 |  |  | } | 
| 1006 |  |  |  | 0 |  |  |  | 
| 1007 |  |  |  | 0 |  |  | # /r modifier works here | 
| 1008 |  |  |  | 0 |  |  | if (exists $modifier{r}) { | 
| 1009 |  |  |  | 0 |  |  | return $replaced; | 
| 1010 |  |  |  | 0 |  |  | } | 
| 1011 |  |  |  | 0 |  |  |  | 
| 1012 |  |  |  |  |  |  | # has no /r modifier | 
| 1013 |  |  |  |  |  |  | else { | 
| 1014 |  |  |  |  |  |  | $_[0] = $replaced; | 
| 1015 |  |  |  |  |  |  | return $tr; | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 |  |  |  |  |  |  |  | 
| 1019 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 1020 |  |  |  |  |  |  | # universal uc() for UTF-8 codepoint string | 
| 1021 |  |  |  |  |  |  | sub UTF8::R2::uc (;$) { | 
| 1022 |  |  |  |  |  |  | local $_ = @_ ? $_[0] : $_; | 
| 1023 |  |  |  |  |  |  | #                          a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z | 
| 1024 |  |  |  |  |  |  | return join '', map { {qw( a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z )}->{$_}||$_ } /\G$x/g; | 
| 1025 |  |  |  |  |  |  | #                          a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z | 
| 1026 |  |  |  |  |  |  | } | 
| 1027 |  |  |  |  |  |  |  | 
| 1028 |  |  |  |  |  |  | #--------------------------------------------------------------------- | 
| 1029 |  |  |  |  |  |  | # universal ucfirst() for UTF-8 codepoint string | 
| 1030 |  |  |  |  |  |  | sub UTF8::R2::ucfirst (;$) { | 
| 1031 |  |  |  |  |  |  | local $_ = @_ ? $_[0] : $_; | 
| 1032 |  |  |  |  |  |  | if (/\A($x)(.*)\z/s) { | 
| 1033 |  |  |  |  |  |  | return UTF8::R2::uc($1) . $2; | 
| 1034 |  |  |  |  |  |  | } | 
| 1035 |  |  |  |  |  |  | else { | 
| 1036 |  |  |  |  |  |  | return ''; | 
| 1037 |  |  |  |  |  |  | } | 
| 1038 |  |  |  |  |  |  | } | 
| 1039 |  |  |  |  |  |  |  | 
| 1040 |  |  |  |  |  |  | # syntax sugar for UTF-8 codepoint regex | 
| 1041 |  |  |  |  |  |  | # | 
| 1042 |  |  |  |  |  |  | # tie my %mb, 'UTF8::R2'; | 
| 1043 |  |  |  |  |  |  | # $result = $_ =~ $mb{qr/$utf8regex/imsxo} | 
| 1044 |  |  |  |  |  |  | # $result = $_ =~ m<\G$mb{qr/$utf8regex/imsxo}>gc | 
| 1045 |  |  |  |  |  |  | # $result = $_ =~ s<$mb{qr/before/imsxo}>egr | 
| 1046 |  |  |  |  |  |  |  | 
| 1047 |  |  |  |  |  |  | sub TIEHASH  { bless { }, $_[0] } | 
| 1048 |  |  |  |  |  |  | sub FETCH    { UTF8::R2::qr $_[1] } | 
| 1049 |  |  |  |  |  |  | sub STORE    { } | 
| 1050 |  |  |  |  |  |  | sub FIRSTKEY { } | 
| 1051 |  |  |  |  |  |  | sub NEXTKEY  { } | 
| 1052 |  |  |  |  |  |  | sub EXISTS   { } | 
| 1053 |  |  |  |  |  |  | sub DELETE   { } | 
| 1054 |  |  |  |  |  |  | sub CLEAR    { } | 
| 1055 |  |  |  |  |  |  | sub UNTIE    { } | 
| 1056 |  |  |  |  |  |  | sub DESTROY  { } | 
| 1057 |  |  |  |  |  |  | sub SCALAR   { } | 
| 1058 |  |  |  |  |  |  |  | 
| 1059 |  |  |  |  |  |  | 1; | 
| 1060 |  |  |  |  |  |  |  | 
| 1061 |  |  |  |  |  |  | __END__ |