| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # $Id: encoding.pm,v 2.20 2017/06/10 17:23:50 dankogai Exp $ | 
| 2 |  |  |  |  |  |  | package encoding; | 
| 3 |  |  |  |  |  |  | our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 8 |  |  | 8 |  | 38170 | use Encode; | 
|  | 8 |  |  |  |  | 32 |  | 
|  | 8 |  |  |  |  | 683 |  | 
| 6 | 8 |  |  | 8 |  | 53 | use strict; | 
|  | 8 |  |  |  |  | 15 |  | 
|  | 8 |  |  |  |  | 158 |  | 
| 7 | 8 |  |  | 8 |  | 38 | use warnings; | 
|  | 8 |  |  |  |  | 18 |  | 
|  | 8 |  |  |  |  | 180 |  | 
| 8 | 8 |  |  | 8 |  | 54 | use Config; | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 804 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | use constant { | 
| 11 |  |  |  |  |  |  | DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, | 
| 12 | 8 |  | 33 |  |  | 26 | HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, | 
|  | 8 |  |  |  |  | 3333 |  | 
|  | 8 |  |  |  |  | 5730 |  | 
| 13 |  |  |  |  |  |  | PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped | 
| 14 | 8 |  |  | 8 |  | 57 | }; | 
|  | 8 |  |  |  |  | 16 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub _exception { | 
| 17 | 8 |  |  | 8 |  | 16 | my $name = shift; | 
| 18 | 8 | 50 |  |  |  | 43 | $] > 5.008 and return 0;    # 5.8.1 or higher then no | 
| 19 | 0 |  |  |  |  | 0 | my %utfs = map { $_ => 1 } | 
|  | 0 |  |  |  |  | 0 |  | 
| 20 |  |  |  |  |  |  | qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE | 
| 21 |  |  |  |  |  |  | UTF-32 UTF-32BE UTF-32LE); | 
| 22 | 0 | 0 |  |  |  | 0 | $utfs{$name} or return 0;    # UTFs or no | 
| 23 | 0 |  |  |  |  | 0 | require Config; | 
| 24 | 0 |  |  |  |  | 0 | Config->import(); | 
| 25 | 0 |  |  |  |  | 0 | our %Config; | 
| 26 | 0 | 0 |  |  |  | 0 | return $Config{perl_patchlevel} ? 0 : 1    # maintperl then no | 
| 27 |  |  |  |  |  |  | } | 
| 28 |  |  |  |  |  |  |  | 
| 29 | 0 |  | 0 | 0 | 0 | 0 | sub in_locale { $^H & ( $locale::hint_bits || 0 ) } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub _get_locale_encoding { | 
| 32 | 2 |  |  | 2 |  | 12 | my $locale_encoding; | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 2 | 50 |  |  |  | 10 | if ($^O eq 'MSWin32') { | 
| 35 | 0 |  |  |  |  | 0 | my @tries = ( | 
| 36 |  |  |  |  |  |  | # First try to get the OutputCP. This will work only if we | 
| 37 |  |  |  |  |  |  | # are attached to a console | 
| 38 |  |  |  |  |  |  | 'Win32.pm' => 'Win32::GetConsoleOutputCP', | 
| 39 |  |  |  |  |  |  | 'Win32/Console.pm' => 'Win32::Console::OutputCP', | 
| 40 |  |  |  |  |  |  | # If above failed, this means that we are a GUI app | 
| 41 |  |  |  |  |  |  | # Let's assume that the ANSI codepage is what matters | 
| 42 |  |  |  |  |  |  | 'Win32.pm' => 'Win32::GetACP', | 
| 43 |  |  |  |  |  |  | ); | 
| 44 | 0 |  |  |  |  | 0 | while (@tries) { | 
| 45 | 0 |  |  |  |  | 0 | my $cp = eval { | 
| 46 | 0 |  |  |  |  | 0 | require $tries[0]; | 
| 47 | 8 |  |  | 8 |  | 60 | no strict 'refs'; | 
|  | 8 |  |  |  |  | 18 |  | 
|  | 8 |  |  |  |  | 3780 |  | 
| 48 | 0 |  |  |  |  | 0 | &{$tries[1]}() | 
|  | 0 |  |  |  |  | 0 |  | 
| 49 |  |  |  |  |  |  | }; | 
| 50 | 0 | 0 |  |  |  | 0 | if ($cp) { | 
| 51 | 0 | 0 |  |  |  | 0 | if ($cp == 65001) { # Code page for UTF-8 | 
| 52 | 0 |  |  |  |  | 0 | $locale_encoding = 'UTF-8'; | 
| 53 |  |  |  |  |  |  | } else { | 
| 54 | 0 |  |  |  |  | 0 | $locale_encoding = 'cp' . $cp; | 
| 55 |  |  |  |  |  |  | } | 
| 56 | 0 |  |  |  |  | 0 | return $locale_encoding; | 
| 57 |  |  |  |  |  |  | } | 
| 58 | 0 |  |  |  |  | 0 | splice(@tries, 0, 2) | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # I18N::Langinfo isn't available everywhere | 
| 63 | 2 |  |  |  |  | 5 | $locale_encoding = eval { | 
| 64 | 2 |  |  |  |  | 832 | require I18N::Langinfo; | 
| 65 | 2 |  |  |  |  | 919 | find_encoding( | 
| 66 |  |  |  |  |  |  | I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() ) | 
| 67 |  |  |  |  |  |  | )->name | 
| 68 |  |  |  |  |  |  | }; | 
| 69 | 2 | 50 |  |  |  | 11 | return $locale_encoding if defined $locale_encoding; | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 0 |  |  |  |  | 0 | eval { | 
| 72 | 0 |  |  |  |  | 0 | require POSIX; | 
| 73 |  |  |  |  |  |  | # Get the current locale | 
| 74 |  |  |  |  |  |  | # Remember that MSVCRT impl is quite different from Unixes | 
| 75 | 0 |  |  |  |  | 0 | my $locale = POSIX::setlocale(POSIX::LC_CTYPE()); | 
| 76 | 0 | 0 |  |  |  | 0 | if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) { | 
| 77 | 0 |  |  |  |  | 0 | my $country_language; | 
| 78 | 0 |  |  |  |  | 0 | ( $country_language, $locale_encoding ) = ( $1, $2 ); | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # Could do more heuristics based on the country and language | 
| 81 |  |  |  |  |  |  | # since we have Locale::Country and Locale::Language available. | 
| 82 |  |  |  |  |  |  | # TODO: get a database of Language -> Encoding mappings | 
| 83 |  |  |  |  |  |  | # (the Estonian database at http://www.eki.ee/letter/ | 
| 84 |  |  |  |  |  |  | # would be excellent!) --jhi | 
| 85 | 0 | 0 |  |  |  | 0 | if (lc($locale_encoding) eq 'euc') { | 
| 86 | 0 | 0 |  |  |  | 0 | if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 87 | 0 |  |  |  |  | 0 | $locale_encoding = 'euc-jp'; | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | elsif ( $country_language =~ /^ko_KR|korean?$/i ) { | 
| 90 | 0 |  |  |  |  | 0 | $locale_encoding = 'euc-kr'; | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  | elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) { | 
| 93 | 0 |  |  |  |  | 0 | $locale_encoding = 'euc-cn'; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  | elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) { | 
| 96 | 0 |  |  |  |  | 0 | $locale_encoding = 'euc-tw'; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  | else { | 
| 99 | 0 |  |  |  |  | 0 | require Carp; | 
| 100 | 0 |  |  |  |  | 0 | Carp::croak( | 
| 101 |  |  |  |  |  |  | "encoding: Locale encoding '$locale_encoding' too ambiguous" | 
| 102 |  |  |  |  |  |  | ); | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  | }; | 
| 107 |  |  |  |  |  |  |  | 
| 108 | 0 |  |  |  |  | 0 | return $locale_encoding; | 
| 109 |  |  |  |  |  |  | } | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | sub import { | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 9 |  |  | 9 |  | 197 | if ( ord("A") == 193 ) { | 
| 114 |  |  |  |  |  |  | require Carp; | 
| 115 |  |  |  |  |  |  | Carp::croak("encoding: pragma does not support EBCDIC platforms"); | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | my $deprecate = | 
| 119 |  |  |  |  |  |  | ($] >= 5.017 and !$Config{usecperl}) | 
| 120 | 9 | 50 | 33 |  |  | 581 | ? "Use of the encoding pragma is deprecated" : 0; | 
| 121 |  |  |  |  |  |  |  | 
| 122 | 9 |  |  |  |  | 33 | my $class = shift; | 
| 123 | 9 |  |  |  |  | 17 | my $name  = shift; | 
| 124 | 9 | 50 |  |  |  | 31 | if (!$name){ | 
| 125 | 0 |  |  |  |  | 0 | require Carp; | 
| 126 | 0 |  |  |  |  | 0 | Carp::croak("encoding: no encoding specified."); | 
| 127 |  |  |  |  |  |  | } | 
| 128 | 9 | 50 |  |  |  | 33 | if ( $name eq ':_get_locale_encoding' ) {    # used by lib/open.pm | 
| 129 | 0 |  |  |  |  | 0 | my $caller = caller(); | 
| 130 |  |  |  |  |  |  | { | 
| 131 | 8 |  |  | 8 |  | 60 | no strict 'refs'; | 
|  | 8 |  |  |  |  | 21 |  | 
|  | 8 |  |  |  |  | 692 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 132 | 0 |  |  |  |  | 0 | *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding; | 
|  | 0 |  |  |  |  | 0 |  | 
| 133 |  |  |  |  |  |  | } | 
| 134 | 0 |  |  |  |  | 0 | return; | 
| 135 |  |  |  |  |  |  | } | 
| 136 | 9 | 50 |  |  |  | 25 | $name = _get_locale_encoding() if $name eq ':locale'; | 
| 137 | 8 | 50 | 33 | 8 |  | 2980 | BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; } | 
| 138 | 9 |  |  |  |  | 23 | my %arg = @_; | 
| 139 | 9 | 50 |  |  |  | 27 | $name = $ENV{PERL_ENCODING} unless defined $name; | 
| 140 | 9 |  |  |  |  | 31 | my $enc = find_encoding($name); | 
| 141 | 9 | 50 |  |  |  | 38 | unless ( defined $enc ) { | 
| 142 | 0 |  |  |  |  | 0 | require Carp; | 
| 143 | 0 |  |  |  |  | 0 | Carp::croak("encoding: Unknown encoding '$name'"); | 
| 144 |  |  |  |  |  |  | } | 
| 145 | 9 |  |  |  |  | 86 | $name = $enc->name;    # canonize | 
| 146 | 9 | 100 |  |  |  | 33 | unless ( $arg{Filter} ) { | 
| 147 | 8 | 50 | 33 |  |  | 40 | if ($] >= 5.025003 and !$Config{usecperl}) { | 
| 148 | 0 |  |  |  |  | 0 | require Carp; | 
| 149 | 0 |  |  |  |  | 0 | Carp::croak("The encoding pragma is no longer supported. Check cperl"); | 
| 150 |  |  |  |  |  |  | } | 
| 151 | 8 | 50 |  |  |  | 683 | warnings::warnif("deprecated",$deprecate) if $deprecate; | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 8 |  |  |  |  | 20 | DEBUG and warn "_exception($name) = ", _exception($name); | 
| 154 | 8 | 50 |  |  |  | 29 | if (! _exception($name)) { | 
| 155 | 8 |  |  |  |  | 17 | if (!PERL_5_21_7) { | 
| 156 |  |  |  |  |  |  | ${^ENCODING} = $enc; | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  | else { | 
| 159 |  |  |  |  |  |  | # Starting with 5.21.7, this pragma uses a shadow variable | 
| 160 |  |  |  |  |  |  | # designed explicitly for it, ${^E_NCODING}, to enforce | 
| 161 |  |  |  |  |  |  | # lexical scope; instead of ${^ENCODING}. | 
| 162 | 8 |  |  |  |  | 39 | $^H{'encoding'} = 1; | 
| 163 | 8 |  |  |  |  | 30 | ${^E_NCODING} = $enc; | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  | } | 
| 166 | 8 |  |  |  |  | 18 | if (! HAS_PERLIO ) { | 
| 167 |  |  |  |  |  |  | return 1; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  | else { | 
| 171 | 1 | 50 |  |  |  | 80 | warnings::warnif("deprecated",$deprecate) if $deprecate; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 1 | 50 |  |  |  | 5 | defined( ${^ENCODING} ) and undef ${^ENCODING}; | 
| 174 | 1 |  |  |  |  | 3 | undef ${^E_NCODING} if PERL_5_21_7; | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | # implicitly 'use utf8' | 
| 177 | 1 |  |  |  |  | 6 | require utf8;      # to fetch $utf8::hint_bits; | 
| 178 | 1 |  |  |  |  | 3 | $^H |= $utf8::hint_bits; | 
| 179 | 1 | 50 | 50 |  |  | 2 | eval { | 
| 180 | 1 |  |  |  |  | 564 | require Filter::Util::Call; | 
| 181 | 1 |  |  |  |  | 729 | Filter::Util::Call->import; | 
| 182 |  |  |  |  |  |  | filter_add( | 
| 183 |  |  |  |  |  |  | sub { | 
| 184 | 11 |  |  | 11 |  | 60 | my $status = filter_read(); | 
| 185 | 11 | 50 |  |  |  | 24 | if ( $status > 0 ) { | 
| 186 | 11 |  |  |  |  | 38 | $_ = $enc->decode( $_, 1 ); | 
| 187 | 11 |  |  |  |  | 18 | DEBUG and warn $_; | 
| 188 |  |  |  |  |  |  | } | 
| 189 | 11 |  |  |  |  | 126 | $status; | 
| 190 |  |  |  |  |  |  | } | 
| 191 | 1 |  |  |  |  | 6 | ); | 
| 192 | 1 |  |  |  |  | 20 | 1; | 
| 193 |  |  |  |  |  |  | } and DEBUG and warn "Filter installed"; | 
| 194 |  |  |  |  |  |  | } | 
| 195 | 9 | 50 | 33 |  |  | 71 | defined ${^UNICODE} and ${^UNICODE} != 0 and return 1; | 
| 196 | 9 |  |  |  |  | 23 | for my $h (qw(STDIN STDOUT)) { | 
| 197 | 18 | 50 |  |  |  | 42 | if ( $arg{$h} ) { | 
| 198 | 0 | 0 |  |  |  | 0 | unless ( defined find_encoding( $arg{$h} ) ) { | 
| 199 | 0 |  |  |  |  | 0 | require Carp; | 
| 200 | 0 |  |  |  |  | 0 | Carp::croak( | 
| 201 |  |  |  |  |  |  | "encoding: Unknown encoding for $h, '$arg{$h}'"); | 
| 202 |  |  |  |  |  |  | } | 
| 203 | 0 |  |  |  |  | 0 | eval { binmode( $h, ":raw :encoding($arg{$h})" ) }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | else { | 
| 206 | 18 | 50 |  |  |  | 50 | unless ( exists $arg{$h} ) { | 
| 207 | 18 |  |  |  |  | 36 | eval { | 
| 208 | 8 |  |  | 8 |  | 52 | no warnings 'uninitialized'; | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 1041 |  | 
| 209 | 18 |  |  |  |  | 195 | binmode( $h, ":raw :encoding($name)" ); | 
| 210 |  |  |  |  |  |  | }; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | } | 
| 213 | 18 | 50 |  |  |  | 73 | if ($@) { | 
| 214 | 0 |  |  |  |  | 0 | require Carp; | 
| 215 | 0 |  |  |  |  | 0 | Carp::croak($@); | 
| 216 |  |  |  |  |  |  | } | 
| 217 |  |  |  |  |  |  | } | 
| 218 | 9 |  |  |  |  | 1212 | return 1;    # I doubt if we need it, though | 
| 219 |  |  |  |  |  |  | } | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | sub unimport { | 
| 222 | 8 |  |  | 8 |  | 44 | no warnings; | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 804 |  | 
| 223 | 3 |  |  | 3 |  | 150 | undef ${^ENCODING}; | 
| 224 | 3 |  |  |  |  | 8 | undef ${^E_NCODING} if PERL_5_21_7; | 
| 225 | 3 |  |  |  |  | 4 | if (HAS_PERLIO) { | 
| 226 | 3 |  |  |  |  | 12 | binmode( STDIN,  ":raw" ); | 
| 227 | 3 |  |  |  |  | 6 | binmode( STDOUT, ":raw" ); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | else { | 
| 230 |  |  |  |  |  |  | binmode(STDIN); | 
| 231 |  |  |  |  |  |  | binmode(STDOUT); | 
| 232 |  |  |  |  |  |  | } | 
| 233 | 3 | 100 |  |  |  | 77 | if ( $INC{"Filter/Util/Call.pm"} ) { | 
| 234 | 1 |  |  |  |  | 2 | eval { filter_del() }; | 
|  | 1 |  |  |  |  | 943 |  | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | 1; | 
| 239 |  |  |  |  |  |  | __END__ |