| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Unicode::MapUTF8; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 2284 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 29 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 1 |  |  | 1 |  | 5 | use Carp qw(confess croak carp); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 110 |  | 
| 7 | 1 |  |  | 1 |  | 645 | use Unicode::String; | 
|  | 1 |  |  |  |  | 6613 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 8 | 1 |  |  | 1 |  | 709 | use Unicode::Map; | 
|  | 1 |  |  |  |  | 8513 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 9 | 1 |  |  | 1 |  | 568 | use Unicode::Map8; | 
|  | 1 |  |  |  |  | 5588 |  | 
|  | 1 |  |  |  |  | 48 |  | 
| 10 | 1 |  |  | 1 |  | 689 | use Jcode; | 
|  | 1 |  |  |  |  | 32520 |  | 
|  | 1 |  |  |  |  | 92 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 1 |  |  | 1 |  | 9 | use vars qw ($VERSION @EXPORT @EXPORT_OK @EXPORT_TAGS @ISA); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 79 |  | 
| 13 | 1 |  |  | 1 |  | 757 | use subs qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias _init_charsets); | 
|  | 1 |  |  |  |  | 39 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | require Exporter; | 
| 16 |  |  |  |  |  |  | BEGIN { | 
| 17 | 1 |  |  | 1 |  | 154 | @ISA         = qw(Exporter); | 
| 18 | 1 |  |  |  |  | 3 | @EXPORT      = qw (); | 
| 19 | 1 |  |  |  |  | 3 | @EXPORT_OK   = qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias); | 
| 20 | 1 |  |  |  |  | 2 | @EXPORT_TAGS = qw (); | 
| 21 | 1 |  |  |  |  | 4290 | $VERSION     = "1.12"; | 
| 22 |  |  |  |  |  |  | } | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | ############################ | 
| 25 |  |  |  |  |  |  | # File level package globals (class variables) | 
| 26 |  |  |  |  |  |  | my $_Supported_Charsets; | 
| 27 |  |  |  |  |  |  | my $_Charset_Names; | 
| 28 |  |  |  |  |  |  | my $_Charset_Aliases; | 
| 29 |  |  |  |  |  |  | _init_charsets; | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | ############## | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | sub utf8_charset_alias { | 
| 34 | 3 | 50 |  | 3 |  | 88 | if ($#_ == -1) { | 
| 35 | 0 |  |  |  |  | 0 | my $aliases = {}; | 
| 36 | 0 |  |  |  |  | 0 | %$aliases   =  %$_Charset_Aliases; | 
| 37 | 0 |  |  |  |  | 0 | return $aliases; | 
| 38 |  |  |  |  |  |  | } | 
| 39 | 3 |  |  |  |  | 25 | my $parms; | 
| 40 | 3 |  |  |  |  | 21 | my @parms_list = @_; | 
| 41 | 3 | 100 | 66 |  |  | 22 | if (($#parms_list == 0) && (ref ($parms_list[0]) eq 'HASH')) { | 
|  |  | 50 | 33 |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 42 | 2 |  |  |  |  | 6 | _set_utf8_charset_alias($parms_list[0]); | 
| 43 | 2 |  |  |  |  | 3 | return; | 
| 44 |  |  |  |  |  |  | } elsif (($#parms_list > 0) && (($#parms_list % 2) == 1)) { | 
| 45 | 0 |  |  |  |  | 0 | _set_utf8_charset_alias({ @parms_list }); | 
| 46 | 0 |  |  |  |  | 0 | return; | 
| 47 |  |  |  |  |  |  | } elsif ($#parms_list == 0) { | 
| 48 | 1 |  |  |  |  | 2 | my $lc_charset = lc($parms_list[0]); | 
| 49 | 1 |  |  |  |  | 2 | my $result     = $_Charset_Aliases->{$lc_charset}; | 
| 50 | 1 |  |  |  |  | 3 | return $result; | 
| 51 |  |  |  |  |  |  | } | 
| 52 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - invalid parameters passed\n"); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | ###################################################################### | 
| 56 |  |  |  |  |  |  | # Sets (or clears ;-) ) a runtime character set alias. | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | sub _set_utf8_charset_alias { | 
| 59 | 2 |  |  | 2 |  | 5 | my ($parms) = @_; | 
| 60 | 2 |  |  |  |  | 7 | my @alias_names = keys %$parms; | 
| 61 | 2 |  |  |  |  | 5 | foreach my $alias (@alias_names) { | 
| 62 | 2 |  |  |  |  | 5 | my $lc_alias = lc ($alias); | 
| 63 | 2 |  |  |  |  | 6 | my $charset  = $parms->{$alias}; | 
| 64 | 2 | 100 |  |  |  | 5 | if (! defined $charset) { | 
| 65 | 1 | 50 |  |  |  | 3 | if (exists ($_Charset_Aliases->{$lc_alias})) { | 
| 66 | 1 |  |  |  |  | 2 | delete $_Charset_Aliases->{$lc_alias}; | 
| 67 |  |  |  |  |  |  | } | 
| 68 | 1 |  |  |  |  | 3 | next; | 
| 69 |  |  |  |  |  |  | } | 
| 70 | 1 |  |  |  |  | 2 | my $lc_charset = lc ($charset); | 
| 71 | 1 | 50 |  |  |  | 3 | if (! exists ($_Charset_Names->{$lc_charset})) { | 
| 72 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - attempted to set alias '$alias' to point to unknown charset encoding of '$charset'\n"); | 
| 73 |  |  |  |  |  |  | } | 
| 74 | 1 | 50 |  |  |  | 3 | if (exists ($_Charset_Names->{$lc_alias})) { | 
| 75 | 0 |  |  |  |  | 0 | carp('[' . localtime(time) . '] [warning] ' . __PACKAGE__ . "::utf8_charset_alias() - Aliased base defined charset name '$alias' to '$charset'."); | 
| 76 |  |  |  |  |  |  | } | 
| 77 | 1 |  |  |  |  | 4 | $_Charset_Aliases->{$lc_alias} = $lc_charset; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | } | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | #### | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | sub utf8_supported_charset { | 
| 84 | 1 | 50 | 33 | 1 |  | 29 | if ($#_ == -1 && wantarray) { | 
| 85 | 1 |  |  |  |  | 381 | my %all_charsets = (%$_Supported_Charsets, %$_Charset_Aliases); | 
| 86 | 1 |  |  |  |  | 367 | my @charsets     = sort keys %all_charsets; | 
| 87 | 1 |  |  |  |  | 136 | return @charsets; | 
| 88 |  |  |  |  |  |  | } | 
| 89 | 0 |  |  |  |  | 0 | my $charset = shift; | 
| 90 | 0 | 0 |  |  |  | 0 | if (not defined $charset) { | 
| 91 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_supported_charset() - no character set specified\n"); | 
| 92 |  |  |  |  |  |  | } | 
| 93 | 0 |  |  |  |  | 0 | my $lc_charset = lc($charset); | 
| 94 | 0 | 0 |  |  |  | 0 | return 1 if (exists ($_Charset_Names->{$lc_charset})); | 
| 95 | 0 | 0 |  |  |  | 0 | return 1 if (exists ($_Charset_Aliases->{$lc_charset})); | 
| 96 | 0 |  |  |  |  | 0 | return 0; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | #### | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | sub to_utf8 { | 
| 102 | 17 |  |  | 17 |  | 444 | my @parm_list = @_; | 
| 103 | 17 |  |  |  |  | 22 | my $parms  = {}; | 
| 104 | 17 | 50 | 33 |  |  | 54 | if (($#parm_list > 0) && (($#parm_list % 2) == 1)) { | 
|  |  | 50 |  |  |  |  |  | 
| 105 | 0 |  |  |  |  | 0 | $parms = { @parm_list }; | 
| 106 |  |  |  |  |  |  | } elsif ($#parm_list == 0) { | 
| 107 | 17 |  |  |  |  | 19 | $parms = $parm_list[0]; | 
| 108 | 17 | 50 |  |  |  | 33 | if (! ref($parms)) { | 
| 109 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - invalid parameters passed\n"); | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  | } else { | 
| 112 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - bad parameters passed\n"); | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 | 17 | 50 |  |  |  | 30 | if (! (exists $parms->{-string})) { | 
| 116 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - missing '-string' parameter\n"); | 
| 117 |  |  |  |  |  |  | } | 
| 118 | 17 |  |  |  |  | 22 | my $string  = $parms->{-string}; | 
| 119 | 17 |  |  |  |  | 26 | my $charset = $parms->{-charset}; | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 17 | 50 |  |  |  | 25 | if (! defined ($charset)) { | 
| 122 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - missing '-charset' parameter value\n"); | 
| 123 |  |  |  |  |  |  | } | 
| 124 | 17 |  |  |  |  | 25 | my $lc_charset    = lc ($charset); | 
| 125 | 17 |  |  |  |  | 24 | my $alias_charset = $_Charset_Aliases->{$lc_charset}; | 
| 126 | 17 | 100 |  |  |  | 31 | my $true_charset  = defined($alias_charset) ? $_Charset_Names->{$alias_charset} : $_Charset_Names->{$lc_charset}; | 
| 127 | 17 | 100 |  |  |  | 21 | if (! defined $true_charset) { | 
| 128 | 1 |  |  |  |  | 264 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - character set '$charset' is not supported\n"); | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 16 | 50 |  |  |  | 26 | $string = '' if (! defined ($string)); | 
| 132 |  |  |  |  |  |  |  | 
| 133 | 16 |  |  |  |  | 24 | my $converter = $_Supported_Charsets->{$true_charset}; | 
| 134 | 16 | 100 |  |  |  | 30 | if    ($converter eq 'map8')       { return _unicode_map8_to_utf8   ($string,$true_charset); } | 
|  | 8 |  |  |  |  | 14 |  | 
| 135 | 8 | 50 |  |  |  | 23 | if    ($converter eq 'unicode-map'){ return _unicode_map_to_utf8    ($string,$true_charset); } | 
|  | 0 | 100 |  |  |  | 0 |  | 
|  |  | 50 |  |  |  |  |  | 
| 136 | 4 |  |  |  |  | 7 | elsif ($converter eq 'string')     { return _unicode_string_to_utf8 ($string,$true_charset); } | 
| 137 | 4 |  |  |  |  | 9 | elsif ($converter eq 'jcode')      { return _jcode_to_utf8          ($string,$true_charset); } | 
| 138 |  |  |  |  |  |  | else { | 
| 139 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - charset '$charset' is not supported\n"); | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | #### | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | sub from_utf8 { | 
| 146 | 16 |  |  | 16 |  | 131 | my @parm_list = @_; | 
| 147 | 16 |  |  |  |  | 20 | my $parms; | 
| 148 | 16 | 50 | 33 |  |  | 49 | if (($#parm_list > 0) && (($#parm_list % 2) == 1)) { | 
|  |  | 50 |  |  |  |  |  | 
| 149 | 0 |  |  |  |  | 0 | $parms = { @parm_list }; | 
| 150 |  |  |  |  |  |  | } elsif ($#parm_list == 0) { | 
| 151 | 16 |  |  |  |  | 20 | $parms = $parm_list[0]; | 
| 152 | 16 | 50 |  |  |  | 27 | if (! ref($parms)) { | 
| 153 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - invalid parameters passed\n"); | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  | } else { | 
| 156 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - bad parameters passed\n"); | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 | 16 | 50 |  |  |  | 28 | if (! (exists $parms->{-string})) { | 
| 160 | 0 |  |  |  |  | 0 | ; croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - missing '-string' parameter\n"); | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 | 16 |  |  |  |  | 22 | my $string  = $parms->{-string}; | 
| 164 | 16 |  |  |  |  | 17 | my $charset = $parms->{-charset}; | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 16 | 50 |  |  |  | 24 | if (! defined ($charset)) { | 
| 167 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - missing '-charset' parameter value\n"); | 
| 168 |  |  |  |  |  |  | } | 
| 169 | 16 |  |  |  |  | 26 | my $lc_charset    = lc ($charset); | 
| 170 | 16 |  |  |  |  | 21 | my $alias_charset = $_Charset_Aliases->{$lc_charset}; | 
| 171 | 16 | 100 |  |  |  | 33 | my $true_charset  = defined($alias_charset) ? $_Charset_Names->{$alias_charset} : $_Charset_Names->{$lc_charset}; | 
| 172 | 16 | 50 |  |  |  | 23 | if (! defined $true_charset) { | 
| 173 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - character set '$charset' is not supported\n"); | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 | 16 | 50 |  |  |  | 27 | $string = '' if (! defined ($string)); | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 16 |  |  |  |  | 20 | my $converter = $_Supported_Charsets->{$true_charset}; | 
| 179 | 16 |  |  |  |  | 17 | my $result; | 
| 180 | 16 | 100 |  |  |  | 35 | if    ($converter eq 'map8')        { $result = _unicode_map8_from_utf8   ($string,$true_charset); } | 
|  | 8 | 50 |  |  |  | 13 |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 181 | 0 |  |  |  |  | 0 | elsif ($converter eq 'unicode-map') { $result = _unicode_map_from_utf8    ($string,$true_charset); } | 
| 182 | 4 |  |  |  |  | 7 | elsif ($converter eq 'string')      { $result = _unicode_string_from_utf8 ($string,$true_charset); } | 
| 183 | 4 |  |  |  |  | 6 | elsif ($converter eq 'jcode')       { $result = _jcode_from_utf8          ($string,$true_charset); } | 
| 184 |  |  |  |  |  |  | else { | 
| 185 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - charset '$charset' is not supported\n"); | 
| 186 |  |  |  |  |  |  | } | 
| 187 | 16 |  |  |  |  | 44 | return $result; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | ###################################################################### | 
| 191 |  |  |  |  |  |  | # | 
| 192 |  |  |  |  |  |  | # _unicode_map_from_utf8($string,$target_charset); | 
| 193 |  |  |  |  |  |  | # | 
| 194 |  |  |  |  |  |  | # Returns the string converted from UTF8 to the specified target multibyte charset. | 
| 195 |  |  |  |  |  |  | # | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | sub _unicode_map_from_utf8 { | 
| 198 | 0 |  |  | 0 |  | 0 | my ($string,$target_charset) = @_; | 
| 199 |  |  |  |  |  |  |  | 
| 200 | 0 | 0 |  |  |  | 0 | if (! defined $target_charset) { | 
| 201 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n"); | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  |  | 
| 204 | 0 |  |  |  |  | 0 | my $ucs2   = from_utf8 ({ -string => $string, -charset => 'ucs2' }); | 
| 205 | 0 |  |  |  |  | 0 | my $target = Unicode::Map->new($target_charset); | 
| 206 | 0 | 0 |  |  |  | 0 | if (! defined $target) { | 
| 207 | 0 |  |  |  |  | 0 | confess( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") failed to instantate Unicode::Map object for charset '$target_charset': $!\n"); | 
| 208 |  |  |  |  |  |  | } | 
| 209 | 0 |  |  |  |  | 0 | my $result = $target->from_unicode($ucs2); | 
| 210 | 0 |  |  |  |  | 0 | return $result; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | ###################################################################### | 
| 214 |  |  |  |  |  |  | # | 
| 215 |  |  |  |  |  |  | # _unicode_map_to_utf8($string,$source_charset); | 
| 216 |  |  |  |  |  |  | # | 
| 217 |  |  |  |  |  |  | # Returns the string converted the specified target multibyte charset to UTF8. | 
| 218 |  |  |  |  |  |  | # | 
| 219 |  |  |  |  |  |  | sub _unicode_map_to_utf8 { | 
| 220 | 0 |  |  | 0 |  | 0 | my ($string,$source_charset) = @_; | 
| 221 |  |  |  |  |  |  |  | 
| 222 | 0 | 0 |  |  |  | 0 | if (! defined $source_charset) { | 
| 223 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_to_utf8() - (line ' . __LINE__ . ") No source character set specified\n"); | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  |  | 
| 226 | 0 |  |  |  |  | 0 | my $source = Unicode::Map->new($source_charset); | 
| 227 | 0 | 0 |  |  |  | 0 | if (! defined $source) { | 
| 228 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::Map object: $!\n"); | 
| 229 |  |  |  |  |  |  | } | 
| 230 | 0 |  |  |  |  | 0 | my $ucs2   = $source->to_unicode($string); | 
| 231 | 0 |  |  |  |  | 0 | my $result = to_utf8({ -string => $ucs2, -charset => 'ucs2' }); | 
| 232 | 0 |  |  |  |  | 0 | return $result; | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | ###################################################################### | 
| 236 |  |  |  |  |  |  | # | 
| 237 |  |  |  |  |  |  | # _unicode_map8_from_utf8($string,$target_charset); | 
| 238 |  |  |  |  |  |  | # | 
| 239 |  |  |  |  |  |  | # Returns the string converted from UTF8 to the specified target 8bit charset. | 
| 240 |  |  |  |  |  |  | # | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | sub _unicode_map8_from_utf8 { | 
| 243 | 8 |  |  | 8 |  | 13 | my ($string,$target_charset) = @_; | 
| 244 |  |  |  |  |  |  |  | 
| 245 | 8 | 50 |  |  |  | 13 | if (! defined $target_charset) { | 
| 246 | 0 |  |  |  |  | 0 | croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map8_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n"); | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  |  | 
| 249 | 8 |  |  |  |  | 26 | my $u = Unicode::String::utf8($string); | 
| 250 | 8 | 50 |  |  |  | 14 | if (! defined $u) { | 
| 251 | 0 |  |  |  |  | 0 | confess( '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") failed to instantate Unicode::String::utf8 object: $!\n"); | 
| 252 |  |  |  |  |  |  | } | 
| 253 | 8 |  |  |  |  | 18 | my $ordering = $u->ord; | 
| 254 | 8 | 50 | 66 |  |  | 169 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 255 | 8 |  |  |  |  | 16 | my $ucs2_string = $u->ucs2; | 
| 256 |  |  |  |  |  |  |  | 
| 257 | 8 |  |  |  |  | 73 | my $target = Unicode::Map8->new($target_charset); | 
| 258 | 8 | 50 |  |  |  | 553 | if (! defined $target) { | 
| 259 | 0 |  |  |  |  | 0 | confess( '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") ailed to instantate Unicode::Map8 object for character set '$target_charset':  $!\n"); | 
| 260 |  |  |  |  |  |  | } | 
| 261 | 8 |  |  |  |  | 27 | my $result = $target->to8($ucs2_string); | 
| 262 |  |  |  |  |  |  |  | 
| 263 | 8 |  |  |  |  | 36 | return $result; | 
| 264 |  |  |  |  |  |  | } | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | ###################################################################### | 
| 267 |  |  |  |  |  |  | # | 
| 268 |  |  |  |  |  |  | # _unicode_map8_to_utf8($string,$source_charset); | 
| 269 |  |  |  |  |  |  | # | 
| 270 |  |  |  |  |  |  | # Returns the string converted the specified target 8bit charset to UTF8. | 
| 271 |  |  |  |  |  |  | # | 
| 272 |  |  |  |  |  |  | # | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | sub _unicode_map8_to_utf8 { | 
| 275 | 8 |  |  | 8 |  | 13 | my ($string,$source_charset) = @_; | 
| 276 |  |  |  |  |  |  |  | 
| 277 | 8 |  |  |  |  | 19 | my $source = Unicode::Map8->new($source_charset); | 
| 278 | 8 | 50 |  |  |  | 602 | if (! defined $source) { | 
| 279 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::Map8 object for character set '$source_charset': $!\n"); | 
| 280 |  |  |  |  |  |  | } | 
| 281 |  |  |  |  |  |  |  | 
| 282 | 8 |  |  |  |  | 24 | my $ucs2_string = $source->tou($string); | 
| 283 | 8 | 50 |  |  |  | 330 | if (! defined $ucs2_string) { | 
| 284 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n"); | 
| 285 |  |  |  |  |  |  | } | 
| 286 | 8 |  |  |  |  | 27 | my $utf8_string = $ucs2_string->utf8; | 
| 287 |  |  |  |  |  |  |  | 
| 288 | 8 |  |  |  |  | 50 | return $utf8_string; | 
| 289 |  |  |  |  |  |  | } | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | ###################################################################### | 
| 292 |  |  |  |  |  |  | # | 
| 293 |  |  |  |  |  |  | # _unicode_string_from_utf8($string,$target_charset); | 
| 294 |  |  |  |  |  |  | # | 
| 295 |  |  |  |  |  |  | # Returns the string converted from UTF8 to the specified unicode encoding. | 
| 296 |  |  |  |  |  |  | # | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | sub _unicode_string_from_utf8 { | 
| 299 | 4 |  |  | 4 |  | 6 | my ($string,$target_charset) = @_; | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 4 |  |  |  |  | 6 | $target_charset = lc ($target_charset); | 
| 302 | 4 |  |  |  |  | 4 | my $final; | 
| 303 | 4 | 50 |  |  |  | 10 | if ($target_charset eq 'utf8') { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 304 | 0 |  |  |  |  | 0 | $final = $string; | 
| 305 |  |  |  |  |  |  | } elsif ($target_charset eq 'ucs2') { | 
| 306 | 4 |  |  |  |  | 10 | my $u = Unicode::String::utf8($string); | 
| 307 | 4 |  |  |  |  | 8 | my $ordering = $u->ord; | 
| 308 | 4 | 50 | 66 |  |  | 66 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 309 | 4 |  |  |  |  | 9 | $final = $u->ucs2; | 
| 310 |  |  |  |  |  |  | } elsif ($target_charset eq 'ucs4') { | 
| 311 | 0 |  |  |  |  | 0 | my $u = Unicode::String::utf8($string); | 
| 312 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 313 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 314 | 0 |  |  |  |  | 0 | $final = $u->ucs4; | 
| 315 |  |  |  |  |  |  | } elsif ($target_charset eq 'utf16') { | 
| 316 | 0 |  |  |  |  | 0 | my $u = Unicode::String::utf8($string); | 
| 317 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 318 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 319 | 0 |  |  |  |  | 0 | $final = $u->utf16; | 
| 320 |  |  |  |  |  |  | } elsif ($target_charset eq 'utf7') { | 
| 321 | 0 |  |  |  |  | 0 | my $u = Unicode::String::utf8($string); | 
| 322 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 323 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 324 | 0 |  |  |  |  | 0 | $final = $u->utf7; | 
| 325 |  |  |  |  |  |  | } else { | 
| 326 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_from_utf8() - charset '$target_charset' is not supported\n"); | 
| 327 |  |  |  |  |  |  | } | 
| 328 | 4 |  |  |  |  | 39 | return $final; | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | ###################################################################### | 
| 332 |  |  |  |  |  |  | # | 
| 333 |  |  |  |  |  |  | # _unicode_string_to_utf8($string,$source_charset); | 
| 334 |  |  |  |  |  |  | # | 
| 335 |  |  |  |  |  |  | # Returns the string converted the specified unicode encoding to UTF8. | 
| 336 |  |  |  |  |  |  | # | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | sub _unicode_string_to_utf8 { | 
| 339 | 4 |  |  | 4 |  | 8 | my ($string,$source_charset) = @_; | 
| 340 |  |  |  |  |  |  |  | 
| 341 | 4 |  |  |  |  | 6 | $source_charset = lc ($source_charset); | 
| 342 | 4 |  |  |  |  | 5 | my $final; | 
| 343 | 4 | 50 |  |  |  | 7 | if    ($source_charset eq 'utf8') { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 344 | 0 |  |  |  |  | 0 | $final = $string; | 
| 345 |  |  |  |  |  |  | } elsif ($source_charset eq 'ucs2') { | 
| 346 | 4 |  |  |  |  | 9 | my $u = Unicode::String::utf16($string); | 
| 347 | 4 | 50 |  |  |  | 102 | if (! defined $u) { | 
| 348 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n"); | 
| 349 |  |  |  |  |  |  | } | 
| 350 | 4 |  |  |  |  | 9 | my $ordering = $u->ord; | 
| 351 | 4 | 50 | 66 |  |  | 67 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 352 | 4 |  |  |  |  | 15 | $final = $u->utf8; | 
| 353 |  |  |  |  |  |  | } elsif ($source_charset eq 'ucs4') { | 
| 354 | 0 |  |  |  |  | 0 | my $u = Unicode::String::ucs4($string); | 
| 355 | 0 | 0 |  |  |  | 0 | if (! defined $u) { | 
| 356 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::ucs4 object: $!\n"); | 
| 357 |  |  |  |  |  |  | } | 
| 358 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 359 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 360 | 0 |  |  |  |  | 0 | $final = $u->utf8; | 
| 361 |  |  |  |  |  |  | } elsif ($source_charset eq 'utf16') { | 
| 362 | 0 |  |  |  |  | 0 | my $u = Unicode::String::utf16($string); | 
| 363 | 0 | 0 |  |  |  | 0 | if (! defined $u) { | 
| 364 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n"); | 
| 365 |  |  |  |  |  |  | } | 
| 366 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 367 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 368 | 0 |  |  |  |  | 0 | $final = $u->utf8; | 
| 369 |  |  |  |  |  |  | } elsif ($source_charset eq 'utf7') { | 
| 370 | 0 |  |  |  |  | 0 | my $u = Unicode::String::utf7($string); | 
| 371 | 0 | 0 |  |  |  | 0 | if (! defined $u) { | 
| 372 | 0 |  |  |  |  | 0 | confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf7 object: $!\n"); | 
| 373 |  |  |  |  |  |  | } | 
| 374 | 0 |  |  |  |  | 0 | my $ordering = $u->ord; | 
| 375 | 0 | 0 | 0 |  |  | 0 | $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE)); | 
| 376 | 0 |  |  |  |  | 0 | $final = $u->utf8; | 
| 377 |  |  |  |  |  |  | } else { | 
| 378 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . ":: _unicode_string_to_utf8() - charset '$source_charset' is not supported\n"); | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 | 4 |  |  |  |  | 10 | return $final; | 
| 382 |  |  |  |  |  |  | } | 
| 383 |  |  |  |  |  |  |  | 
| 384 |  |  |  |  |  |  | ###################################################################### | 
| 385 |  |  |  |  |  |  | # | 
| 386 |  |  |  |  |  |  | # _jcode_from_utf8($string,$target_charset); | 
| 387 |  |  |  |  |  |  | # | 
| 388 |  |  |  |  |  |  | # Returns the string converted from UTF8 to the specified Jcode encoding. | 
| 389 |  |  |  |  |  |  | # | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | sub _jcode_from_utf8 { | 
| 392 | 4 |  |  | 4 |  | 6 | my ($string,$target_charset) = @_; | 
| 393 |  |  |  |  |  |  |  | 
| 394 | 4 |  |  |  |  | 69 | my $j = Jcode->new($string,'utf8'); | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 4 |  |  |  |  | 363 | $target_charset = lc ($target_charset); | 
| 397 | 4 |  |  |  |  | 6 | my $final; | 
| 398 | 4 | 50 |  |  |  | 13 | if    ($target_charset =~ m/^iso[-_]2022[-_]jp$/) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 399 | 0 |  |  |  |  | 0 | $final = $j->iso_2022_jp; | 
| 400 |  |  |  |  |  |  | } elsif ($target_charset eq 'sjis') { | 
| 401 | 0 |  |  |  |  | 0 | $final = $j->sjis; | 
| 402 |  |  |  |  |  |  | } elsif ($target_charset eq 'euc-jp') { | 
| 403 | 4 |  |  |  |  | 59 | $final = $j->euc; | 
| 404 |  |  |  |  |  |  | } elsif ($target_charset eq 'jis') { | 
| 405 | 0 |  |  |  |  | 0 | $final = $j->jis; | 
| 406 |  |  |  |  |  |  | } else { | 
| 407 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_from_utf8() - charset '$target_charset' is not supported\n"); | 
| 408 |  |  |  |  |  |  | } | 
| 409 | 4 |  |  |  |  | 40 | return $final; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  | ###################################################################### | 
| 413 |  |  |  |  |  |  | # | 
| 414 |  |  |  |  |  |  | # _jcode_to_utf8($string,$source_charset); | 
| 415 |  |  |  |  |  |  | # | 
| 416 |  |  |  |  |  |  | # Returns the string converted from the specified Jcode encoding to UTF8. | 
| 417 |  |  |  |  |  |  | # | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | sub _jcode_to_utf8 { | 
| 420 | 4 |  |  | 4 |  | 5 | my ($string,$source_charset) = @_; | 
| 421 |  |  |  |  |  |  |  | 
| 422 | 4 |  |  |  |  | 7 | $source_charset = lc ($source_charset); | 
| 423 |  |  |  |  |  |  |  | 
| 424 | 4 |  |  |  |  | 4 | my $final; | 
| 425 | 4 | 50 |  |  |  | 17 | if    ($source_charset =~ m/^iso[-_]2022[-_]jp$/) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 426 | 0 |  |  |  |  | 0 | my $j  = Jcode->new($string,'jis')->h2z; | 
| 427 | 0 |  |  |  |  | 0 | $final = $j->utf8; | 
| 428 |  |  |  |  |  |  | } elsif ($source_charset =~m/^(s[-_]?jis|shift[-_]?jis)$/) { | 
| 429 | 0 |  |  |  |  | 0 | my $j  = Jcode->new($string,'sjis'); | 
| 430 | 0 |  |  |  |  | 0 | $final = $j->utf8; | 
| 431 |  |  |  |  |  |  | } elsif ($source_charset eq 'euc-jp') { | 
| 432 | 4 |  |  |  |  | 81 | my $j  = Jcode->new($string,'euc'); | 
| 433 | 4 |  |  |  |  | 392 | $final = $j->utf8; | 
| 434 |  |  |  |  |  |  | } elsif ($source_charset eq 'jis') { | 
| 435 | 0 |  |  |  |  | 0 | my $j  = Jcode->new($string,'jis'); | 
| 436 | 0 |  |  |  |  | 0 | $final = $j->utf8; | 
| 437 |  |  |  |  |  |  | } else { | 
| 438 | 0 |  |  |  |  | 0 | croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_to_utf8() - charset '$source_charset' is not supported\n"); | 
| 439 |  |  |  |  |  |  | } | 
| 440 |  |  |  |  |  |  |  | 
| 441 | 4 |  |  |  |  | 60 | return $final; | 
| 442 |  |  |  |  |  |  | } | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | ####################################################################### | 
| 445 |  |  |  |  |  |  | # | 
| 446 |  |  |  |  |  |  | # Character set handlers maps | 
| 447 |  |  |  |  |  |  | # | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | sub _init_charsets { | 
| 450 |  |  |  |  |  |  |  | 
| 451 | 1 |  |  | 1 |  | 2 | $_Charset_Aliases    = {}; | 
| 452 |  |  |  |  |  |  |  | 
| 453 | 1 |  |  |  |  | 9 | $_Supported_Charsets = { | 
| 454 |  |  |  |  |  |  | 'utf8'                    => 'string', | 
| 455 |  |  |  |  |  |  | 'ucs2'                    => 'string', | 
| 456 |  |  |  |  |  |  | 'ucs4'                    => 'string', | 
| 457 |  |  |  |  |  |  | 'utf7'                    => 'string', | 
| 458 |  |  |  |  |  |  | 'utf16'                   => 'string', | 
| 459 |  |  |  |  |  |  | 'sjis'                    => 'jcode', | 
| 460 |  |  |  |  |  |  | 's-jis'                   => 'jcode', | 
| 461 |  |  |  |  |  |  | 's_jis'                   => 'jcode', | 
| 462 |  |  |  |  |  |  | 'shiftjis'                => 'jcode', | 
| 463 |  |  |  |  |  |  | 'shift-jis'               => 'jcode', | 
| 464 |  |  |  |  |  |  | 'shift_jis'               => 'jcode', | 
| 465 |  |  |  |  |  |  | 'iso-2022-jp'             => 'jcode', | 
| 466 |  |  |  |  |  |  | 'iso_2022_jp'             => 'jcode', | 
| 467 |  |  |  |  |  |  | 'jis'                     => 'jcode', | 
| 468 |  |  |  |  |  |  | 'euc-jp'                  => 'jcode', | 
| 469 |  |  |  |  |  |  | }; | 
| 470 | 1 |  |  |  |  | 6 | $_Charset_Names = { map { lc ($_) => $_ } keys %$_Supported_Charsets }; | 
|  | 15 |  |  |  |  | 31 |  | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | # All the Unicode::Map8 charsets | 
| 473 |  |  |  |  |  |  | { | 
| 474 | 1 |  |  |  |  | 4 | my @map_ids = &_list_unicode_map8_charsets; | 
|  | 1 |  |  |  |  | 2 |  | 
| 475 | 1 |  |  |  |  | 5 | foreach my $id (@map_ids) { | 
| 476 | 515 |  |  |  |  | 525 | my $lc_id = lc ($id); | 
| 477 | 515 | 100 |  |  |  | 667 | next if (exists ($_Charset_Names->{$lc_id})); | 
| 478 | 505 |  |  |  |  | 658 | $_Supported_Charsets->{$id} = 'map8'; | 
| 479 | 505 |  |  |  |  | 846 | $_Charset_Names->{$lc_id}    = $id; | 
| 480 |  |  |  |  |  |  | } | 
| 481 |  |  |  |  |  |  | } | 
| 482 | 1 |  |  |  |  | 49 | $_Charset_Names = { map { lc ($_) => $_ } keys %$_Supported_Charsets }; | 
|  | 520 |  |  |  |  | 1112 |  | 
| 483 |  |  |  |  |  |  |  | 
| 484 |  |  |  |  |  |  | # Add any charsets not already listed from Unicode::Map | 
| 485 |  |  |  |  |  |  | { | 
| 486 | 1 |  |  |  |  | 57 | my $unicode_map = Unicode::Map->new; | 
|  | 1 |  |  |  |  | 14 |  | 
| 487 | 1 |  |  |  |  | 36873 | my @map_ids     = $unicode_map->ids; | 
| 488 | 1 |  |  |  |  | 637 | foreach my $id (@map_ids) { | 
| 489 | 90 |  |  |  |  | 91 | my $lc_id = lc ($id); | 
| 490 | 90 | 100 |  |  |  | 144 | next if (exists ($_Charset_Names->{$lc_id})); | 
| 491 | 44 |  |  |  |  | 48 | $_Supported_Charsets->{$id} = 'unicode-map'; | 
| 492 | 44 |  |  |  |  | 66 | $_Charset_Names->{$lc_id}    = $id; | 
| 493 |  |  |  |  |  |  | } | 
| 494 |  |  |  |  |  |  | } | 
| 495 |  |  |  |  |  |  | } | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | ###################################################################### | 
| 498 |  |  |  |  |  |  | # | 
| 499 |  |  |  |  |  |  | # Code taken and modified from the 'usr/bin/umap' code distributed | 
| 500 |  |  |  |  |  |  | # with Unicode::Map8. It wouldn't be necessary if Unicode::Map8 | 
| 501 |  |  |  |  |  |  | # had a direct method for this.... | 
| 502 |  |  |  |  |  |  | # | 
| 503 |  |  |  |  |  |  |  | 
| 504 |  |  |  |  |  |  | sub _list_unicode_map8_charsets { | 
| 505 | 1 |  |  | 1 |  | 4 | my %set = ( | 
| 506 |  |  |  |  |  |  | ucs4 => {}, | 
| 507 |  |  |  |  |  |  | ucs2 => {utf16 => 1}, | 
| 508 |  |  |  |  |  |  | utf7 => {}, | 
| 509 |  |  |  |  |  |  | utf8 => {}, | 
| 510 |  |  |  |  |  |  | ); | 
| 511 | 1 | 50 |  |  |  | 47 | if (opendir(DIR, $Unicode::Map8::MAPS_DIR)) { | 
| 512 | 1 |  |  |  |  | 272 | my @files = grep(!/^\.\.?$/,readdir(DIR)); | 
| 513 | 1 |  |  |  |  | 9 | foreach my $f (@files) { | 
| 514 | 181 | 50 |  |  |  | 17032 | next unless -f "$Unicode::Map8::MAPS_DIR/$f"; | 
| 515 | 181 |  |  |  |  | 1176 | $f =~ s/\.(?:bin|txt)$//; | 
| 516 |  |  |  |  |  |  | my $supported = | 
| 517 | 181 | 100 |  |  |  | 552 | $set{$f} = {} if Unicode::Map8->new($f); | 
| 518 |  |  |  |  |  |  | } | 
| 519 |  |  |  |  |  |  | } | 
| 520 |  |  |  |  |  |  |  | 
| 521 | 1 |  |  |  |  | 78 | my $avoid_warning = keys %Unicode::Map8::ALIASES; | 
| 522 | 1 |  |  |  |  | 10 | while ( my($alias, $charset) = each %Unicode::Map8::ALIASES) { | 
| 523 | 343 | 50 |  |  |  | 470 | if (exists $set{$charset}) { | 
| 524 | 343 |  |  |  |  | 957 | $set{$charset}{$alias} = 1; | 
| 525 |  |  |  |  |  |  | } | 
| 526 |  |  |  |  |  |  | } | 
| 527 |  |  |  |  |  |  |  | 
| 528 | 1 |  |  |  |  | 2 | my %merged_set = (); | 
| 529 | 1 |  |  |  |  | 24 | foreach my $encoding (keys %set) { | 
| 530 | 184 |  |  |  |  | 225 | $merged_set{$encoding} = 1; | 
| 531 | 184 |  |  |  |  | 192 | my $set_item = $set{$encoding}; | 
| 532 | 184 |  |  |  |  | 390 | while (my ($key,$value) = each (%$set_item)) { | 
| 533 | 344 |  |  |  |  | 858 | $merged_set{$key} = $value; | 
| 534 |  |  |  |  |  |  | } | 
| 535 |  |  |  |  |  |  | } | 
| 536 | 1 |  |  |  |  | 356 | my @final_charsets = sort keys %merged_set; | 
| 537 | 1 |  |  |  |  | 175 | return @final_charsets; | 
| 538 |  |  |  |  |  |  | } | 
| 539 |  |  |  |  |  |  |  | 
| 540 |  |  |  |  |  |  | ###################################################################### | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | 1; |