|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::Info;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## skip Test::Tabs  | 
| 
3
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
68
 | 
 use strict;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
317
 | 
    | 
| 
4
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
53
 | 
 use warnings;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66248
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION='0.992';  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: Certain encodings MUST NOT be implemented [HTML5].  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## ISSUE: Should we convert unassigned code point with trivial Unicode  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## mapping into U+FFFD?  Or, should we return that Unicode character  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## with an error?  (For example, Windows-1252's 0x81 should be converted  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## to U+FFFD or U+0081?)  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub UNREGISTERED_CHARSET_NAME () { 0b1 }  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## Names for non-standard encodings/implementations for Perl encodings  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub REGISTERED_CHARSET_NAME () { 0b10 }  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## Names for standard encodings for Perl encodings  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub PRIMARY_CHARSET_NAME () { 0b100 }  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## "Name:" field for IANA names  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## Canonical name for Perl encodings  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub PREFERRED_CHARSET_NAME () { 0b1000 }  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## "preferred MIME name" for IANA names  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub FALLBACK_ENCODING_IMPL () { 0b10000 }  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## For Perl encodings: Not a name of the encoding, the encoding  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## for the name might be useful as a fallback when the correct  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## encoding is not supported.  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub NONCONFORMING_ENCODING_IMPL () { FALLBACK_ENCODING_IMPL }  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## For Perl encodings: Not a conforming implementation of the encoding,  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## though it seems that the intention was to implement that encoding.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub SEMICONFORMING_ENCODING_IMPL () { 0b1000000 }  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## For Perl encodings: The implementation itself (returned by  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## |get_perl_encoding|) is non-conforming.  The decode handle  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## implementation (returned by |get_decode_handle|) is conforming.  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ERROR_REPORTING_ENCODING_IMPL () { 0b100000 }  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## For Perl encodings: Support error reporting via |manakai_onerror|  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## handler when the encoding is handled with decode handle.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## iana_status  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub STATUS_COMMON () { 0b1 }  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub STATUS_LIMITED_USE () { 0b10 }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub STATUS_OBSOLETE () { 0b100 }  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## category  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_BLOCK_SAFE () { 0b1 }  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: Stateless  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_EUCJP () { 0b10 }  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_SJIS () { 0b100 }  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_UTF16 () { 0b1000 }  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: "A UTF-16 encoding" in HTML5.  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_ASCII_COMPAT () { 0b10000 }  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: "superset of US-ASCII (specifically, ANSI_X3.4-1968)  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## for bytes in the range 0x09-0x0A, 0x0C-0x0D, 0x20-0x22, 0x26, 0x27,  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## 0x2C-0x3F, 0x41-0x5A, and 0x61-0x7A" [HTML5]  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_EBCDIC () { 0b100000 }  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: "based on EBCDIC" in HTML5.  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub CHARSET_CATEGORY_MIME_TEXT () { 0b1000000 }  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: Suitable as MIME text.  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## ISSUE: Shift_JIS is a superset of US-ASCII?  ISO-2022-JP is?  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## ISSUE: 0x5F (_) should be added to the range?  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $Charset; ## TODO: this is obsolete.  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $IANACharset;  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: Charset names used where IANA charset names are allowed, either  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## registered or not.  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $HTMLCharset;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## NOTE: Same as charset names in $IANACharset, except all ASCII  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## punctuations are dropped and letters/digits only names are not included.  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'us-ascii'}  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ansi_x3.4-1968'}  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-6'}  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ansi_x3.4-1986'}  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_646.irv:1991'}  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ascii'}  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso646-us'}  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'us-ascii'}  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'us'}  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ibm367'}  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cp367'}  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csascii'}  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'ansix341968'}  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir6'}  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'ansix341986'}  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso646irv1991'}  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso646us'}  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'usascii'}  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ansi_x3.4-1968' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-6' => REGISTERED_CHARSET_NAME,  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ansi_x3.4-1986' => REGISTERED_CHARSET_NAME,  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_646.irv:1991' => REGISTERED_CHARSET_NAME,  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ascii' => REGISTERED_CHARSET_NAME,  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso646-us' => REGISTERED_CHARSET_NAME,  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'us-ascii' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'us' => REGISTERED_CHARSET_NAME,  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ibm367' => REGISTERED_CHARSET_NAME,  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp367' => REGISTERED_CHARSET_NAME,  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csascii' => REGISTERED_CHARSET_NAME,  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'web-latin1-us-ascii' => UNREGISTERED_CHARSET_NAME |  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         SEMICONFORMING_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp1252' => FALLBACK_ENCODING_IMPL, # part of standard Perl distribution  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fallback => {  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x80" => "\x{20AC}",  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x81" => undef,  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x82" => "\x{201A}",  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x83" => "\x{0192}",  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x84" => "\x{201E}",  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x85" => "\x{2026}",  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x86" => "\x{2020}",  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x87" => "\x{2021}",  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x88" => "\x{02C6}",  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x89" => "\x{2030}",  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8A" => "\x{0160}",  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8B" => "\x{2039}",  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8C" => "\x{0152}",  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8D" => undef,  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8E" => "\x{017D}",  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8F" => undef,  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x90" => undef,  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x91" => "\x{2018}",  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x92" => "\x{2019}",  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x93" => "\x{201C}",  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x94" => "\x{201D}",  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x95" => "\x{2022}",  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x96" => "\x{2013}",  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x97" => "\x{2014}",  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x98" => "\x{02DC}",  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x99" => "\x{2122}",  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9A" => "\x{0161}",  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9B" => "\x{203A}",  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9C" => "\x{0153}",  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9D" => undef,  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9E" => "\x{017E}",  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9F" => "\x{0178}",  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xA0" => "\xA0", "\xA1" => "\xA1", "\xA2" => "\xA2", "\xA3" => "\xA3",  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xA4" => "\xA4", "\xA5" => "\xA5", "\xA6" => "\xA6", "\xA7" => "\xA7",  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xA8" => "\xA8", "\xA9" => "\xA9", "\xAA" => "\xAA", "\xAB" => "\xAB",  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xAC" => "\xAC", "\xAD" => "\xAD", "\xAE" => "\xAE", "\xAF" => "\xAF",  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xB0" => "\xB0", "\xB1" => "\xB1", "\xB2" => "\xB2", "\xB3" => "\xB3",  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xB4" => "\xB4", "\xB5" => "\xB5", "\xB6" => "\xB6", "\xB7" => "\xB7",  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xB8" => "\xB8", "\xB9" => "\xB9", "\xBA" => "\xBA", "\xBB" => "\xBB",  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xBC" => "\xBC", "\xBD" => "\xBD", "\xBE" => "\xBE", "\xBF" => "\xBF",  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xC0" => "\xC0", "\xC1" => "\xC1", "\xC2" => "\xC2", "\xC3" => "\xC3",  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xC4" => "\xC4", "\xC5" => "\xC5", "\xC6" => "\xC6", "\xC7" => "\xC7",  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xC8" => "\xC8", "\xC9" => "\xC9", "\xCA" => "\xCA", "\xCB" => "\xCB",  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xCC" => "\xCC", "\xCD" => "\xCD", "\xCE" => "\xCE", "\xCF" => "\xCF",  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xD0" => "\xD0", "\xD1" => "\xD1", "\xD2" => "\xD2", "\xD3" => "\xD3",  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xD4" => "\xD4", "\xD5" => "\xD5", "\xD6" => "\xD6", "\xD7" => "\xD7",  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xD8" => "\xD8", "\xD9" => "\xD9", "\xDA" => "\xDA", "\xDB" => "\xDB",  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xDC" => "\xDC", "\xDD" => "\xDD", "\xDE" => "\xDE", "\xDF" => "\xDF",  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xE0" => "\xE0", "\xE1" => "\xE1", "\xE2" => "\xE2", "\xE3" => "\xE3",  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xE4" => "\xE4", "\xE5" => "\xE5", "\xE6" => "\xE6", "\xE7" => "\xE7",  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xE8" => "\xE8", "\xE9" => "\xE9", "\xEA" => "\xEA", "\xEB" => "\xEB",  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xEC" => "\xEC", "\xED" => "\xED", "\xEE" => "\xEE", "\xEF" => "\xEF",  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xF0" => "\xF0", "\xF1" => "\xF1", "\xF2" => "\xF2", "\xF3" => "\xF3",  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xF4" => "\xF4", "\xF5" => "\xF5", "\xF6" => "\xF6", "\xF7" => "\xF7",  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xF8" => "\xF8", "\xF9" => "\xF9", "\xFA" => "\xFA", "\xFB" => "\xFB",  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xFC" => "\xFC", "\xFD" => "\xFD", "\xFE" => "\xFE", "\xFF" => "\xFF",  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: Treated as |windows-1252|.  Properties of this charset  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## should be consistent with those of that charset.  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-1'}  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-1:1987'}  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-100'}  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-1'}  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-1'}  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin1'}  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l1'}  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ibm819'}  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cp819'}  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin1'}  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885911987'}  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir100'}  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88591'}  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-1:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-100' => REGISTERED_CHARSET_NAME,  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-1' => REGISTERED_CHARSET_NAME,  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-1' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin1' => REGISTERED_CHARSET_NAME,  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l1' => REGISTERED_CHARSET_NAME,  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ibm819' => REGISTERED_CHARSET_NAME,  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp819' => REGISTERED_CHARSET_NAME,  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin1' => REGISTERED_CHARSET_NAME,  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'web-latin1' => UNREGISTERED_CHARSET_NAME | SEMICONFORMING_ENCODING_IMPL |  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp1252' => FALLBACK_ENCODING_IMPL, # part of standard Perl distribution  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fallback => {  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x80" => "\x{20AC}",  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x81" => undef,  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x82" => "\x{201A}",  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x83" => "\x{0192}",  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x84" => "\x{201E}",  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x85" => "\x{2026}",  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x86" => "\x{2020}",  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x87" => "\x{2021}",  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x88" => "\x{02C6}",  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x89" => "\x{2030}",  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8A" => "\x{0160}",  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8B" => "\x{2039}",  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8C" => "\x{0152}",  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8D" => undef,  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8E" => "\x{017D}",  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8F" => undef,  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x90" => undef,  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x91" => "\x{2018}",  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x92" => "\x{2019}",  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x93" => "\x{201C}",  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x94" => "\x{201D}",  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x95" => "\x{2022}",  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x96" => "\x{2013}",  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x97" => "\x{2014}",  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x98" => "\x{02DC}",  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x99" => "\x{2122}",  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9A" => "\x{0161}",  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9B" => "\x{203A}",  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9C" => "\x{0153}",  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9D" => undef,  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9E" => "\x{017E}",  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9F" => "\x{0178}",  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: Treated as |windows-1252|.  Properties of this charset  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## should be consistent with those of that charset.  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-2'}  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-2:1987'}  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-101'}  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-2'}  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-2'}  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin2'}  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l2'}  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin2'}  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885921987'}  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir101'}  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88592'}  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-2:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-101' => REGISTERED_CHARSET_NAME,  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-2' => REGISTERED_CHARSET_NAME,  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-2' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin2' => REGISTERED_CHARSET_NAME,  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l2' => REGISTERED_CHARSET_NAME,  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin2' => REGISTERED_CHARSET_NAME,  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-3'}  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-3:1988'}  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-109'}  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-3'}  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-3'}  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin3'}  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l3'}  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin3'}  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885931988'}  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir109'}  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88593'}  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-3:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-109' => REGISTERED_CHARSET_NAME,  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-3' => REGISTERED_CHARSET_NAME,  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-3' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin3' => REGISTERED_CHARSET_NAME,  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l3' => REGISTERED_CHARSET_NAME,  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin3' => REGISTERED_CHARSET_NAME,  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   error_level => {  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'unassigned-code-point-error' => 'iso_shall',  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: I didn't check whether ISO/IEC 8859-3 prohibits the use of  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## unassigned code points, but ECMA-94:1986 (whose content considered  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## as equivalent to ISO 8859/1-4) disallows the use of them.  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-4'}  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-4:1988'}  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-110'}  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-4'}  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-4'}  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin4'}  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l4'}  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin4'}  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885941988'}  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir110'}  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88594'}  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-4:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-110' => REGISTERED_CHARSET_NAME,  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-4' => REGISTERED_CHARSET_NAME,  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-4' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin4' => REGISTERED_CHARSET_NAME,  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l4' => REGISTERED_CHARSET_NAME,  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin4' => REGISTERED_CHARSET_NAME,  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   error_level => {  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'unassigned-code-point-error' => 'iso_shall',  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: I didn't check whether ISO/IEC 8859-3 prohibits the use of  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## unassigned code points, but ECMA-94:1986 (whose content considered  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## as equivalent to ISO 8859/1-4) disallows the use of them.  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-5'}  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-5:1988'}  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-144'}  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-5'}  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-5'}  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cyrillic'}  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatincyrillic'}  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885951988'}  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir144'}  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88595'}  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-5:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-144' => REGISTERED_CHARSET_NAME,  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-5' => REGISTERED_CHARSET_NAME,  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-5' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cyrillic' => REGISTERED_CHARSET_NAME,  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatincyrillic' => REGISTERED_CHARSET_NAME,  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-6'}  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-6:1987'}  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-127'}  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-6'}  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-6'}  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ecma-114'}  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'asmo-708'}  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'arabic'}  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatinarabic'}  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885961987'}  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir127'}  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88596'}  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'ecma114'}  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'asmo708'}  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: 3/0..3/9 have different semantics from U+0030..0039,  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## but have same character names (maybe).  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: According to RFC 2046, charset left-hand half of "iso-8859-6"  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## is same as "us-ascii".  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: RFC 1345 def?  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-6:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-127' => REGISTERED_CHARSET_NAME,  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-6' => REGISTERED_CHARSET_NAME,  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-6' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ecma-114' => REGISTERED_CHARSET_NAME,  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'asmo-708' => REGISTERED_CHARSET_NAME,  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'arabic' => REGISTERED_CHARSET_NAME,  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatinarabic' => REGISTERED_CHARSET_NAME,  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-7'}  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-7:1987'}  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-126'}  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-7'}  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-7'}  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'elot_928'}  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ecma-118'}  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'greek'}  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'greek8'}  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatingreek'}  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885971987'}  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir126'}  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88597'}  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'elot928'}  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'ecma118'}  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-7:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-126' => REGISTERED_CHARSET_NAME,  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-7' => REGISTERED_CHARSET_NAME,  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-7' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'elot_928' => REGISTERED_CHARSET_NAME,  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ecma-118' => REGISTERED_CHARSET_NAME,  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'greek' => REGISTERED_CHARSET_NAME,  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'greek8' => REGISTERED_CHARSET_NAME,  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatingreek' => REGISTERED_CHARSET_NAME,  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-8'}  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-8:1988'}  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-138'}  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-8'}  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-8'}  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'hebrew'}  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatinhebrew'}  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885981988'}  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir138'}  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88598'}  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-8:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-138' => REGISTERED_CHARSET_NAME,  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-8' => REGISTERED_CHARSET_NAME,  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-8' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'hebrew' => REGISTERED_CHARSET_NAME,  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatinhebrew' => REGISTERED_CHARSET_NAME,  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-9'}  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-9:1989'}  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-148'}  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-9'}  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-9'}  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin5'}  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l5'}  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin5'}  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885991989'}  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir148'}  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso88599'}  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-9:1989' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-148' => REGISTERED_CHARSET_NAME,  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-9' => REGISTERED_CHARSET_NAME,  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-9' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin5' => REGISTERED_CHARSET_NAME,  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l5' => REGISTERED_CHARSET_NAME,  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin5' => REGISTERED_CHARSET_NAME,  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'web-latin5' => UNREGISTERED_CHARSET_NAME | SEMICONFORMING_ENCODING_IMPL |  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp1254' => FALLBACK_ENCODING_IMPL, # part of standard Perl distribution  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fallback => {  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x80" => "\x{20AC}",  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x81" => undef,  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x82" => "\x{201A}",  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x83" => "\x{0192}",  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x84" => "\x{201E}",  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x85" => "\x{2026}",  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x86" => "\x{2020}",  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x87" => "\x{2021}",  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x88" => "\x{02C6}",  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x89" => "\x{2030}",  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8A" => "\x{0160}",  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8B" => "\x{2039}",  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8C" => "\x{0152}",  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8D" => undef,  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8E" => undef,  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8F" => undef,  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x90" => undef,  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x91" => "\x{2018}",  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x92" => "\x{2019}",  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x93" => "\x{201C}",  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x94" => "\x{201D}",  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x95" => "\x{2022}",  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x96" => "\x{2013}",  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x97" => "\x{2014}",  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x98" => "\x{02DC}",  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x99" => "\x{2122}",  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9A" => "\x{0161}",  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9B" => "\x{203A}",  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9C" => "\x{0153}",  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9D" => undef,  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9E" => undef,  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9F" => "\x{0178}",  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: Treated as |windows-1254|.  Properties of this charset  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## should be consistent with those of that charset.  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-10'}  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-10'}  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-157'}  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'l6'}  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_8859-10:1992'}  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisolatin6'}  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'latin6'}  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885910'}  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir157'}  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso8859101992'}  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-10' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-157' => REGISTERED_CHARSET_NAME,  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'l6' => REGISTERED_CHARSET_NAME,  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_8859-10:1992' => REGISTERED_CHARSET_NAME,  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisolatin6' => REGISTERED_CHARSET_NAME,  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'latin6' => REGISTERED_CHARSET_NAME,  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso_6937-2-add'}  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso_6937-2-add'}  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-142'}  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csisotextcomm'}  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso69372add'}  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'isoir142'}  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso_6937-2-add' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-142' => REGISTERED_CHARSET_NAME,  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csisotextcomm' => REGISTERED_CHARSET_NAME,  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'jis_x0201'}  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'jis_x0201'}  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'x0201'}  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cshalfwidthkatakana'}  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'jisx0201'}  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'jis_x0201' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'x0201' => REGISTERED_CHARSET_NAME,  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cshalfwidthkatakana' => REGISTERED_CHARSET_NAME,  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'jis_encoding'}  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'jis_encoding'}  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csjisencoding'}  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'jisencoding'}  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => 0,  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'jis_encoding' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csjisencoding' => REGISTERED_CHARSET_NAME,  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: What is this?  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'shift_jis'}  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'shift_jis'}  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ms_kanji'}  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csshiftjis'}  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'shiftjis'}  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'mskanji'}  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_SJIS | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'shift_jis' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ms_kanji' => REGISTERED_CHARSET_NAME,  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csshiftjis' => REGISTERED_CHARSET_NAME,  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'shift-jis-1997' => UNREGISTERED_CHARSET_NAME |  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         SEMICONFORMING_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     shiftjis => PRIMARY_CHARSET_NAME | NONCONFORMING_ENCODING_IMPL |  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: Unicode mapping is wrong.  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'x-sjis'}  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'x-sjis'}  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'xsjis'}  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_SJIS | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'x-sjis' => UNREGISTERED_CHARSET_NAME,  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'shift-jis-1997' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{shift_jisx0213}  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{shift_jisx0213}  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{shiftjisx0213}  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_SJIS | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     shift_jisx0213 => UNREGISTERED_CHARSET_NAME,  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #shift_jisx0213 (non-standard - i don't know its conformance)  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'shift-jis-1997' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'shiftjis' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'euc-jp'}  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'extended_unix_code_packed_format_for_japanese'}  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cseucpkdfmtjapanese'}  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'euc-jp'}  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'extendedunixcodepackedformatforjapanese'}  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'cseucpkdfmtjapanese'}  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'eucjp'}  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_EUCJP | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'extended_unix_code_packed_format_for_japanese' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cseucpkdfmtjapanese' => REGISTERED_CHARSET_NAME,  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-jp' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-jp-1997' => UNREGISTERED_CHARSET_NAME |  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         SEMICONFORMING_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: Though the IANA definition references the 1990 version  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## of EUC-JP, the 1997 version of JIS standard claims that the version  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## is same coded character set as the 1990 version, such that we  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## consider the EUC-JP 1990 version is same as the 1997 version.  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-jp' => PREFERRED_CHARSET_NAME | NONCONFORMING_ENCODING_IMPL |  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: Unicode mapping is wrong.  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'x-euc-jp'}  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'x-euc-jp'}  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'xeucjp'}  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_EUCJP | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'x-euc-jp' => UNREGISTERED_CHARSET_NAME,  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-jp-1997' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-jp' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'extended_unix_code_fixed_width_for_japanese'}  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'extended_unix_code_fixed_width_for_japanese'}  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cseucfixwidjapanese'}  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'extendedunixcodefixedwidthforjapanese'}  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE,  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'extended_unix_code_fixed_width_for_japanese' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cseucfixwidjapanese' => REGISTERED_CHARSET_NAME,  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'euc-kr'}  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'euc-kr'}  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cseuckr'}  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'euckr'}  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'euc-kr' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cseuckr' => REGISTERED_CHARSET_NAME,  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## TODO: We need a parse error generating wrapper for the decoder.  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp949' => FALLBACK_ENCODING_IMPL, # part of standard Perl distribution  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |euc-kr| is handled as |windows-949|, such that properties   | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## should be consistent with that encoding's properties.  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-2022-jp'}  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-2022-jp'}  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csiso2022jp'}  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso2022jp'}  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'junet-code'}  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso2022jp'}  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'junetcode'}  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_MIME_TEXT | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-2022-jp' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csiso2022jp' => REGISTERED_CHARSET_NAME,  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso2022jp' => UNREGISTERED_CHARSET_NAME,  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'junet-code' => UNREGISTERED_CHARSET_NAME,  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-2022-jp-2'}  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-2022-jp-2'}  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csiso2022jp2'}  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso2022jp2'}  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_MIME_TEXT,  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-2022-jp-2' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csiso2022jp2' => REGISTERED_CHARSET_NAME,  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $IANACharset->{'gb_2312-80'}  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-ir-58'}  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{chinese}  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{gb231280}  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{isoir58}  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: What is represented by this charset is unclear...  I don't   | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## understand what RFC 1945 describes...  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => 0,  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'gb_2312-80' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-ir-58' => REGISTERED_CHARSET_NAME,  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'chinese' => REGISTERED_CHARSET_NAME,  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csiso58gb231280' => REGISTERED_CHARSET_NAME,  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## TODO: GB2312->GBK Parse Error wrapper  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp936' => FALLBACK_ENCODING_IMPL,  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |gb2312| is handled as |gbk|, such that properties should be  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## consistent.  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'utf-8'}  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'utf-8'}  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'x-utf-8'}  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'utf8'}  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'xutf8'}  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT |  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-8' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: IANA name "utf-8" references RFC 3629.  According to the RFC,  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## the definitive definition is one specified in the Unicode Standard.  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'x-utf-8' => UNREGISTERED_CHARSET_NAME,  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: We treat |x-utf-8| as an alias of |utf-8|, since unlike  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## other charset like |x-sjis| or |x-euc-jp|, there is no major  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## variant for the UTF-8 encoding.  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  ## TODO: We might ought to reconsider this policy, since  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  ## there are UTF-8 variant in fact, such as   | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  ## Unicode's UTF-8, ISO/IEC 10646's UTF-8, UTF-8n, and as  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  ## such.  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-8-strict' => PRIMARY_CHARSET_NAME | SEMICONFORMING_ENCODING_IMPL |  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: It does not support non-Unicode UCS characters (conforming).  | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## It does detect illegal sequences (conforming).  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## It does not support surrpgate pairs (conforming).  | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## It does not support BOMs (non-conforming).  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bom_pattern => qr/\xEF\xBB\xBF/,  | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'utf-8n'}  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'utf-8n'}  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'utf-8'}  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT |  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-8n' => UNREGISTERED_CHARSET_NAME,  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: Is there any normative definition for the charset?  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## What variant of UTF-8 should we use for the charset?  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-8-strict' => PRIMARY_CHARSET_NAME | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'gbk'}  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'gbk'}  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cp936'}  | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'ms936'}  | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-936'}  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows936'}  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT,  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'gbk' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp936' => REGISTERED_CHARSET_NAME,  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ms936' => REGISTERED_CHARSET_NAME,  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-936' => REGISTERED_CHARSET_NAME,  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_status => STATUS_COMMON | STATUS_OBSOLETE,  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'gb18030'}  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'gb18030'}  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT,  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'gb18030' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_status => STATUS_COMMON,  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   mime_text_suitable => 1,  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'utf-16be'}  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'utf-16be'}  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'utf16be'}  | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_UTF16,  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-16be' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'utf-16le'}  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'utf-16le'}  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'utf16le'}  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_UTF16,  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-16le' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'utf-16'}  | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'utf-16'}  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'utf16'}  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_UTF16,  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'utf-16' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'windows-31j'}  | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-31j'}  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'cswindows31j'}  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows31j'}  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_SJIS | CHARSET_CATEGORY_BLOCK_SAFE |  | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-31j' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cswindows31j' => REGISTERED_CHARSET_NAME,  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_status => STATUS_LIMITED_USE, # maybe  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'gb2312'}  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'gb2312'}  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csgb2312'}  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT |  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'gb2312' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csgb2312' => REGISTERED_CHARSET_NAME,  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## TODO: GB2312->GBK Parse Error wrapper  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp936' => FALLBACK_ENCODING_IMPL,  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |gb2312| is handled as |gbk|, such that properties should be  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## consistent.  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'big5'}  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'big5'}  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'csbig5'}  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'x-x-big5'}  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{xxbig5}  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT,  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'big5' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'csbig5' => REGISTERED_CHARSET_NAME,  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'x-x-big5' => UNREGISTERED_CHARSET_NAME,  | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: In HTML5, |x-x-big5| is defined as an alias of |big5|.  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## According to that spec, if there is any difference between   | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## input and replacement encodings, the result is parse error.  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## However, since there is no formal definition for |x-x-big5|  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## charset, we cannot raise such errors.  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'big5-hkscs'}  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'big5-hkscs'}  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'big5hkscs'}  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT,  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'big5-hkscs' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'windows-1252'}  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-1252'}  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows1252'}  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT |  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-1252' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: Check whether use of 0x81 is conforming or not...  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'windows-1253'}  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-1253'}  | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows1253'}  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT |  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-1253' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: Check whether use of 0x81 is conforming or not...  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'windows-1254'}  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-1254'}  | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows1254'}  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT |  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       CHARSET_CATEGORY_MIME_TEXT,  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-1254' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: Check whether use of 0x81 is conforming or not...  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: ...  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'tis-620'}  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'tis-620'}  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'tis620'}  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'tis-620' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'web-tis-620' => UNREGISTERED_CHARSET_NAME | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-874' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fallback => {  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x80" => "\x{20AC}",  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x81" => undef, "\x82" => undef, "\x83" => undef, "\x84" => undef,  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x85" => "\x{2026}",  | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x86" => undef, "\x87" => undef, "\x88" => undef, "\x89" => undef,  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8A" => undef, "\x8B" => undef, "\x8C" => undef, "\x8D" => undef,  | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8E" => undef, "\x8F" => undef, "\x90" => undef,  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x91" => "\x{2018}",  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x92" => "\x{2019}",  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x93" => "\x{201C}",  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x94" => "\x{201D}",  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x95" => "\x{2022}",  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x96" => "\x{2013}",  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x97" => "\x{2014}",  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x98" => undef, "\x99" => undef, "\x9A" => undef, "\x9B" => undef,  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9C" => undef, "\x9D" => undef, "\x9E" => undef, "\x9F" => undef,  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\xA0" => "\xA0",  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |tis-620| is treated as |windows-874|, so ensure that  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## they are consistent.  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'iso-8859-11'}  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'iso-8859-11'}  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'iso885911'}  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'iso-8859-11' => UNREGISTERED_CHARSET_NAME,  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: The Web Thai encoding, i.e. windows-874.  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
1026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'web-thai' => UNREGISTERED_CHARSET_NAME | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-874' => FALLBACK_ENCODING_IMPL | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fallback => {  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x80" => "\x{20AC}",  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x81" => undef, "\x82" => undef, "\x83" => undef, "\x84" => undef,  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x85" => "\x{2026}",  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x86" => undef, "\x87" => undef, "\x88" => undef, "\x89" => undef,  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8A" => undef, "\x8B" => undef, "\x8C" => undef, "\x8D" => undef,  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x8E" => undef, "\x8F" => undef, "\x90" => undef,  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x91" => "\x{2018}",  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x92" => "\x{2019}",  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x93" => "\x{201C}",  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x94" => "\x{201D}",  | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x95" => "\x{2022}",  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x96" => "\x{2013}",  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x97" => "\x{2014}",  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x98" => undef, "\x99" => undef, "\x9A" => undef, "\x9B" => undef,  | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\x9C" => undef, "\x9D" => undef, "\x9E" => undef, "\x9F" => undef,  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |iso-8859-11| is treated as |windows-874|, so ensure that  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## they are consistent.  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Charset->{'windows-874'}  | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $IANACharset->{'windows-874'}  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{'windows874'}  | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_ASCII_COMPAT,  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-874' => UNREGISTERED_CHARSET_NAME,  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-874' => REGISTERED_CHARSET_NAME | ERROR_REPORTING_ENCODING_IMPL,  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## TODO: |error_level|  | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $IANACharset->{'windows-949'}  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = $HTMLCharset->{windows949}  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 = __PACKAGE__->new ({  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   category => CHARSET_CATEGORY_BLOCK_SAFE | CHARSET_CATEGORY_MIME_TEXT,  | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   iana_names => {  | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'windows-949' => UNREGISTERED_CHARSET_NAME,  | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl_names => {  | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cp949' => PREFERRED_CHARSET_NAME | NONCONFORMING_ENCODING_IMPL |  | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ERROR_REPORTING_ENCODING_IMPL,  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## TODO: Is this implementation conforming?  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   },  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: |error_level| is same as default, since we can't find any formal  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## definition for this charset.  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new ($$) {  | 
| 
1081
 | 
378
 | 
 
 | 
 
 | 
  
378
  
 | 
  
0
  
 | 
1707
 | 
   return bless $_[1], $_[0];  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # new  | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: A class method  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_by_html_name ($$) {  | 
| 
1086
 | 
716
 | 
 
 | 
 
 | 
  
716
  
 | 
  
0
  
 | 
1669
 | 
   my $name = $_[1];  | 
| 
1087
 | 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1697
 | 
   $name =~ tr/A-Z/a-z/; ## ASCII case-insensitive  | 
| 
1088
 | 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1245
 | 
   my $iana_name = $name;  | 
| 
1089
 | 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3852
 | 
   $name =~ s/[\x09-\x0D\x20-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]//g;  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: U+000B is included.  | 
| 
1091
 | 
716
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2413
 | 
   unless ($HTMLCharset->{$name} || $IANACharset->{$name}) {  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $IANACharset->{$iana_name} =  | 
| 
1093
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $HTMLCharset->{$name} = __PACKAGE__->new ({  | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       iana_names => {  | 
| 
1095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $iana_name => UNREGISTERED_CHARSET_NAME,  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       },  | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     });  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1099
 | 
716
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
2201
 | 
   return $HTMLCharset->{$name} || $IANACharset->{$name};  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # get_by_html_name  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: A class method  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_by_iana_name ($$) {  | 
| 
1104
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
   my $name = $_[1];  | 
| 
1105
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $name =~ tr/A-Z/a-z/; ## ASCII case-insensitive  | 
| 
1106
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   unless ($IANACharset->{$name}) {  | 
| 
1107
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $IANACharset->{$name} = __PACKAGE__->new ({  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       iana_names => {  | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $name => UNREGISTERED_CHARSET_NAME,  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       },  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     });  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1113
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $IANACharset->{$name};  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # get_by_iana_name  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_decode_handle ($$;%) {  | 
| 
1117
 | 
713
 | 
 
 | 
 
 | 
  
713
  
 | 
  
0
  
 | 
1201
 | 
   my $self = shift;  | 
| 
1118
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1103
 | 
   my $byte_stream = shift;  | 
| 
1119
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2960
 | 
   my %opt = @_;  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $obj = {  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     category => $self->{category},  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     char_buffer => \(my $s = ''),  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     char_buffer_pos => 0,  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     character_queue => [],  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     filehandle => $byte_stream,  | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     charset => '', ## TODO: We set a charset name for input_encoding (when we get identify-by-URI nonsense away)  | 
| 
1128
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15650
 | 
     byte_buffer => $opt{byte_buffer} ? ${$opt{byte_buffer}} : '', ## TODO: ref, instead of value, should be used  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     onerror => $opt{onerror} || sub {},  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #onerror_set  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     level => $opt{level} || {  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       must => 'm',  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       charset_variant => 'm',  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       charset_fact => 'm',  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       iso_shall => 'm',  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     },  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     error_level => $self->{error_level} || {  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## HTML5 charset name aliases  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: Use of code points in the variant whose definition differs  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## from the original charset is a parse error in HTML5.  However,  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## it does not affect the document conformance; the HTML5 spec  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## does not define the conformance of the input stream against the  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## charset in use.  | 
| 
1144
 | 
713
 | 
  
 50
  
 | 
  
 50
  
 | 
 
 | 
 
 | 
2754
 | 
       'fallback-char-error' => 'charset_variant',  | 
| 
 
 | 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       #'fallback-illegal-error' => 'charset_variant',  | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'fallback-unassigned-error' => 'charset_variant',  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: An appropriate error level should be set for each charset  | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## (many charset prohibits use of unassigned code points).  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'illegal-octets-error' => 'charset_fact',  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'unassigned-code-point-error' => 'charset_fact',  | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'invalid-state-error' => 'charset_fact',  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     },  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1156
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3890
 | 
   require HTML::HTML5::Parser::Charset::DecodeHandle;  | 
| 
1157
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2777
 | 
   if ($self->{iana_names}->{'iso-2022-jp'}) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1158
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2440} = 'gl-jis-1978';  | 
| 
1159
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2442} = 'gl-jis-1983';  | 
| 
1160
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state} = 'state_2842';  | 
| 
1161
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     eval {  | 
| 
1162
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::GLJIS1978;  | 
| 
1163
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::GLJIS1983;  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1165
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if (Encode::find_encoding ($obj->{state_2440}) and  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Encode::find_encoding ($obj->{state_2442})) {  | 
| 
1167
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return ((bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP'),  | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME);  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($self->{xml_names}->{'iso-2022-jp'}) {  | 
| 
1171
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2440} = 'gl-jis-1997-swapped';  | 
| 
1172
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2442} = 'gl-jis-1997';  | 
| 
1173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state} = 'state_2842';  | 
| 
1174
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     eval {  | 
| 
1175
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::GLJIS1997Swapped;  | 
| 
1176
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::GLJIS1997;  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1178
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if (Encode::find_encoding ($obj->{state_2440}) and  | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Encode::find_encoding ($obj->{state_2442})) {  | 
| 
1180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return ((bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP'),  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME);  | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1185
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2788
 | 
   my ($e, $e_status) = $self->get_perl_encoding  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (%opt, allow_semiconforming => 1);  | 
| 
1187
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2027
 | 
   if ($e) {  | 
| 
1188
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1702
 | 
     $obj->{perl_encoding_name} = $e->name;  | 
| 
1189
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3675
 | 
     unless ($self->{category} & CHARSET_CATEGORY_BLOCK_SAFE) {  | 
| 
1190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $e_status |= FALLBACK_ENCODING_IMPL;  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1192
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1595
 | 
     $obj->{bom_pattern} = $self->{bom_pattern};  | 
| 
1193
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1365
 | 
     $obj->{fallback} = $self->{fallback};  | 
| 
1194
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4802
 | 
     return ((bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode'), $e_status);  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
1196
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return (undef, 0);  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # get_decode_handle  | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_perl_encoding ($;%) {  | 
| 
1201
 | 
713
 | 
 
 | 
 
 | 
  
713
  
 | 
  
0
  
 | 
2550
 | 
   my ($self, %opt) = @_;  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1203
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2898
 | 
   require Encode;  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $load_encode = sub {  | 
| 
1205
 | 
713
 | 
 
 | 
 
 | 
  
713
  
 | 
 
 | 
1325
 | 
     my $name = shift;  | 
| 
1206
 | 
713
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
6324
 | 
     if ($name eq 'euc-jp-1997') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1207
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::EUCJP1997;  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq 'shift-jis-1997') {  | 
| 
1209
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::ShiftJIS1997;  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ({'web-latin1' => 1,  | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               'web-latin1-us-ascii' => 1,  | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               'web-latin5' => 1}->{$name}) {  | 
| 
1213
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
755
 | 
       require HTML::HTML5::Parser::Charset::WebLatin1;  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq 'web-thai' or $name eq 'web-tis-620') {  | 
| 
1215
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require HTML::HTML5::Parser::Charset::WebThai;  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1217
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2833
 | 
   }; # $load_encode  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1219
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1977
 | 
   if ($opt{allow_error_reporting}) {  | 
| 
1220
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1105
 | 
     for my $perl_name (keys %{$self->{perl_names} or {}}) {  | 
| 
 
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2730
 | 
    | 
| 
1221
 | 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1312
 | 
       my $perl_status = $self->{perl_names}->{$perl_name};  | 
| 
1222
 | 
715
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2034
 | 
       next unless $perl_status & ERROR_REPORTING_ENCODING_IMPL;  | 
| 
1223
 | 
713
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1686
 | 
       next if $perl_status & FALLBACK_ENCODING_IMPL;  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       next if $perl_status & SEMICONFORMING_ENCODING_IMPL and  | 
| 
1225
 | 
713
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
2589
 | 
           not $opt{allow_semiconforming};  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         | 
| 
1227
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1907
 | 
       $load_encode->($perl_name);  | 
| 
1228
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3378
 | 
       my $e = Encode::find_encoding ($perl_name);  | 
| 
1229
 | 
713
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
14448
 | 
       if ($e and $e->name eq $perl_name) {  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## NOTE: Don't return $e unless $e eq $perl_name, since  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## |find_encoding| resolves e.g. |foobarlatin-1| to |iso-8859-1|,  | 
| 
1232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## which might return wrong encoding object when a dedicated  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## implementation not part of the standard Perl distribution is  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## desired.  | 
| 
1235
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8155
 | 
         return ($e, $perl_status);  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1240
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   for my $perl_name (keys %{$self->{perl_names} or {}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1241
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $perl_status = $self->{perl_names}->{$perl_name};  | 
| 
1242
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     next if $perl_status & ERROR_REPORTING_ENCODING_IMPL;  | 
| 
1243
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     next if $perl_status & FALLBACK_ENCODING_IMPL;  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     next if $perl_status & SEMICONFORMING_ENCODING_IMPL and  | 
| 
1245
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
         not $opt{allow_semiconforming};  | 
| 
1246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1247
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $load_encode->($perl_name);  | 
| 
1248
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $e = Encode::find_encoding ($perl_name);  | 
| 
1249
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($e) {  | 
| 
1250
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return ($e, $perl_status);  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1254
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($opt{allow_fallback}) {  | 
| 
1255
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for my $perl_name (keys %{$self->{perl_names} or {}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1256
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $perl_status = $self->{perl_names}->{$perl_name};  | 
| 
1257
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
       next unless $perl_status & FALLBACK_ENCODING_IMPL or  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $perl_status & SEMICONFORMING_ENCODING_IMPL;  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: We don't prefer semi-conforming implementations to   | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## non-conforming implementations, since semi-conforming implementations  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## will never be conforming without assist of the callee, and in such  | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## cases the callee should set the |allow_semiconforming| option upon  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## the invocation of the method anyway.  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1265
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $load_encode->($perl_name);  | 
| 
1266
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $e = Encode::find_encoding ($perl_name);  | 
| 
1267
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($e) {  | 
| 
1268
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return ($e, $perl_status);  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1272
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for my $iana_name (keys %{$self->{iana_names} or {}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1273
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $load_encode->($iana_name);  | 
| 
1274
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $e = Encode::find_encoding ($iana_name);  | 
| 
1275
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($e) {  | 
| 
1276
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return ($e, FALLBACK_ENCODING_IMPL);  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1281
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return (undef, 0);  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # get_perl_encoding  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_iana_name ($) {  | 
| 
1285
 | 
714
 | 
 
 | 
 
 | 
  
714
  
 | 
  
0
  
 | 
1413
 | 
   my $self = shift;  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1287
 | 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1550
 | 
   my $primary;  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $other;  | 
| 
1289
 | 
714
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1030
 | 
   for my $iana_name (keys %{$self->{iana_names} or {}}) {  | 
| 
 
 | 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2660
 | 
    | 
| 
1290
 | 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2405
 | 
     my $name_status = $self->{iana_names}->{$iana_name};  | 
| 
1291
 | 
1428
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4011
 | 
     if ($name_status & PREFERRED_CHARSET_NAME) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1292
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
       return $iana_name;  | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name_status & PRIMARY_CHARSET_NAME) {  | 
| 
1294
 | 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1484
 | 
       $primary = $iana_name;  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name_status & REGISTERED_CHARSET_NAME) {  | 
| 
1296
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
       $other = $iana_name;  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
1298
 | 
710
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
2534
 | 
       $other ||= $iana_name;  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1302
 | 
710
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
3800
 | 
   return $primary || $other;  | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # get_iana_name  | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: A non-method function  | 
| 
1306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub is_syntactically_valid_iana_charset_name ($) {  | 
| 
1307
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
   my $name = shift;  | 
| 
1308
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $name =~ /\A[\x20-\x7E]{1,40}\z/;  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: According to IANAREG, "The character set names may be up to 40   | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## characters taken from the printable characters of US-ASCII.  However,  | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## no distinction is made between use of upper and lower case letters.".  | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # is_suntactically_valid_iana_charset_name  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## $Date: 2008/09/15 07:19:33 $  | 
| 
1317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    |