|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::DecodeHandle;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## skip Test::Tabs  | 
| 
3
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
67
 | 
 use strict;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
300
 | 
    | 
| 
4
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
50
 | 
 use warnings;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
424
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.992';  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: |Message::Charset::Info| uses this module without calling  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## the constructor.  | 
| 
10
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
56
 | 
 use HTML::HTML5::Parser::Charset::Info;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1631
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $XML_AUTO_CHARSET = q;  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $IANA_CHARSET = q;  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $PERL_CHARSET = q;  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $XML_CHARSET = q;  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub create_decode_handle ($$$;$) {  | 
| 
19
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
   my $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$_[1]};  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $obj = {  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              category => 0,  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              char_buffer => \(my $s = ''),  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              char_buffer_pos => 0,  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              character_queue => [],  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              filehandle => $_[2],  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              charset => $_[1],  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              byte_buffer => '',  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
              onerror => $_[3] || sub {},  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              #onerror_set  | 
| 
30
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             };  | 
| 
31
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
   if ($csdef->{uri}->{$XML_AUTO_CHARSET} or  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $obj->{charset} eq $XML_AUTO_CHARSET) {  | 
| 
33
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $b = ''; # UTF-8 w/o BOM  | 
| 
34
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
35
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{input_encoding} = 'UTF-8';  | 
| 
36
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (read $obj->{filehandle}, $b, 256) {  | 
| 
37
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
67
 | 
       no warnings "substr";  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
513
 | 
    | 
| 
38
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
51
 | 
       no warnings "uninitialized";  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62396
 | 
    | 
| 
39
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if (substr ($b, 0, 1) eq "<") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr ($b, 1, 1) eq "?") { # ASCII8  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
43
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
44
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
45
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
46
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii8} or $csdef->{bom_required}) {  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
49
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
52
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
53
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
55
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant}) {  | 
| 
56
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant}};  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 1, 1) eq "\x00") {  | 
| 
59
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
61
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 encoding\s*=\s*["']([^"']*)/x) {  | 
| 
63
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $obj->{input_encoding} = $1;  | 
| 
64
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
65
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
66
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
               if (not $csdef->{ascii16} or $csdef->{ascii16be} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $csdef->{bom_required}) {  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                   charset_uri => $uri,  | 
| 
70
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                   charset_name => $obj->{input_encoding});  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               }  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
73
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
74
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $obj->{input_encoding} = 'UTF-8';  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
76
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if (defined $csdef->{no_bom_variant16le}) {  | 
| 
77
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321  | 
| 
80
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
81
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 encoding\s*=\s*["']([^"']*)/x) {  | 
| 
83
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $obj->{input_encoding} = $1;  | 
| 
84
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
85
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
86
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
               if (not $csdef->{ascii32} or   | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $csdef->{ascii32endian1234} or  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $csdef->{ascii32endian2143} or  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $csdef->{ascii32endian3412} or  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $csdef->{bom_required}) {  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                   charset_uri => $uri,  | 
| 
93
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                   charset_name => $obj->{input_encoding});  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
96
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
97
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $obj->{input_encoding} = 'UTF-8';  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
99
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if (defined $csdef->{no_bom_variant32endian4321}) {  | 
| 
100
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8  | 
| 
105
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{has_bom} = 1;  | 
| 
106
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         substr ($b, 0, 3) = '';  | 
| 
107
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $c = $b;  | 
| 
108
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             encoding\s*=\s*["']([^"']*)/x) {  | 
| 
110
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{input_encoding} = $1;  | 
| 
111
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
112
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
113
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
           if (not $csdef->{utf8_encoding_scheme} or  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               not $csdef->{bom_allowed}) {  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               charset_uri => $uri,  | 
| 
117
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                               charset_name => $obj->{input_encoding});  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
120
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{input_encoding} = 'UTF-8';  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
123
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (defined $csdef->{no_bom_variant}) {  | 
| 
124
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant}};  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (substr ($b, 0, 2) eq "\x00<") {  | 
| 
127
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
129
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
131
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
132
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
133
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
134
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii16} or $csdef->{ascii16le} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
138
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
141
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
142
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
144
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant16be}) {  | 
| 
145
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412  | 
| 
148
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
149
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
151
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
152
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
153
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
154
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or   | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian1234} or  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian2143} or  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
161
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
164
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
165
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
167
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian3412}) {  | 
| 
168
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (substr ($b, 0, 2) eq "\xFE\xFF") {  | 
| 
172
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
174
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 2) = '';  | 
| 
175
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
176
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
178
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
179
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
181
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii16} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii16le} or  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 not $csdef->{bom_allowed}) {  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
186
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-16';  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
192
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant16be}) {  | 
| 
193
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412  | 
| 
196
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
197
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 4) = '';  | 
| 
198
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
199
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
201
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
202
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
203
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
204
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian1234} or  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian2143} or  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 not $csdef->{bom_allowed}) {  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
211
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
214
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
215
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-16';  | 
| 
216
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{byte_buffer} .= "\x00\x00";  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
218
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian3412}) {  | 
| 
219
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
222
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
223
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{input_encoding} = 'UTF-16';  | 
| 
224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 2) = '';  | 
| 
225
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (substr ($b, 0, 2) eq "\xFF\xFE") {  | 
| 
228
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
230
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 2) = '';  | 
| 
231
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
232
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
234
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
235
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
236
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
237
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii16} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii16be} or  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 not $csdef->{bom_allowed}) {  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
242
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
245
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};  | 
| 
246
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-16';  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
248
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant16le}) {  | 
| 
249
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321  | 
| 
252
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
253
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 4) = '';  | 
| 
254
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
255
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
257
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
258
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
259
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
260
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian1234} or  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian2143} or  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian3412} or  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 not $csdef->{bom_allowed}) {  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
267
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
270
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};  | 
| 
271
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-16';  | 
| 
272
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{byte_buffer} .= "\x00\x00";  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
274
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian4321}) {  | 
| 
275
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
278
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};  | 
| 
279
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{input_encoding} = 'UTF-16';  | 
| 
280
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 2) = '';  | 
| 
281
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (substr ($b, 0, 2) eq "\x00\x00") {  | 
| 
284
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
286
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
288
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
289
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
290
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
291
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian2143} or  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian3412} or  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
298
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
301
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
302
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
304
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian1234}) {  | 
| 
305
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143  | 
| 
308
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
309
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
311
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
312
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
313
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
314
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian1234} or  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian3412} or  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
321
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
324
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
325
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
327
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian2143}) {  | 
| 
328
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234  | 
| 
331
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
332
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 4) = '';  | 
| 
333
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
334
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
336
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
337
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
338
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
339
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian2143} or  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian3412} or  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
346
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
349
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
350
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
351
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{has_bom} = 0;  | 
| 
352
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{byte_buffer} .= "\x00\x00\xFE\xFF";  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          }  | 
| 
354
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian1234}) {  | 
| 
355
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143  | 
| 
358
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $obj->{has_bom} = 1;  | 
| 
359
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           substr ($b, 0, 4) = '';  | 
| 
360
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $c = $b; $c =~ tr/\x00//d;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
361
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               encoding\s*=\s*["']([^"']*)/x) {  | 
| 
363
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = $1;  | 
| 
364
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});  | 
| 
365
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
366
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (not $csdef->{ascii32} or  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian1234} or  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian3412} or  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{ascii32endian4321} or  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $csdef->{bom_required}) {  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               $obj->{onerror}->(undef, 'charset-name-mismatch-error',  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 charset_uri => $uri,  | 
| 
373
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 charset_name => $obj->{input_encoding});  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
376
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};  | 
| 
377
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{input_encoding} = 'UTF-8';  | 
| 
378
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{has_bom} = 0;  | 
| 
379
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $obj->{byte_buffer} .= "\x00\x00\xFF\xFE";  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
381
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if (defined $csdef->{no_bom_variant32endian2143}) {  | 
| 
382
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # \x4C\x6F\xA7\x94 EBCDIC  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } # buffer  | 
| 
387
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{byte_buffer} .= $b;  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } # read  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## BOM is optional.  | 
| 
391
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $b = '';  | 
| 
392
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (read $obj->{filehandle}, $b, 3) {  | 
| 
393
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($b eq "\xEF\xBB\xBF") {  | 
| 
394
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{has_bom} = 1;  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
396
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{byte_buffer} .= $b;  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
399
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ## BOM is mandated.  | 
| 
402
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $b = '';  | 
| 
403
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (read $obj->{filehandle}, $b, 2) {  | 
| 
404
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($b eq "\xFE\xFF") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{has_bom} = 1; # UTF-16BE w/o BOM  | 
| 
406
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($b eq "\xFF\xFE") {  | 
| 
408
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{has_bom} = 1; # UTF-16LE w/o BOM  | 
| 
409
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
411
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});  | 
| 
412
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{has_bom} = 0;  | 
| 
413
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM  | 
| 
414
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
417
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});  | 
| 
418
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{has_bom} = 0; # UTF-16BE w/o BOM  | 
| 
419
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
424
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2440} = 'gl-jis-1997-swapped';  | 
| 
425
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2442} = 'gl-jis-1997';  | 
| 
426
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state} = 'state_2842';  | 
| 
427
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     require Encode::GLJIS1997Swapped;  | 
| 
428
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     require Encode::GLJIS1997;  | 
| 
429
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if (Encode::find_encoding ($obj->{state_2440}) and  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Encode::find_encoding ($obj->{state_2442})) {  | 
| 
431
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP';  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {  | 
| 
434
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2440} = 'gl-jis-1978';  | 
| 
435
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state_2442} = 'gl-jis-1983';  | 
| 
436
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $obj->{state} = 'state_2842';  | 
| 
437
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     require Encode::GLJIS1978;  | 
| 
438
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     require Encode::GLJIS1983;  | 
| 
439
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if (Encode::find_encoding ($obj->{state_2440}) and  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Encode::find_encoding ($obj->{state_2442})) {  | 
| 
441
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP';  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif (defined $csdef->{perl_name}->[0]) {  | 
| 
444
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {  | 
| 
446
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];  | 
| 
447
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::EUCJP1997;  | 
| 
448
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if (Encode::find_encoding ($obj->{perl_encoding_name})) {  | 
| 
449
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $obj->{category} |= HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_EUCJP;  | 
| 
450
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {  | 
| 
454
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];  | 
| 
455
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode::ShiftJIS1997;  | 
| 
456
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if (Encode::find_encoding ($obj->{perl_encoding_name})) {  | 
| 
457
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($csdef->{is_block_safe}) {  | 
| 
460
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];  | 
| 
461
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       require Encode;  | 
| 
462
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if (Encode::find_encoding ($obj->{perl_encoding_name})) {  | 
| 
463
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $obj->{onerror}->(undef, 'charset-not-supported-error',  | 
| 
469
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     charset_uri => $obj->{charset});  | 
| 
470
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return undef;  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # create_decode_handle  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub name_to_uri ($$$) {  | 
| 
474
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
   my $domain = $_[1];  | 
| 
475
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $name = lc $_[2];  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
477
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($domain eq 'ietf') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
478
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $IANA_CHARSET . $name;  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($domain eq 'xml') {  | 
| 
480
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ({  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'utf-8' => 1,  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'utf-16' => 1,  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-10646-ucs-2' => 1,  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-10646-ucs-4' => 1,  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-1' => 1,  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-2' => 1,  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-3' => 1,  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-4' => 1,  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-5' => 1,  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-6' => 1,  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-7' => 1,  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-8' => 1,  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-9' => 1,  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-10' => 1,  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-11' => 1,  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-13' => 1,  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-14' => 1,  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-15' => 1,  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-8859-16' => 1,  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'iso-2022-jp' => 1,  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'shift_jis' => 1,  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       'euc-jp' => 1,  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }->{$name}) {  | 
| 
504
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return $XML_CHARSET . $name;  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
507
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $uri = $IANA_CHARSET . $name;  | 
| 
508
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $uri if $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
510
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $XML_CHARSET . $name;  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
512
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return undef;  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # name_to_uri  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub uri_to_name ($$$) {  | 
| 
517
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
   my (undef, $domain, $uri) = @_;  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
519
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($domain eq 'xml') {  | 
| 
520
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $v = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri}->{xml_name};  | 
| 
521
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $v if defined $v;  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
523
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {  | 
| 
524
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return substr ($uri, length $XML_CHARSET);  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $domain = 'ietf'; ## TODO: XML encoding name has smaller range  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
530
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($domain eq 'ietf') {  | 
| 
531
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $v = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri}->{iana_name};  | 
| 
532
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $v->[0] if defined $v;  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {  | 
| 
535
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return substr ($uri, length $IANA_CHARSET);  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
539
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return undef;  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # uri_to_name  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require IO::Handle;  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::DecodeHandle::ByteBuffer;  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: Provides a byte buffer wrapper object.  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new ($$) {  | 
| 
549
 | 
708
 | 
 
 | 
 
 | 
  
708
  
 | 
 
 | 
2350
 | 
   my $self = bless {  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     buffer => '',  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }, shift;  | 
| 
552
 | 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1614
 | 
   $self->{filehandle} = shift;  | 
| 
553
 | 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1640
 | 
   return $self;  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # new  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub read {  | 
| 
557
 | 
2589
 | 
 
 | 
 
 | 
  
2589
  
 | 
 
 | 
4565
 | 
   my $self = shift;  | 
| 
558
 | 
2589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4563
 | 
   my $pos = length $self->{buffer};  | 
| 
559
 | 
2589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9737
 | 
   my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);  | 
| 
560
 | 
2589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23234
 | 
   substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos);  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: This would do different behavior from Perl's standard  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## |read| when $pos points beyond the end of the string.  | 
| 
563
 | 
2589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6196
 | 
   return $r;  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # read  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
566
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
 sub close { $_[0]->{filehandle}->close }  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::DecodeHandle::CharString;  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: Same as Perl's standard |open $handle, '<', \$char_string|,  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## but supports |ungetc| and other extensions.  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new ($$) {  | 
| 
574
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $self = bless {pos => 0}, shift;  | 
| 
575
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $self->{string} = shift; # must be a scalar ref  | 
| 
576
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $self;  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # new  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getc ($) {  | 
| 
580
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $self = shift;  | 
| 
581
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($self->{pos} < length ${$self->{string}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
582
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return substr ${$self->{string}}, $self->{pos}++, 1;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
584
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return undef;  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # getc  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub read ($$$$) {  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my ($self, $scalar, $length, $offset) = @_;  | 
| 
590
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $self = $_[0];  | 
| 
591
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
   my $length = $_[2] || 0;  | 
| 
592
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
   my $offset = $_[3] || 0;  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: We don't support standard Perl semantics if $offset is  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## greater than the length of $scalar.  | 
| 
595
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   substr ($_[1], $offset) = substr (${$self->{string}}, $self->{pos}, $length);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
596
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $count = (length $_[1]) - $offset;  | 
| 
597
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $self->{pos} += $count;  | 
| 
598
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $count;  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # read  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub manakai_read_until ($$$;$) {  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my ($self, $scalar, $pattern, $offset) = @_;  | 
| 
603
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $self = $_[0];  | 
| 
604
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   pos (${$self->{string}}) = $self->{pos};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
605
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if (${$self->{string}} =~ /\G(?>$_[2])+/) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
606
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     substr ($_[1], $_[3]) = substr (${$self->{string}}, $-[0], $+[0] - $-[0]);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
607
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->{pos} += $+[0] - $-[0];  | 
| 
608
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $+[0] - $-[0];  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
610
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return 0;  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # manakai_read_until  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ungetc ($$) {  | 
| 
615
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $self = shift;  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## Ignore second parameter.  | 
| 
617
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $self->{pos}-- if $self->{pos} > 0;  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # ungetc  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 sub close ($) { }  | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 sub onerror ($;$) { }  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::DecodeHandle::Encode;  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## NOTE: Provides a Perl |Encode| module wrapper object.  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
628
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
 sub charset ($) { $_[0]->{charset} }  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
630
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
 sub close ($) { $_[0]->{filehandle}->close }  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getc ($) {  | 
| 
633
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $c = '';  | 
| 
634
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $l = $_[0]->read ($c, 1);  | 
| 
635
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if ($l) {  | 
| 
636
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $c;  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
638
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return undef;  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # getc  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub read ($$$;$) {  | 
| 
643
 | 
2740
 | 
 
 | 
 
 | 
  
2740
  
 | 
 
 | 
4752
 | 
   my $self = $_[0];  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my $scalar = $_[1];  | 
| 
645
 | 
2740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4180
 | 
   my $length = $_[2];  | 
| 
646
 | 
2740
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
8863
 | 
   my $offset = $_[3] || 0;  | 
| 
647
 | 
2740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4354
 | 
   my $count = 0;  | 
| 
648
 | 
2740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4361
 | 
   my $eof;  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## NOTE: It is incompatible with the standard Perl semantics  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## if $offset is greater than the length of $scalar.  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   A: {  | 
| 
653
 | 
2740
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4355
 | 
     return $count if $length < 1;  | 
| 
 
 | 
5340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11766
 | 
    | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
655
 | 
5340
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7693
 | 
     if (my $l = (length ${$self->{char_buffer}}) - $self->{char_buffer_pos}) {  | 
| 
 
 | 
5340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15225
 | 
    | 
| 
656
 | 
1227
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2876
 | 
       if ($l >= $length) {  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         substr ($_[1], $offset)  | 
| 
658
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
431
 | 
             = substr (${$self->{char_buffer}}, $self->{char_buffer_pos},  | 
| 
659
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
227
 | 
                       $length);  | 
| 
660
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
255
 | 
         $count += $length;  | 
| 
661
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
275
 | 
         $self->{char_buffer_pos} += $length;  | 
| 
662
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
         $length = 0;  | 
| 
663
 | 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
487
 | 
         return $count;  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         substr ($_[1], $offset)  | 
| 
666
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1564
 | 
             = substr (${$self->{char_buffer}}, $self->{char_buffer_pos});  | 
| 
 
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3749
 | 
    | 
| 
667
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1928
 | 
         $count += $l;  | 
| 
668
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1670
 | 
         $length -= $l;  | 
| 
669
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1925
 | 
         ${$self->{char_buffer}} = '';  | 
| 
 
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2719
 | 
    | 
| 
670
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1960
 | 
         $self->{char_buffer_pos} = 0;  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
672
 | 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1755
 | 
       $offset = length $_[1];  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
675
 | 
5196
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11204
 | 
     if ($eof) {  | 
| 
676
 | 
2596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5855
 | 
       return $count;  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
679
 | 
2600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4166
 | 
     my $error;  | 
| 
680
 | 
2600
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7680
 | 
     if ($self->{continue}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
681
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($self->{filehandle}->read ($self->{byte_buffer}, 256,  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                      length $self->{byte_buffer})) {  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #   | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
685
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $error = 1;  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
687
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $self->{continue} = 0;  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif (512 > length $self->{byte_buffer}) {  | 
| 
689
 | 
2598
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8218
 | 
       if ($self->{filehandle}->read ($self->{byte_buffer}, 256,  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                      length $self->{byte_buffer})) {  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
693
 | 
2598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4333
 | 
         $eof = 1;  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
697
 | 
2600
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6162
 | 
     unless ($error) {  | 
| 
698
 | 
2600
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6181
 | 
       if (not $self->{bom_checked}) {  | 
| 
699
 | 
711
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1688
 | 
         if (defined $self->{bom_pattern}) {  | 
| 
700
 | 
709
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4312
 | 
           if ($self->{byte_buffer} =~ s/^$self->{bom_pattern}//) {  | 
| 
701
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->{has_bom} = 1;  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
704
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1580
 | 
         $self->{bom_checked} = 1;  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
706
 | 
2600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3950
 | 
       my $string = do {  | 
| 
707
 | 
2600
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
 
 | 
19304
 | 
         local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /^Code point/ };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Encode::decode (  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $self->{perl_encoding_name},  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $self->{byte_buffer},  | 
| 
711
 | 
2600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16800
 | 
           Encode::FB_QUIET(),  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       };  | 
| 
714
 | 
2600
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
75205
 | 
       if (length $string) {  | 
| 
715
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1655
 | 
         $self->{char_buffer} = \$string;  | 
| 
716
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1313
 | 
         $self->{char_buffer_pos} = 0;  | 
| 
717
 | 
711
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2221
 | 
         if (length $self->{byte_buffer}) {  | 
| 
718
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $self->{continue} = 1;  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
721
 | 
1889
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4359
 | 
         if (length $self->{byte_buffer}) {  | 
| 
722
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $error = 1;  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: No further input.  | 
| 
725
 | 
1889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4360
 | 
           redo A;  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
730
 | 
711
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1799
 | 
     if ($error) {  | 
| 
731
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $r = substr $self->{byte_buffer}, 0, 1, '';  | 
| 
732
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $fallback;  | 
| 
733
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $etype = 'illegal-octets-error';  | 
| 
734
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my %earg;  | 
| 
735
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if ($self->{category}  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               & HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_SJIS) {  | 
| 
737
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($r =~ /^[\x81-\x9F\xE0-\xFC]/) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
738
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($self->{byte_buffer} =~ s/(.)//s) {  | 
| 
739
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $r .= $1;                     # not limited to \x40-\xFC - \x7F  | 
| 
740
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $etype = 'unassigned-code-point-error';  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: Range [\xF0-\xFC] is unassigned and may be used as a  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## single-byte character or as the first-byte of a double-byte  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## character, according to JIS X 0208:1997 Appendix 1.  However, the  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## current practice is using the range as first-bytes of double-byte  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## characters.  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($r =~ /^[\x80\xA0\xFD-\xFF]/) {  | 
| 
748
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $etype = 'unassigned-code-point-error';  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($self->{category}  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    & HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_EUCJP) {  | 
| 
752
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
         if ($r =~ /^[\xA1-\xFE]/) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
753
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {  | 
| 
754
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $r .= $1;  | 
| 
755
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $etype = 'unassigned-code-point-error';  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($r eq "\x8F") {  | 
| 
758
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {  | 
| 
759
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $r .= $1;  | 
| 
760
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $etype = 'unassigned-code-point-error' if length $1 == 2;  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($r eq "\x8E") {  | 
| 
763
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {  | 
| 
764
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $r .= $1;  | 
| 
765
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $etype = 'unassigned-code-point-error';  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($r eq "\xA0" or $r eq "\xFF") {  | 
| 
768
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $etype = 'unassigned-code-point-error';  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
771
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $fallback = $self->{fallback}->{$r};  | 
| 
772
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (defined $fallback) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: This is an HTML5 parse error.  | 
| 
774
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $etype = 'fallback-char-error';  | 
| 
775
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $earg{char} = \$fallback;  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (exists $self->{fallback}->{$r}) {  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: This is an HTML5 parse error.  In addition, the octet  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## is not assigned with a character.  | 
| 
779
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $etype = 'fallback-unassigned-error';  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: Fixup line/column number by counting the number of   | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## lines/columns in the string that is to be retuend by this  | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## method call.  | 
| 
786
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $line_diff = 0;  | 
| 
787
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $col_diff = 0;  | 
| 
788
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $set_col;  | 
| 
789
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       for (my $i = 0; $i < $count; $i++) {  | 
| 
790
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $s = substr $_[1], $i - $count, 1;  | 
| 
791
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($s eq "\x0D") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
792
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $line_diff++;  | 
| 
793
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff = 0;  | 
| 
794
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $set_col = 1;  | 
| 
795
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $i++ if substr ($_[1], $i - $count + 1, 1) eq "\x0A";  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($s eq "\x0A") {  | 
| 
797
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $line_diff++;  | 
| 
798
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff = 0;  | 
| 
799
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $set_col = 1;  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
801
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff++;  | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
804
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $i = $self->{char_buffer_pos};  | 
| 
805
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
       if ($count and substr (${$self->{char_buffer}}, -1, 1) eq "\x0D") {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
806
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (substr (${$self->{char_buffer}}, $i, 1) eq "\x0A") {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
807
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $i++;  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
810
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $cb_length = length ${$self->{char_buffer}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
811
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       for (; $i < $cb_length; $i++) {  | 
| 
812
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $s = substr $_[1], $i, 1;  | 
| 
813
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($s eq "\x0D") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
814
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $line_diff++;  | 
| 
815
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff = 0;  | 
| 
816
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $set_col = 1;  | 
| 
817
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $i++ if substr ($_[1], $i + 1, 1) eq "\x0A";  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($s eq "\x0A") {  | 
| 
819
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $line_diff++;  | 
| 
820
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff = 0;  | 
| 
821
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $set_col = 1;  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
823
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $col_diff++;  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{onerror}->($self, $etype, octets => \$r, %earg,  | 
| 
827
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                          level => $self->{level}->{$self->{error_level}->{$etype}},  | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          line_diff => $line_diff,  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          ($set_col ? (column => 1) : ()),  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          column_diff => $col_diff);  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## NOTE: Error handler may modify |octets| parameter, which  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## would be returned as part of the output.  Note that what  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ## is returned would affect what |manakai_read_until| returns.  | 
| 
834
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       ${$self->{char_buffer}} .= defined $fallback ? $fallback : $r;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
837
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1638
 | 
     redo A;  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } # A  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # read  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub manakai_read_until ($$$;$) {  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my ($self, $scalar, $pattern, $offset) = @_;  | 
| 
843
 | 
1894
 | 
 
 | 
 
 | 
  
1894
  
 | 
 
 | 
3652
 | 
   my $self = $_[0];  | 
| 
844
 | 
1894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4049
 | 
   my $s = '';  | 
| 
845
 | 
1894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6195
 | 
   $self->read ($s, 255);  | 
| 
846
 | 
1894
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22923
 | 
   if ($s =~ /^(?>$_[2])+/) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
847
 | 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3469
 | 
     my $rem_length = (length $s) - $+[0];  | 
| 
848
 | 
883
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2694
 | 
     if ($rem_length) {  | 
| 
849
 | 
174
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
496
 | 
       if ($self->{char_buffer_pos} > $rem_length) {  | 
| 
850
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
         $self->{char_buffer_pos} -= $rem_length;  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
852
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
504
 | 
         substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos})  | 
| 
853
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
522
 | 
             = substr ($s, $+[0]);  | 
| 
854
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
385
 | 
         $self->{char_buffer_pos} = 0;  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
857
 | 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4197
 | 
     substr ($_[1], $_[3]) = substr ($s, $-[0], $+[0] - $-[0]);  | 
| 
858
 | 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4448
 | 
     return $+[0];  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif (length $s) {  | 
| 
860
 | 
209
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
614
 | 
     if ($self->{char_buffer_pos} > length $s) {  | 
| 
861
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
       $self->{char_buffer_pos} -= length $s;  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
863
 | 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
392
 | 
       substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos}) = $s;  | 
| 
 
 | 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
690
 | 
    | 
| 
864
 | 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
405
 | 
       $self->{char_buffer_pos} = 0;  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
867
 | 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3632
 | 
   return 0;  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # manakai_read_until  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
 sub has_bom ($) { $_[0]->{has_bom} }  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub input_encoding ($) {  | 
| 
873
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $v = $_[0]->{input_encoding};  | 
| 
874
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $v if defined $v;  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
876
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $uri = $_[0]->{charset};  | 
| 
877
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if (defined $uri) {  | 
| 
878
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return HTML::HTML5::Parser::Charset::DecodeHandle->uri_to_name (xml => $uri);  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
881
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return undef;  | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # input_encoding  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub onerror ($;$) {  | 
| 
885
 | 
1422
 | 
  
100
  
 | 
 
 | 
  
1422
  
 | 
 
 | 
3649
 | 
   if (@_ > 1) {  | 
| 
886
 | 
711
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1799
 | 
     if ($_[1]) {  | 
| 
887
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2370
 | 
       $_[0]->{onerror} = $_[1];  | 
| 
888
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1626
 | 
       $_[0]->{onerror_set} = 1;  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
890
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
       $_[0]->{onerror} = sub { };  | 
| 
891
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       delete $_[0]->{onerror_set};  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
895
 | 
1422
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4499
 | 
   return $_[0]->{onerror_set} ? $_[0]->{onerror} : undef;  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # onerror  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ungetc ($$) {  | 
| 
899
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
  
0
  
 | 
 
 | 
 
 | 
   unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # ungetc  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP;  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 push our @ISA, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getc ($) {  | 
| 
906
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
   my $self = $_[0];  | 
| 
907
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return shift @{$self->{character_queue}} if @{$self->{character_queue}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $r;  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   A: {  | 
| 
911
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $error;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
912
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($self->{continue}) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
913
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ($self->{filehandle}->read ($self->{byte_buffer}, 256,  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                      length $self->{byte_buffer})) {  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #   | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
917
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $error = 1;  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
919
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{continue} = 0;  | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif (512 > length $self->{byte_buffer}) {  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{filehandle}->read ($self->{byte_buffer}, 256,  | 
| 
922
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                  length $self->{byte_buffer});  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
925
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     unless ($error) {  | 
| 
926
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{state} = {  | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           "\x24\x40" => 'state_2440',  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           "\x24\x42" => 'state_2442',  | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           "\x28\x42" => 'state_2842',  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           "\x28\x4A" => 'state_284A',  | 
| 
932
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          }->{$1};  | 
| 
933
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         redo A;  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($self->{state} eq 'state_2842') { # IRV  | 
| 
935
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {  | 
| 
936
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           push @{$self->{character_queue}}, split //, $1;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
937
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $r = shift @{$self->{character_queue}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
939
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           if (length $self->{byte_buffer}) {  | 
| 
940
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $error = 1;  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
942
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $r = undef;  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($self->{state} eq 'state_284A') { # 0201  | 
| 
946
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {  | 
| 
947
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           my $v = $1;  | 
| 
948
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
145
 | 
           $v =~ tr/\x5C\x7E/\xA5\x{203E}/;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
184
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           push @{$self->{character_queue}}, split //, $v;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
950
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $r = shift @{$self->{character_queue}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
952
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           if (length $self->{byte_buffer}) {  | 
| 
953
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $error = 1;  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
955
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $r = undef;  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{onerror}->($self, 'invalid-state-error',  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                state => $self->{state},  | 
| 
958
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($self->{state} eq 'state_2442') { # 1983  | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $v = Encode::decode ($self->{state_2442},  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $self->{byte_buffer},  | 
| 
964
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 Encode::FB_QUIET ());  | 
| 
965
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (length $v) {  | 
| 
966
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           push @{$self->{character_queue}}, split //, $v;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
967
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $r = shift @{$self->{character_queue}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
969
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           if (length $self->{byte_buffer}) {  | 
| 
970
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $error = 1;  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
972
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $r = undef;  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{onerror}->($self, 'invalid-state-error',  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                state => $self->{state},  | 
| 
975
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($self->{state} eq 'state_2440') { # 1978  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $v = Encode::decode ($self->{state_2440},  | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $self->{byte_buffer},  | 
| 
981
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 Encode::FB_QUIET ());  | 
| 
982
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (length $v) {  | 
| 
983
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           push @{$self->{character_queue}}, split //, $v;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
984
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $r = shift @{$self->{character_queue}};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
986
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           if (length $self->{byte_buffer}) {  | 
| 
987
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $error = 1;  | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           } else {  | 
| 
989
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $r = undef;  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{onerror}->($self, 'invalid-state-error',  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                state => $self->{state},  | 
| 
992
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
996
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $error = 1;  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1000
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($error) {  | 
| 
1001
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $r = substr $self->{byte_buffer}, 0, 1, '';  | 
| 
1002
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $etype = 'illegal-octets-error';  | 
| 
1003
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
       if (($self->{state} eq 'state_2442' or  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            $self->{state} eq 'state_2440') and  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $r =~ /^[\x21-\x7E]/ and  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $self->{byte_buffer} =~ s/^([\x21-\x7E])//) {  | 
| 
1007
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $r .= $1;  | 
| 
1008
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $etype = 'unassigned-code-point-error';  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif ($r eq "\x1B" and  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                $self->{byte_buffer} =~ s/^\(H//) { # Old 0201  | 
| 
1011
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $r .= "(H";  | 
| 
1012
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{state} = 'state_284A';  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{onerror}->($self, $etype, octets => \$r,  | 
| 
1015
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          level => $self->{level}->{$self->{error_level}->{$etype}});  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } # A  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
1019
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $r;  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # getc  | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## TODO: This is not good for performance.  Should be replaced  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## by read-centric implementation.  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub read ($$$;$) {  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my ($self, $scalar, $length, $offset) = @_;  | 
| 
1026
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
   my $length = $_[2];  | 
| 
1027
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $r = '';  | 
| 
1028
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while ($length > 0) {  | 
| 
1029
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $c = $_[0]->getc;  | 
| 
1030
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     last unless defined $c;  | 
| 
1031
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $r .= $c;  | 
| 
1032
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $length--;  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1034
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   substr ($_[1], $_[3]) = $r;  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## NOTE: This would do different thing from what Perl's |read| do  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ## if $offset points beyond the end of the $scalar.  | 
| 
1037
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return length $r;  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # read  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub manakai_read_until ($$$;$) {  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my ($self, $scalar, $pattern, $offset) = @_;  | 
| 
1042
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
   my $self = $_[0];  | 
| 
1043
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $c = $self->getc;  | 
| 
1044
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($c =~ /^$_[2]/) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1045
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     substr ($_[1], $_[3]) = $c;  | 
| 
1046
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return 1;  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif (defined $c) {  | 
| 
1048
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->ungetc (ord $c);  | 
| 
1049
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return 0;  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
1051
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return 0;  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } # manakai_read_until  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =   | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =   | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =   | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =   | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =   | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =   | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =   | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =   | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =   | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =   | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ansi_x3.4-1968',  | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ansi_x3.4-1986',  | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ascii',  | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'cp367',  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'csascii',  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ibm367',  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-ir-6',  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso646-us',  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso_646.irv:1991',  | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'us',  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'us-ascii',  | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'us-ascii'],  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'us-ascii',  | 
| 
1084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ascii',  | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-646-us',  | 
| 
1087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'us-ascii'],  | 
| 
1088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',  | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:ascii',  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:cp367',  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:csascii',  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:ibm367',  | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso-ir-6',  | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso646-us',  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso_646.irv:1991',  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:us',  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:us-ascii',  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>  | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ascii-ctrl'],  | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',  | 
| 
1119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>  | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['null'],  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'UTF-8',  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii8 => 1,  | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-8'],  | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {  | 
| 
1162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii8 => 1,  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',  | 
| 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-8'],  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-8',  | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:utf-8',  | 
| 
1175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>  | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf8'],  | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 utf8_encoding_scheme =>  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {  | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_required =>  | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16be =>  | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',  | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16le =>  | 
| 
1200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16'],  | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'UTF-16',  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16be =>  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',  | 
| 
1217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16le =>  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16'],  | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-16',  | 
| 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:utf-16',  | 
| 
1225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {  | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16be => 1,  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',  | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16be =>  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16be'],  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-16be',  | 
| 
1241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:utf-16be',  | 
| 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16le => 1,  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16le =>  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16le'],  | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-16le',  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:utf-16le',  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16be => 1,  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16be'],  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16le => 1,  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['utf-16le'],  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16be =>  | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',  | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant16le =>  | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',  | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['csunicode',  | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-10646-ucs-2'],  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-10646-ucs-2',  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',  | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-10646-UCS-2',  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {  | 
| 
1308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16be => 1,  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ucs-2be'],  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16 => 1,  | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii16le => 1,  | 
| 
1320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ucs-2le'],  | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',  | 
| 
1326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {  | 
| 
1328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii32 => 1,  | 
| 
1329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bom_allowed =>  | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant =>  | 
| 
1332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',  | 
| 
1333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant32endian1234 =>  | 
| 
1334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no_bom_variant32endian4321 =>  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',  | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['csucs4',  | 
| 
1339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-10646-ucs-4'],  | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-10646-ucs-4',  | 
| 
1342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',  | 
| 
1344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',  | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-10646-UCS-4',  | 
| 
1348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {  | 
| 
1351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii32 => 1,  | 
| 
1352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii32endian1234 => 1,  | 
| 
1353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ucs-4be',  | 
| 
1357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-32be'],  | 
| 
1358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',  | 
| 
1360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {  | 
| 
1362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii32 => 1,  | 
| 
1363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ascii32endian4321 => 1,  | 
| 
1364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['ucs-4le',  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'utf-32le'],  | 
| 
1369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',  | 
| 
1371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>  | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['cp819',  | 
| 
1378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'csisolatin1',  | 
| 
1379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ibm819',  | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-8859-1',  | 
| 
1381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-8859-1',  | 
| 
1382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-ir-100',  | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso_8859-1',  | 
| 
1384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso_8859-1:1987',  | 
| 
1385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'l1',  | 
| 
1386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'latin1'],  | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-8859-1',  | 
| 
1389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['iso-8859-1',  | 
| 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'latin1'],  | 
| 
1392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',  | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:iso_8859-1:1987',  | 
| 
1396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-1',  | 
| 
1398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>  | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',  | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-2',  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-3',  | 
| 
1418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>  | 
| 
1421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',  | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-4',  | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>  | 
| 
1431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-5',  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-6',  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>  | 
| 
1451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',  | 
| 
1456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-7',  | 
| 
1458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>  | 
| 
1461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',  | 
| 
1466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-8',  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>  | 
| 
1471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',  | 
| 
1476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-9',  | 
| 
1478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>  | 
| 
1481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',  | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-10',  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>  | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',  | 
| 
1496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-11',  | 
| 
1498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>  | 
| 
1501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',  | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-13',  | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>  | 
| 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',  | 
| 
1516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-14',  | 
| 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>  | 
| 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',  | 
| 
1526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-15',  | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>  | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-8859-16',  | 
| 
1538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>  | 
| 
1541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',  | 
| 
1544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'ISO-2022-JP',  | 
| 
1546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>  | 
| 
1549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['csiso2022jp',  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-2022-jp',  | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-2022-jp'],  | 
| 
1554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'iso-2022-jp',  | 
| 
1556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'urn:x-suika-fam-cx:charset:iso-2022-jp',  | 
| 
1558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>  | 
| 
1562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['iso-2022-jp'],  | 
| 
1565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',  | 
| 
1567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['csshiftjis',  | 
| 
1574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ms_kanji',  | 
| 
1575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'shift_jis',  | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'shift_jis'],  | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'shift_jis',  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['shift-jis-1997'],  | 
| 
1581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',  | 
| 
1583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:shift_jis',  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'Shift_JIS',  | 
| 
1587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>  | 
| 
1590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['shiftjis',  | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'sjis'],  | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',  | 
| 
1598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ietf_name =>  | 
| 
1604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['cseucpkdfmtjapanese',  | 
| 
1605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'euc-jp',  | 
| 
1606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'euc-jp',  | 
| 
1607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'extended_unix_code_packed_format_for_japanese'],  | 
| 
1608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mime_name =>  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'euc-jp',  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['euc-jp-1997'],  | 
| 
1612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:euc-jp',  | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'},  | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   xml_name => 'EUC-JP',  | 
| 
1620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>  | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is_block_safe =>  | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1',  | 
| 
1626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl_name =>  | 
| 
1627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ['euc-jp',  | 
| 
1628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'ujis'],  | 
| 
1629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'uri',  | 
| 
1630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',  | 
| 
1631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1'}};  | 
| 
1632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## $Date: 2008/09/15 07:19:03 $  |