| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::EUCKR; |
|
2
|
18
|
|
|
18
|
|
374
|
use 5.016; |
|
|
18
|
|
|
|
|
71
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
159
|
use strict; |
|
|
18
|
|
|
|
|
57
|
|
|
|
18
|
|
|
|
|
686
|
|
|
5
|
18
|
|
|
18
|
|
92
|
use warnings; |
|
|
18
|
|
|
|
|
45
|
|
|
|
18
|
|
|
|
|
1754
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
124
|
use EBook::Ishmael::CharDet::Constants qw(:CONSTANTS); |
|
|
18
|
|
|
|
|
35
|
|
|
|
18
|
|
|
|
|
17533
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Generated from contrib/cjk2encodings.pl |
|
10
|
|
|
|
|
|
|
my %EUCKR_FREQS = map { $_ => 1 } ( |
|
11
|
|
|
|
|
|
|
51151,49356,46297,49057,49339,41379,48103,49351,45293,46274,48301,47270,45217, |
|
12
|
|
|
|
|
|
|
49654,47054,49065,45268,46287,46517,49336,49338,51153,48878,45538,49572,48838, |
|
13
|
|
|
|
|
|
|
49590,48579,45994,49076,47822,46315,45261,49574,47278,49404,49370,45306,45308, |
|
14
|
|
|
|
|
|
|
47800,46011,45527,41378,46569,45533,46835,48314,48374,49622,48581,45286,47341, |
|
15
|
|
|
|
|
|
|
49358,45498,49144,49359,47534,48890,48115,49381,45304,47531,49366,49363,47291, |
|
16
|
|
|
|
|
|
|
47041,49101,46574,49631,48338,46501,46839,47337,47288,49132,51160,46535,48840, |
|
17
|
|
|
|
|
|
|
46542,47286,45253,50337,49109,49088,49319,51447,47857,47023,51168,47344,51363, |
|
18
|
|
|
|
|
|
|
49655,45497,45218,48111,47593,47335,48888,49327,48316,50101,49656,48842,46527, |
|
19
|
|
|
|
|
|
|
45496,50107,51154,49341,47267,48576,47590,49598,48871,45288,50626,45235,49403, |
|
20
|
|
|
|
|
|
|
49364,47577,45224,51373,45219,46311,49077,47352,46759,45550,47536,46791,48305, |
|
21
|
|
|
|
|
|
|
47055,47049,45491,46521,48125,51156,47559,46050,50143,49915,48583,49131,49383, |
|
22
|
|
|
|
|
|
|
47582,45229,48850,45255,47826,47564,48106,49081,50099,46002,47796,50637,47801, |
|
23
|
|
|
|
|
|
|
49405,45544,49611,48339,48585,49134,51396,47613,49124,49068,45746,51120,47581, |
|
24
|
|
|
|
|
|
|
49066,47803,51384,48578,46300,51378,51159,46325,47566,45274,48609,46587,50887, |
|
25
|
|
|
|
|
|
|
49911,47301,49402,47345,51196,50667,51167,45232,47608,45999,50930,48863,47824, |
|
26
|
|
|
|
|
|
|
47575,49116,49602,49329,45507,51190,46059,48376,49091,49916,45529,47823,46302, |
|
27
|
|
|
|
|
|
|
48580,49090,48302,49623,50935,47578,47009,47791,48570,50150,46565,47592,48843, |
|
28
|
|
|
|
|
|
|
51157,51152,41398,41399,49097,46536,45511,50608,51171,50339,47808,45281,46757, |
|
29
|
|
|
|
|
|
|
45517,48108,49661,49378,50338,51173,46326,49135,49145,50122,51143,47269,49624, |
|
30
|
|
|
|
|
|
|
47612,49371,45524,47297,49110,41400,50082,51439,41401,50085,48864,48830,49902, |
|
31
|
|
|
|
|
|
|
50134,47792,45534,48378,46328,51109,51375,48347,49658,50343,48307,46843,47050, |
|
32
|
|
|
|
|
|
|
47079,49142,46272,47583,48341,50925,47066,46549,49866,46281,47022,47779,45542, |
|
33
|
|
|
|
|
|
|
46837,46061,49913,46249,50660,50616,48887,48880,49108,45277,45997,47271,45476, |
|
34
|
|
|
|
|
|
|
45276,50385,50884,46275,49357,45279,50931,45303,46554,47526,51146,46537,46026, |
|
35
|
|
|
|
|
|
|
50116,50146,50129,47315,51364,49569,50108,48117,50100,46568,50144,51118,45228, |
|
36
|
|
|
|
|
|
|
51170,47606,47043,51391,46518,46822,47289,45512,48118,47610,45528,48573,46308, |
|
37
|
|
|
|
|
|
|
50618,51371,47030,48839,51441,48818,49582,49647,50603,45531,51123,48375,49069, |
|
38
|
|
|
|
|
|
|
49601,51451,49375,47568,47298,47048,47579,50104,49086,47280,47317,50155,49591, |
|
39
|
|
|
|
|
|
|
49089,49662,45282,47042,45294,46003,49825,49653,45259,49342,48599,49093,49328, |
|
40
|
|
|
|
|
|
|
46303,47274,48886,49072,50103,47789,49570,46307,49587,45986,50346,50084,47282, |
|
41
|
|
|
|
|
|
|
45225,47287,51385,46567,47316,45296,51176,47299,51414,47859,49138,47025,48853, |
|
42
|
|
|
|
|
|
|
45230,41404,41405,47350,41396,49903,41397,49593,50617,46576,47348,41394,41395, |
|
43
|
|
|
|
|
|
|
49625,48883,47073,46069,51398,49344,42237,48622,46039,50601,51197,50102,50863, |
|
44
|
|
|
|
|
|
|
50896,51161,46005,48590,46566,47587,46836,48310,48854,48083,45221,47273,50145, |
|
45
|
|
|
|
|
|
|
48841,47045,47353,46060,46768,46067,49372,48104,51376,46545,47584,49133,50118, |
|
46
|
|
|
|
|
|
|
46842,46544,48831,48882,48382,46025,50369,46251,47056,48847,46018,49098,47019, |
|
47
|
|
|
|
|
|
|
48848,51365,47309,46771,48076,46062,50391,45785,50627,45295,45525,46068,49592, |
|
48
|
|
|
|
|
|
|
49103,49660,49095,50677,49377,47097,47027,48041,46007,47283,46809,49873,48851, |
|
49
|
|
|
|
|
|
|
47044,45549,47078,47535,45540,46010,51188,46033,47032,47059,49125,48332,50341, |
|
50
|
|
|
|
|
|
|
47028,45483,50377,48586,50924, |
|
51
|
|
|
|
|
|
|
); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# https://www-archive.mozilla.org/projects/intl/universalcharsetdetection |
|
54
|
|
|
|
|
|
|
my $DIST_RATIO = 0.99; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub new { |
|
57
|
|
|
|
|
|
|
|
|
58
|
67
|
|
|
67
|
0
|
173
|
my ($class) = @_; |
|
59
|
|
|
|
|
|
|
|
|
60
|
67
|
|
|
|
|
378
|
my $self = { |
|
61
|
|
|
|
|
|
|
Code => 0, |
|
62
|
|
|
|
|
|
|
Left => 0, |
|
63
|
|
|
|
|
|
|
Freqs => 0, |
|
64
|
|
|
|
|
|
|
MBs => 0, |
|
65
|
|
|
|
|
|
|
Total => 0, |
|
66
|
|
|
|
|
|
|
Bad => 0, |
|
67
|
|
|
|
|
|
|
}; |
|
68
|
|
|
|
|
|
|
|
|
69
|
67
|
|
|
|
|
617
|
return bless $self, $class; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub take { |
|
74
|
|
|
|
|
|
|
|
|
75
|
6829
|
|
|
6829
|
0
|
13500
|
my ($self, $bytes) = @_; |
|
76
|
|
|
|
|
|
|
|
|
77
|
6829
|
50
|
|
|
|
15349
|
return TAKE_BAD if $self->{Bad}; |
|
78
|
|
|
|
|
|
|
|
|
79
|
6829
|
|
|
|
|
16812
|
for my $i (0 .. length($bytes) - 1) { |
|
80
|
108945
|
|
|
|
|
170165
|
my $b = ord(substr $bytes, $i, 1) & 0xff; |
|
81
|
108945
|
100
|
66
|
|
|
275827
|
if ($self->{Left}) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
82
|
22032
|
100
|
66
|
|
|
56350
|
if ($b >= 0x21 && $b <= 0xfe) { |
|
83
|
22013
|
|
|
|
|
36295
|
$self->{Code} = ($self->{Code} << 8) | $b; |
|
84
|
22013
|
|
|
|
|
29479
|
$self->{Left}--; |
|
85
|
22013
|
100
|
|
|
|
47393
|
if (exists $EUCKR_FREQS{ $self->{Code} }) { |
|
86
|
6283
|
|
|
|
|
8163
|
$self->{Freqs}++; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
22013
|
|
|
|
|
30845
|
$self->{MBs}++; |
|
89
|
22013
|
|
|
|
|
29277
|
$self->{Total}++; |
|
90
|
22013
|
|
|
|
|
37225
|
$self->{Code} = 0; |
|
91
|
|
|
|
|
|
|
} else { |
|
92
|
19
|
|
|
|
|
42
|
$self->{Bad} = 1; |
|
93
|
19
|
|
|
|
|
63
|
return TAKE_BAD; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
} elsif ($b >= 0xa1 && $b <= 0xfe) { |
|
96
|
22037
|
|
|
|
|
31788
|
$self->{Code} = $b; |
|
97
|
22037
|
|
|
|
|
34542
|
$self->{Left} = 1; |
|
98
|
|
|
|
|
|
|
} elsif ($b & 0x80) { |
|
99
|
19
|
|
|
|
|
46
|
$self->{Bad} = 1; |
|
100
|
19
|
|
|
|
|
66
|
return TAKE_BAD; |
|
101
|
|
|
|
|
|
|
} else { |
|
102
|
64857
|
|
|
|
|
109120
|
$self->{Total}++; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
6791
|
|
|
|
|
17301
|
return TAKE_OK; |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub confidence { |
|
111
|
|
|
|
|
|
|
|
|
112
|
7
|
|
|
7
|
0
|
15
|
my ($self) = @_; |
|
113
|
|
|
|
|
|
|
|
|
114
|
7
|
50
|
33
|
|
|
50
|
if ($self->{Bad} or $self->{MBs} == 0) { |
|
115
|
0
|
|
|
|
|
0
|
return 0; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
7
|
|
|
|
|
38
|
return $self->{Freqs} / $self->{MBs}; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
} |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub bad { |
|
123
|
|
|
|
|
|
|
|
|
124
|
0
|
|
|
0
|
0
|
0
|
my ($self) = @_; |
|
125
|
|
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
0
|
return $self->{Bad}; |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
7
|
|
|
7
|
0
|
21
|
sub encoding { 'EUC-KR' } |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
1; |
|
133
|
|
|
|
|
|
|
|