| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::Big5; |
|
2
|
18
|
|
|
18
|
|
361
|
use 5.016; |
|
|
18
|
|
|
|
|
69
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
105
|
use strict; |
|
|
18
|
|
|
|
|
41
|
|
|
|
18
|
|
|
|
|
579
|
|
|
5
|
18
|
|
|
18
|
|
97
|
use warnings; |
|
|
18
|
|
|
|
|
64
|
|
|
|
18
|
|
|
|
|
1036
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
125
|
use EBook::Ishmael::CharDet::Constants qw(:CONSTANTS); |
|
|
18
|
|
|
|
|
52
|
|
|
|
18
|
|
|
|
|
16337
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
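# Frequently used Big5 code points, keyed by the two-byte code as an integer (lead byte * 256 + trail byte). Generated from contrib/cjk2encodings.pl |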
|
10
|
|
|
|
|
|
|
my %BIG5_FREQS = map { $_ => 1 } ( |
|
11
|
|
|
|
|
|
|
41283,42048,42147,42056,42054,43706,42151,41333,41334,42675,44111,43219,42970, |
|
12
|
|
|
|
|
|
|
47428,48033,42594,42092,42316,42090,42077,42071,42344,45935,42817,43171,45423, |
|
13
|
|
|
|
|
|
|
44499,42148,44208,42069,44641,42312,42707,43244,42681,43236,42217,42055,42445, |
|
14
|
|
|
|
|
|
|
42328,42068,46555,41282,46158,42608,43194,42193,42067,43206,43710,44398,42096, |
|
15
|
|
|
|
|
|
|
42715,42065,46412,42705,43724,44745,42218,42173,42345,44253,43474,46413,42739, |
|
16
|
|
|
|
|
|
|
42207,42978,43877,42590,42165,42606,42622,43234,42357,41325,41326,47952,42726, |
|
17
|
|
|
|
|
|
|
47436,42099,42237,44001,44465,45390,45224,43769,42103,43507,49241,45024,43767, |
|
18
|
|
|
|
|
|
|
44753,45151,44251,42716,42593,42493,42431,42364,43181,42600,42308,42091,42210, |
|
19
|
|
|
|
|
|
|
42583,43851,43179,43173,43514,42816,43384,42213,42355,45277,44492,45987,42576, |
|
20
|
|
|
|
|
|
|
50597,42366,43232,47085,46697,42682,42194,42471,49625,42157,42195,42216,43197, |
|
21
|
|
|
|
|
|
|
46501,49267,42428,43880,43449,42420,44971,42441,44103,47308,49518,42453,42589, |
|
22
|
|
|
|
|
|
|
42219,42664,45747,42855,46926,43745,46929,42427,44230,42728,43341,47324,45425, |
|
23
|
|
|
|
|
|
|
47069,42052,42446,42176,42418,42228,45890,42698,45226,42733,44377,45237,42732, |
|
24
|
|
|
|
|
|
|
43119,43768,45417,42828,42841,42615,42088,42956,42916,45290,43590,46717,48592, |
|
25
|
|
|
|
|
|
|
47348,43215,42058,44917,42459,42231,42086,42170,43346,42306,43844,42198,44723, |
|
26
|
|
|
|
|
|
|
48114,42323,42610,43472,45645,46972,42110,43091,46753,43388,42059,45171,47940, |
|
27
|
|
|
|
|
|
|
43887,42997,45308,42585,43207,46168,43886,43357,42561,44242,44471,45905,42857, |
|
28
|
|
|
|
|
|
|
43383,48326,44986,47318,49099,45520,42304,42102,45409,46771,45473,44662,42154, |
|
29
|
|
|
|
|
|
|
43848,42171,42051,42053,43455,45764,44784,47599,43610,50148,44625,42079,43627, |
|
30
|
|
|
|
|
|
|
46257,42581,48247,42463,42749,47429,48839,42831,49331,45748,46035,45157,42432, |
|
31
|
|
|
|
|
|
|
45229,63960,43843,42586,46521,46791,45939,43258,45690,49771,45937,44472,47993, |
|
32
|
|
|
|
|
|
|
42413,42168,42680,43442,46072,46447,43952,46047,45566,42338,44007,42727,42582, |
|
33
|
|
|
|
|
|
|
43505,42190,48194,42619,45299,46784,44491,43373,49146,42227,47594,44106,47337, |
|
34
|
|
|
|
|
|
|
48509,42084,42846,42490,47313,42687,43344,44710,43076,49654,44524,45565,47189, |
|
35
|
|
|
|
|
|
|
46271,42669,45258,46759,50168,43691,44796,45408,44459,44107,43709,42335,42061, |
|
36
|
|
|
|
|
|
|
49831,44408,45916,47217,42494,48748,44475,49060,42319,47207,42311,42722,45806, |
|
37
|
|
|
|
|
|
|
44020,45542,48072,42570,43591,49111,42063,43470,48331,43976,44528,42852,44660, |
|
38
|
|
|
|
|
|
|
46767,42320,42607,43260,47346,43370,46010,44474,46963,42346,47817,42449,43754, |
|
39
|
|
|
|
|
|
|
46274,43855,43180,49737,44382,47434,42709,44277,48116,43390,42456,50353,45135, |
|
40
|
|
|
|
|
|
|
48047,47215,47027,41318,48055,41317,43203,46174,49224,43596,42584,42470,43210, |
|
41
|
|
|
|
|
|
|
44900,42659,42229,42748,43997,44532,42189,44153,44366,42938,43255,42075,42714, |
|
42
|
|
|
|
|
|
|
48342,46175,44902,44006,47272,42995,42966,48593,51535,43368,44121,42358,43364, |
|
43
|
|
|
|
|
|
|
49532,43756,42160,44400,45493,50661,43382,48598,42182,44637,45816,42963,43621, |
|
44
|
|
|
|
|
|
|
47833,42613,43441,50166,42101,43464,45180,44004,50271,42402,45535,42433,42738, |
|
45
|
|
|
|
|
|
|
47059,44284,47742,43717,42220,48339,47838,50609,46157,45779,44523,48613,46525, |
|
46
|
|
|
|
|
|
|
44265,43761,42668,43585,47830,45519,44669,47777,44028,45814,48231,45691,48711, |
|
47
|
|
|
|
|
|
|
43466,45664,44500,42191,47578,48599,42821,43075,50779,44454,46584,46964,43358, |
|
48
|
|
|
|
|
|
|
46775,44375,43439,49888,42361,42318,49504,50348,44473,47289,49075,48852,42937, |
|
49
|
|
|
|
|
|
|
43734,42410,42353,44645,46179,45999,42412,43337,43463,45783,41337,47595,43362, |
|
50
|
|
|
|
|
|
|
44278,47842, |
|
51
|
|
|
|
|
|
|
); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
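# Reference for $DIST_RATIO (presumably the typical distribution ratio for Big5 from Mozilla's charset detection paper; confirm): https://www-archive.mozilla.org/projects/intl/universalcharsetdetection |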
|
54
|
|
|
|
|
|
|
my $DIST_RATIO = 0.75; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Construct a Big5 detector with every counter reset to zero.
#
# Fields of the returned object:
#   Code  - code being assembled from the current two-byte sequence
#   Left  - number of trail bytes still expected for that sequence
#   Freqs - completed sequences whose code is present in %BIG5_FREQS
#   MBs   - completed multi-byte sequences
#   Total - characters counted so far (multi-byte sequences and single bytes)
#   Bad   - failure flag; take() and confidence() short-circuit when it is true
sub new {

    my $class = shift;

    my %state = map { $_ => 0 } qw(Code Left Freqs MBs Total Bad);

    return bless \%state, $class;

}
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Feed a chunk of raw bytes to the detector and update its running counters.
#
#   $bytes - string whose characters are treated as octets (each character is
#            masked to its low 8 bits)
#
# Returns TAKE_BAD immediately if the detector is already flagged Bad,
# otherwise TAKE_OK once the whole chunk has been consumed. The parsing state
# (Code/Left) is kept in the object, so a two-byte sequence may be split
# across successive calls.
sub take {

    my ($self, $bytes) = @_;

    return TAKE_BAD if $self->{Bad};

    for my $i (0 .. length($bytes) - 1) {
        # Named $byte rather than $b: $b is one of sort's special package
        # variables and should not be shadowed by a lexical.
        my $byte = ord(substr $bytes, $i, 1) & 0xff;
        if ($self->{Left}) {
            # Trail byte: complete the 16-bit code and look it up. The trail
            # byte's value is not range-checked here.
            $self->{Code} = ($self->{Code} << 8) | $byte;
            $self->{Left}--;
            if (exists $BIG5_FREQS{ $self->{Code} }) {
                $self->{Freqs}++;
            }
            $self->{MBs}++;
            $self->{Total}++;
        } elsif ($byte >= 0xa1 && $byte <= 0xfe) {
            # Lead byte. The range starts at 0xa1, not 0xa4: %BIG5_FREQS
            # contains 0xA1xx codes (e.g. 41283 == 0xA143) that could never
            # match otherwise, and a 0xA1-0xA3 lead followed by a high trail
            # byte would make the trail byte be mistaken for a new lead.
            $self->{Code} = $byte;
            $self->{Left} = 1;
        } else {
            # Anything else counts as a single-byte character.
            $self->{Total}++;
        }
    }

    return TAKE_OK;

}
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Estimate how likely it is that the consumed input is Big5.
#
# Returns 0 when the detector is flagged Bad or when no multi-byte sequence
# has been completed yet. Otherwise returns Freqs / MBs, i.e. the fraction of
# completed multi-byte sequences whose code was found in the frequency table.
sub confidence {

    my $self = shift;

    return 0 if $self->{Bad};
    return 0 if $self->{MBs} == 0;

    return $self->{Freqs} / $self->{MBs};

}
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# Accessor for the detector's failure flag (the Bad field).
sub bad {

    my $detector = shift;

    return $detector->{Bad};

}
|
120
|
|
|
|
|
|
|
|
|
121
|
34
|
|
|
34
|
0
|
147
|
# Name of the encoding this detector reports.
sub encoding {
    return 'Big5';
}
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
1; |