File Coverage

blib/lib/EBook/Ishmael/CharDet/Big5.pm
Criterion Covered Total %
statement 34 36 94.4
branch 9 10 90.0
condition 5 6 83.3
subroutine 8 9 88.8
pod 0 5 0.0
total 56 66 84.8


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::Big5;
2 18     18   361 use 5.016;
  18         69  
3             our $VERSION = '2.03';
4 18     18   105 use strict;
  18         41  
  18         579  
5 18     18   97 use warnings;
  18         64  
  18         1036  
6              
7 18     18   125 use EBook::Ishmael::CharDet::Constants qw(:CONSTANTS);
  18         52  
  18         16337  
8              
9             # Generated from contrib/cjk2encodings.pl
10             my %BIG5_FREQS = map { $_ => 1 } (
11             41283,42048,42147,42056,42054,43706,42151,41333,41334,42675,44111,43219,42970,
12             47428,48033,42594,42092,42316,42090,42077,42071,42344,45935,42817,43171,45423,
13             44499,42148,44208,42069,44641,42312,42707,43244,42681,43236,42217,42055,42445,
14             42328,42068,46555,41282,46158,42608,43194,42193,42067,43206,43710,44398,42096,
15             42715,42065,46412,42705,43724,44745,42218,42173,42345,44253,43474,46413,42739,
16             42207,42978,43877,42590,42165,42606,42622,43234,42357,41325,41326,47952,42726,
17             47436,42099,42237,44001,44465,45390,45224,43769,42103,43507,49241,45024,43767,
18             44753,45151,44251,42716,42593,42493,42431,42364,43181,42600,42308,42091,42210,
19             42583,43851,43179,43173,43514,42816,43384,42213,42355,45277,44492,45987,42576,
20             50597,42366,43232,47085,46697,42682,42194,42471,49625,42157,42195,42216,43197,
21             46501,49267,42428,43880,43449,42420,44971,42441,44103,47308,49518,42453,42589,
22             42219,42664,45747,42855,46926,43745,46929,42427,44230,42728,43341,47324,45425,
23             47069,42052,42446,42176,42418,42228,45890,42698,45226,42733,44377,45237,42732,
24             43119,43768,45417,42828,42841,42615,42088,42956,42916,45290,43590,46717,48592,
25             47348,43215,42058,44917,42459,42231,42086,42170,43346,42306,43844,42198,44723,
26             48114,42323,42610,43472,45645,46972,42110,43091,46753,43388,42059,45171,47940,
27             43887,42997,45308,42585,43207,46168,43886,43357,42561,44242,44471,45905,42857,
28             43383,48326,44986,47318,49099,45520,42304,42102,45409,46771,45473,44662,42154,
29             43848,42171,42051,42053,43455,45764,44784,47599,43610,50148,44625,42079,43627,
30             46257,42581,48247,42463,42749,47429,48839,42831,49331,45748,46035,45157,42432,
31             45229,63960,43843,42586,46521,46791,45939,43258,45690,49771,45937,44472,47993,
32             42413,42168,42680,43442,46072,46447,43952,46047,45566,42338,44007,42727,42582,
33             43505,42190,48194,42619,45299,46784,44491,43373,49146,42227,47594,44106,47337,
34             48509,42084,42846,42490,47313,42687,43344,44710,43076,49654,44524,45565,47189,
35             46271,42669,45258,46759,50168,43691,44796,45408,44459,44107,43709,42335,42061,
36             49831,44408,45916,47217,42494,48748,44475,49060,42319,47207,42311,42722,45806,
37             44020,45542,48072,42570,43591,49111,42063,43470,48331,43976,44528,42852,44660,
38             46767,42320,42607,43260,47346,43370,46010,44474,46963,42346,47817,42449,43754,
39             46274,43855,43180,49737,44382,47434,42709,44277,48116,43390,42456,50353,45135,
40             48047,47215,47027,41318,48055,41317,43203,46174,49224,43596,42584,42470,43210,
41             44900,42659,42229,42748,43997,44532,42189,44153,44366,42938,43255,42075,42714,
42             48342,46175,44902,44006,47272,42995,42966,48593,51535,43368,44121,42358,43364,
43             49532,43756,42160,44400,45493,50661,43382,48598,42182,44637,45816,42963,43621,
44             47833,42613,43441,50166,42101,43464,45180,44004,50271,42402,45535,42433,42738,
45             47059,44284,47742,43717,42220,48339,47838,50609,46157,45779,44523,48613,46525,
46             44265,43761,42668,43585,47830,45519,44669,47777,44028,45814,48231,45691,48711,
47             43466,45664,44500,42191,47578,48599,42821,43075,50779,44454,46584,46964,43358,
48             46775,44375,43439,49888,42361,42318,49504,50348,44473,47289,49075,48852,42937,
49             43734,42410,42353,44645,46179,45999,42412,43337,43463,45783,41337,47595,43362,
50             44278,47842,
51             );
52              
53             # https://www-archive.mozilla.org/projects/intl/universalcharsetdetection
54             my $DIST_RATIO = 0.75;
55              
56             sub new {
57              
58 67     67 0 227 my ($class) = @_;
59              
60 67         526 my $self = {
61             Code => 0,
62             Left => 0,
63             Freqs => 0,
64             MBs => 0,
65             Total => 0,
66             Bad => 0,
67             };
68              
69 67         696 return bless $self, $class;
70              
71             }
72              
73             sub take {
74              
75 19885     19885 0 38327 my ($self, $bytes) = @_;
76              
77 19885 50       41521 return TAKE_BAD if $self->{Bad};
78              
79 19885         38799 for my $i (0 .. length($bytes) - 1) {
80 318160         480927 my $b = ord(substr $bytes, $i, 1) & 0xff;
81 318160 100 100     724800 if ($self->{Left}) {
    100          
82 63857         96815 $self->{Code} = ($self->{Code} << 8) | $b;
83 63857         84556 $self->{Left}--;
84 63857 100       127878 if (exists $BIG5_FREQS{ $self->{Code} }) {
85 4973         8193 $self->{Freqs}++;
86             }
87 63857         86932 $self->{MBs}++;
88 63857         91955 $self->{Total}++;
89             } elsif ($b >= 0xa4 && $b <= 0xfe) {
90 63876         91343 $self->{Code} = $b;
91 63876         94518 $self->{Left} = 1;
92             } else {
93 190427         313960 $self->{Total}++;
94             }
95             }
96              
97 19885         48964 return TAKE_OK;
98              
99             }
100              
101             sub confidence {
102              
103 34     34 0 118 my ($self) = @_;
104              
105 34 100 66     188 if ($self->{Bad} or $self->{MBs} == 0) {
106 2         16 return 0;
107             }
108              
109 32         308 return $self->{Freqs} / $self->{MBs};
110              
111             }
112              
113             sub bad {
114              
115 0     0 0 0 my ($self) = @_;
116              
117 0         0 return $self->{Bad};
118              
119             }
120              
121 34     34 0 147 sub encoding { 'Big5' }
122              
123             1;