File Coverage

blib/lib/EBook/Ishmael/CharDet/EUCKR.pm
Criterion Covered Total %
statement 39 42 92.8
branch 12 14 85.7
condition 5 9 55.5
subroutine 8 9 88.8
pod 0 5 0.0
total 64 79 81.0


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::EUCKR;
2 18     18   374 use 5.016;
  18         71  
3             our $VERSION = '2.03';
4 18     18   159 use strict;
  18         57  
  18         686  
5 18     18   92 use warnings;
  18         45  
  18         1754  
6              
7 18     18   124 use EBook::Ishmael::CharDet::Constants qw(:CONSTANTS);
  18         35  
  18         17533  
8              
9             # Generated from contrib/cjk2encodings.pl
10             my %EUCKR_FREQS = map { $_ => 1 } (
11             51151,49356,46297,49057,49339,41379,48103,49351,45293,46274,48301,47270,45217,
12             49654,47054,49065,45268,46287,46517,49336,49338,51153,48878,45538,49572,48838,
13             49590,48579,45994,49076,47822,46315,45261,49574,47278,49404,49370,45306,45308,
14             47800,46011,45527,41378,46569,45533,46835,48314,48374,49622,48581,45286,47341,
15             49358,45498,49144,49359,47534,48890,48115,49381,45304,47531,49366,49363,47291,
16             47041,49101,46574,49631,48338,46501,46839,47337,47288,49132,51160,46535,48840,
17             46542,47286,45253,50337,49109,49088,49319,51447,47857,47023,51168,47344,51363,
18             49655,45497,45218,48111,47593,47335,48888,49327,48316,50101,49656,48842,46527,
19             45496,50107,51154,49341,47267,48576,47590,49598,48871,45288,50626,45235,49403,
20             49364,47577,45224,51373,45219,46311,49077,47352,46759,45550,47536,46791,48305,
21             47055,47049,45491,46521,48125,51156,47559,46050,50143,49915,48583,49131,49383,
22             47582,45229,48850,45255,47826,47564,48106,49081,50099,46002,47796,50637,47801,
23             49405,45544,49611,48339,48585,49134,51396,47613,49124,49068,45746,51120,47581,
24             49066,47803,51384,48578,46300,51378,51159,46325,47566,45274,48609,46587,50887,
25             49911,47301,49402,47345,51196,50667,51167,45232,47608,45999,50930,48863,47824,
26             47575,49116,49602,49329,45507,51190,46059,48376,49091,49916,45529,47823,46302,
27             48580,49090,48302,49623,50935,47578,47009,47791,48570,50150,46565,47592,48843,
28             51157,51152,41398,41399,49097,46536,45511,50608,51171,50339,47808,45281,46757,
29             45517,48108,49661,49378,50338,51173,46326,49135,49145,50122,51143,47269,49624,
30             47612,49371,45524,47297,49110,41400,50082,51439,41401,50085,48864,48830,49902,
31             50134,47792,45534,48378,46328,51109,51375,48347,49658,50343,48307,46843,47050,
32             47079,49142,46272,47583,48341,50925,47066,46549,49866,46281,47022,47779,45542,
33             46837,46061,49913,46249,50660,50616,48887,48880,49108,45277,45997,47271,45476,
34             45276,50385,50884,46275,49357,45279,50931,45303,46554,47526,51146,46537,46026,
35             50116,50146,50129,47315,51364,49569,50108,48117,50100,46568,50144,51118,45228,
36             51170,47606,47043,51391,46518,46822,47289,45512,48118,47610,45528,48573,46308,
37             50618,51371,47030,48839,51441,48818,49582,49647,50603,45531,51123,48375,49069,
38             49601,51451,49375,47568,47298,47048,47579,50104,49086,47280,47317,50155,49591,
39             49089,49662,45282,47042,45294,46003,49825,49653,45259,49342,48599,49093,49328,
40             46303,47274,48886,49072,50103,47789,49570,46307,49587,45986,50346,50084,47282,
41             45225,47287,51385,46567,47316,45296,51176,47299,51414,47859,49138,47025,48853,
42             45230,41404,41405,47350,41396,49903,41397,49593,50617,46576,47348,41394,41395,
43             49625,48883,47073,46069,51398,49344,42237,48622,46039,50601,51197,50102,50863,
44             50896,51161,46005,48590,46566,47587,46836,48310,48854,48083,45221,47273,50145,
45             48841,47045,47353,46060,46768,46067,49372,48104,51376,46545,47584,49133,50118,
46             46842,46544,48831,48882,48382,46025,50369,46251,47056,48847,46018,49098,47019,
47             48848,51365,47309,46771,48076,46062,50391,45785,50627,45295,45525,46068,49592,
48             49103,49660,49095,50677,49377,47097,47027,48041,46007,47283,46809,49873,48851,
49             47044,45549,47078,47535,45540,46010,51188,46033,47032,47059,49125,48332,50341,
50             47028,45483,50377,48586,50924,
51             );
52              
53             # https://www-archive.mozilla.org/projects/intl/universalcharsetdetection
54             my $DIST_RATIO = 0.99;
55              
56             sub new {
57              
58 67     67 0 173 my ($class) = @_;
59              
60 67         378 my $self = {
61             Code => 0,
62             Left => 0,
63             Freqs => 0,
64             MBs => 0,
65             Total => 0,
66             Bad => 0,
67             };
68              
69 67         617 return bless $self, $class;
70              
71             }
72              
73             sub take {
74              
75 6829     6829 0 13500 my ($self, $bytes) = @_;
76              
77 6829 50       15349 return TAKE_BAD if $self->{Bad};
78              
79 6829         16812 for my $i (0 .. length($bytes) - 1) {
80 108945         170165 my $b = ord(substr $bytes, $i, 1) & 0xff;
81 108945 100 66     275827 if ($self->{Left}) {
    100          
    100          
82 22032 100 66     56350 if ($b >= 0x21 && $b <= 0xfe) {
83 22013         36295 $self->{Code} = ($self->{Code} << 8) | $b;
84 22013         29479 $self->{Left}--;
85 22013 100       47393 if (exists $EUCKR_FREQS{ $self->{Code} }) {
86 6283         8163 $self->{Freqs}++;
87             }
88 22013         30845 $self->{MBs}++;
89 22013         29277 $self->{Total}++;
90 22013         37225 $self->{Code} = 0;
91             } else {
92 19         42 $self->{Bad} = 1;
93 19         63 return TAKE_BAD;
94             }
95             } elsif ($b >= 0xa1 && $b <= 0xfe) {
96 22037         31788 $self->{Code} = $b;
97 22037         34542 $self->{Left} = 1;
98             } elsif ($b & 0x80) {
99 19         46 $self->{Bad} = 1;
100 19         66 return TAKE_BAD;
101             } else {
102 64857         109120 $self->{Total}++;
103             }
104             }
105              
106 6791         17301 return TAKE_OK;
107              
108             }
109              
110             sub confidence {
111              
112 7     7 0 15 my ($self) = @_;
113              
114 7 50 33     50 if ($self->{Bad} or $self->{MBs} == 0) {
115 0         0 return 0;
116             }
117              
118 7         38 return $self->{Freqs} / $self->{MBs};
119              
120             }
121              
122             sub bad {
123              
124 0     0 0 0 my ($self) = @_;
125              
126 0         0 return $self->{Bad};
127              
128             }
129              
130 7     7 0 21 sub encoding { 'EUC-KR' }
131              
132             1;
133