File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1251.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1251;
2 18     18   330 use 5.016;
  18         76  
3             our $VERSION = '2.03';
4 18     18   114 use strict;
  18         39  
  18         478  
5 18     18   79 use warnings;
  18         32  
  18         965  
6              
7 18     18   100 use parent 'EBook::Ishmael::CharDet::CP';
  18         78  
  18         129  
8              
9             # Generated from contrib/512-bigrams.pl, trained from various public-domain
10             # texts from Project Gutenberg (https://www.gutenberg.org/).
11             my %CP1251_FREQS = map { $_ => 1 } (
12             "\xf1\xf2","\xf2\xee","\xe5\xed","\xed\xee","\xed\xe8","\xed\xe0","\xf0\xe0",
13             "\xed\xe5","\xea\xee","\xe3\xee","\xee\xe2","\xf0\xee","\xef\xee","\xe5\xf0",
14             "\xee\xf1","\xf0\xe5","\xe2\xee","\xef\xf0","\xeb\xe8","\xee\xf0","\xe0\xeb",
15             "\xee\xf2","\xe5\xeb","\xe2\xe5","\xe5\xf2","\xe0\xed","\xe5\xf1","\xf2\xe5",
16             "\xee\xeb","\xea\xe0","\xf2\xe0","\xee\xec","\xeb\xee","\xe2\xe0","\xf1\xea",
17             "\xee\xe4","\xf0\xe8","\xeb\xe5","\xf2\xfc","\xee\xe3","\xf2\xe8","\xe4\xe5",
18             "\xee\xed","\xe0\xf2","\xe5\xec","\xeb\xe0","\xec\xe5","\xed\xfb","\xe8\xf2",
19             "\xeb\xfc","\xf7\xe5","\xe4\xe0","\xe0\xea","\xee\xe1","\xee\xe9","\xe8\xf1",
20             "\xf2\xe2","\xe4\xee","\xe8\xeb","\xe8\xed","\xe8\xec","\xe7\xe0","\xed\xed",
21             "\xe5\xe4","\xe0\xf1","\xec\xee","\xe2\xe8","\xf2\xf0","\xf1\xee","\xe8\xe5",
22             "\xf1\xff","\xe0\xf0","\xea\xe8","\xe6\xe5","\xe8\xe7","\xe0\xe2","\xe0\xec",
23             "\xe0\xe7","\xec\xe8","\xf1\xe5","\xe5\xe3","\xe8\xff","\xe1\xfb","\xec\xe0",
24             "\xee\xe5","\xf1\xeb","\xf0\xf3","\xe5\xe9","\xf7\xf2","\xe4\xe8","\xe1\xee",
25             "\xe2\xf1","\xe8\xea","\xf7\xe0","\xe0\x2c","\xf1\xfc","\xe8\xf5","\xe5\xea",
26             "\xef\xe5","\xe8\x2c","\xe0\xff","\xe2\xfb","\xe8\xe2","\xe8\xf7","\xe8\xe9",
27             "\xf8\xe5","\xe5\x2c","\xee\xea","\xf3\xe4","\xe1\xe5","\xf1\xe2","\xe4\xf3",
28             "\xec\xf3","\xeb\xff","\xe0\xe4","\xe5\xe2","\xf2\xf1","\xe4\xed","\xfd\xf2",
29             "\xf9\xe5","\xee\x2c","\xe8\xe8","\xfb\xeb","\xfb\xe9","\xee\xe6","\xe0\xe5",
30             "\xf1\xe0","\xfc\xed","\xf7\xe8","\xe7\xed","\xf1\xe8","\xee\xef","\xf5\xee",
31             "\xe5\xe5","\xef\xe8","\xed\xf3","\xf6\xe5","\xee\xf7","\xf0\xfb","\xe8\xe4",
32             "\xff\x2c","\xe5\xf7","\xff\xf2","\xf3\xf2","\xea\xf0","\xf2\xed","\xfb\xe5",
33             "\xed\xff","\xf1\xef","\xfb\xf5","\xec\xed","\xe6\xe8","\xfb\xec","\xf8\xe8",
34             "\xe8\xf0","\xec\x2c","\xee\xe7","\xe1\xf0","\xe2\xeb","\xf2\xf3","\xf3\xfe",
35             "\xe5\xe7","\xf6\xe8","\xe3\xe0","\xfc\x2c","\xe7\xe2","\xec\xfb","\xea\xf3",
36             "\xe4\xf0","\xee\xe8","\xf2\xfb","\xeb\xf3","\xef\xe0","\xeb\xfe","\xe3\xf0",
37             "\xf3\xf1","\xe2\xed","\xe9\x2c","\xf0\xed","\x31\x38","\xf1\xf1","\xe0\xf5",
38             "\xe5\xe1","\xe6\xe4","\xf1\xed","\xe1\xeb","\xe0\xfe","\xe7\xe4","\xe0\xe6",
39             "\xe3\xeb","\xe5\xe6","\xf3\xec","\xf0\xff","\xf1\xf3","\xed\xfc","\xf3\xe6",
40             "\xed\xf2","\xfb\xe2","\xe6\xe0","\xe3\xe8","\xf3\xf7","\xe6\xed","\xe0\xf7",
41             "\xe3\xe4","\xf1\xec","\xe1\xf3","\xe2\xf0","\xfc\xf1","\xe0\xef","\xe0\x2e",
42             "\xf3\x2c","\xed\xf1","\xec\x2e","\xe8\xf6","\xf3\xe3","\xf9\xe8","\xe0\xe1",
43             "\xe4\xe2","\xf3\xf0","\xe2\xf3","\xea\xf2","\xe5\xef","\xf7\xed","\xf2\x2c",
44             "\xeb\xf1","\xee\xf8","\xe8\x2e","\xfb\x2c","\xfe\xf2","\xe4\xeb","\xf3\xea",
45             "\xe0\xe3","\xe7\xee","\xf3\xe1","\xf0\xf6","\xf0\xf2","\xee\xfe","\xe7\xe8",
46             "\xef\xeb","\xe0\xf8","\xc3\xe5","\xe1\xe8","\xfb\xf1","\xe4\xfb","\xed\xf6",
47             "\xe5\x2e","\xee\xff","\xe8\xe3","\xfc\xea","\xea\xeb","\xe8\xe1","\xef\xf3",
48             "\xf6\xe0","\xe4\xf1","\xf8\xe0","\xff\xed","\xe8\xfe","\xe9\xf1","\xe2\xf8",
49             "\xfe\xf9","\xfb\xf2","\xe3\xf3","\xf3\xf8","\xf3\xef","\xe3\xe5","\xf5\xe0",
50             "\x29\x2e","\xe1\xe0","\xcf\xee","\xfe\x2c","\xe5\xf9","\xe7\xfb","\xe8\xe0",
51             "\xea\xe5","\xf0\xf1","\xe5\xee","\xbb\x2c","\xe7\xec","\xf0\xe3","\xff\xf1",
52             "\xf0\xfc","\xfc\xe5","\xeb\x2c","\xea\xed","\xf3\xeb","\xe7\xe5","\xcf\xf0",
53             "\xe2\x2c","\xed\xe4","\xe5\xf8","\xfc\xec","\xeb\xed","\xf0\xe2","\xe5\xf5",
54             "\xff\x2e","\xe0\xe9","\xf3\xe2","\xff\xec","\x31\x37","\xeb\xfb","\xf2\xea",
55             "\xf0\x2e","\xf2\xff","\xf5\x2c","\x29\x2c","\xec\xff","\xca\xe0","\xf7\xf3",
56             "\x65\x72","\xe8\xee","\x2e\x29","\xe3\x2e","\xf3\xf9","\xf4\xe8","\xee\x2e",
57             "\xff\xeb","\xea\xf1","\xbb\x2e","\xed\x2c","\xff\xe7","\xff\xe2","\xf9\xe0",
58             "\x97\x31","\xcd\xee","\xe9\x2e","\xf2\x2e","\xe5\xf6","\x65\x6e","\xf5\xe8",
59             "\x69\x73","\x38\x33","\xe8\xe6","\xf0\xf5","\xfc\xfe","\xea\x2c","\xf3\xed",
60             "\xe1\xf9","\xf3\xf5","\xf0\xec","\xcd\xe0","\xe1\xed","\x6f\x6e","\xf0\xe6",
61             "\xe2\xff","\xf3\xe7","\xe9\xed","\xff\xf9","\x28\x31","\xcf\xe5","\xcd\xe5",
62             "\xfb\xf8","\xeb\xe6","\x27\x27","\xf1\xfb","\x2d\xf2","\x69\x6e","\xe7\xf0",
63             "\xe8\xf8","\xee\xf5","\xfc\xff","\xfe\xe4","\xff\xe5","\xf0\xe4","\xe2\xe7",
64             "\xe4\xfc","\xe7\xf3","\xe8\xef","\xf4\xf0","\x74\x65","\xe4\x2e","\xe2\xf2",
65             "\x64\x65","\xe2\x2e","\xf5\xf0","\xfc\x2e","\xcc\xee","\x6f\x72","\xe0\xbb",
66             "\xe5\xfe","\xea\xe2","\xf3\xe5","\xeb\xeb","\xf0\xea","\xd1\xf2","\xec\xeb",
67             "\xf3\x2e","\xce\xed","\xee\xf4","\xe4\xff","\xc7\xe0","\x65\x73","\xfe\xe1",
68             "\xe0\xf4","\x72\x65","\xe5\xff","\xc3\xee","\xfc\xf8","\xfb\xf0","\xe3\xed",
69             "\xe6\xf3","\xe1\xff","\xff\xe4","\xfb\x2e","\xe1\xf1","\xee\x2d","\xee\xee",
70             "\xd0\xe5","\xf8\xed","\xca\xee","\xf4\xee","\x75\x72","\x49\x49","\xf8\xeb",
71             "\x6c\x65","\xe7\xeb","\x74\x69","\xff\xf5","\xc0\x2e","\xe7\xff","\xcc\xe0",
72             "\xf1\xf7","\xc2\xee","\xe8\xbb","\xe0\xf9","\x6c\x69","\xf4\xe0","\x31\x39",
73             "\xe0\xf6","\xef\xfb","\xd2\xe0","\xed\xe3","\x61\x6e","\xd0\xe0","\xee\xf6",
74             "\xeb\xea","\xf1\xf0","\xf0\x2c","\xdd\xf2","\x72\x69","\xf0\xf8","\xed\xea",
75             "\xeb\x2e","\xed\x2e","\xe7\xfc","\x28\xf1","\xe2\xea","\xec\xef","\x61\x74",
76             "\xfb\xea","\xe2\xef","\xf8\xfc","\xc2\xe5","\xcf\xe0","\x6f\x75","\xf1\xf5",
77             "\x69\x65","\xf4\xe5","\xe5\xe8","\xe8\xf9","\x63\x65","\x63\x68","\x73\x74",
78             "\xf1\xe4","\xff\xea","\xc2\xe8","\xfb\xed","\xe7\xe3","\xf8\xf3","\xfe\x2e",
79             "\xcc\xe5","\x69\x74","\xf7\xfc","\xf8\xea","\x68\x65","\xf2\xeb","\x28\xab",
80             "\x6e\x65","\xf5\x2e","\xf8\xee","\xfe\xf7","\x61\x72","\xe0\xf3","\xe0\xe8",
81             "\xf3\xe0","\xe5\xe0","\xe5\xbb","\xbb\x29","\x6f\x6d","\x6e\x73","\xe9\xf8",
82             "\xec\xf1","\xff\xfe","\xc0\xeb","\x73\x65","\xec\xec","\xed\xf7","\xf6\xee",
83             "\xee\x3a","\xee\xf9","\xf7\xea","\x61\x6c","\xc4\xe0","\x31\x36","\xab\xcf",
84             "\xfc\xe7","\xe1\xfa","\xe2\xfc","\xf6\xf3","\xe5\x3b","\x6d\x65","\xf0\xe1",
85             "\xe0\x3b",
86             );
87              
88             my $DIST_RATIO = '0.91';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x98", # Alt
92             "\xa0", # NBSP
93             );
94              
95             my %IGNORE = (
96             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
97             %CHARSET_SPACE_SET,
98             );
99              
100             sub ignore {
101              
102 548864     548864 0 868248 my ($self, $byte) = @_;
103              
104 548864         1436457 return exists $IGNORE{ $byte };
105              
106             }
107              
108             sub freq_bigram {
109              
110 418835     418835 0 671784 my ($self, $bigram) = @_;
111              
112 418835         1095788 return exists $CP1251_FREQS{ $bigram };
113              
114             }
115              
116 0     0 0 0 sub dist_ratio { $DIST_RATIO };
117              
118 67     67 0 246 sub encoding { 'CP1251' }
119              
120             1;
121