File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1253.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1253;
2 18     18   316 use 5.016;
  18         89  
3             our $VERSION = '2.03';
4 18     18   108 use strict;
  18         39  
  18         478  
5 18     18   83 use warnings;
  18         37  
  18         1027  
6              
7 18     18   120 use parent 'EBook::Ishmael::CharDet::CP';
  18         43  
  18         122  
8              
9             # Generated from contrib/512-bigram.pl using various Greek texts from
10             # Wikisource.
11             my %CP1253_FREQS = map { $_ => 1 } (
12             "\xf4\xef","\xef\xf5","\xe1\xe9","\xea\xe1","\xef\xed","\xe5\xe9","\xf4\xe1",
13             "\xf9\xed","\xe1\xed","\xef\xf2","\xef\xe9","\xe5\xed","\xf1\xef","\xe9\xed",
14             "\xf4\xe7","\xf3\xf4","\xf0\xef","\xed\xe1","\xed\xef","\xec\xe5","\xf4\xe9",
15             "\xf1\xe1","\xf4\xe5","\xe9\xf2","\xf4\xf9","\xe1\xf4","\xed\xf4","\xe4\xe5",
16             "\xe1\xf1","\xe7\xed","\xe5\xf1","\xf5\xf4","\xe1\xf2","\xec\xe1","\xf0\xf1",
17             "\xed\x2c","\xe9\xf3","\xe7\xf2","\xe9\xe1","\xf0\xe5","\xf3\xe9","\xe9\xea",
18             "\xe1\xeb","\xeb\xef","\xf1\xe9","\xe1\xf0","\xe5\xf3","\xf9\xf2","\xe5\xf0",
19             "\xf5\xf2","\xe5\xdf","\xeb\xeb","\xf0\xe1","\xf3\xe1","\xe5\xf4","\xe4\xe9",
20             "\xec\xef","\xe1\xf5","\xf2\x2c","\xf3\xe5","\xec\xdd","\xf5\xed","\xf4\xf1",
21             "\xe1\xf3","\xe9\xef","\xea\xef","\xeb\xe5","\xeb\xe1","\xdd\xed","\xed\xe5",
22             "\xef\xfd","\xef\xeb","\xf3\xe7","\xdf\xe1","\xe5\xf5","\xe8\xe5","\xf1\xe5",
23             "\xf0\xe9","\xe1\xec","\xf5\xf3","\xfc\xed","\xef\xec","\xe5\xea","\xef\xf1",
24             "\xe5\xeb","\xea\xe5","\xe5\xf2","\xf0\xfc","\xe3\xe1","\xf5\xf1","\xf1\xdf",
25             "\xdc\xf3","\xe7\xf3","\xeb\xe9","\xe3\xe5","\xe8\xe1","\xed\x2e","\xf3\xef",
26             "\xf7\xe5","\xf4\xdc","\xe1\xea","\xe7\xec","\xed\xe7","\xdd\xf1","\xdc\xed",
27             "\xe9\x2c","\xdf\xed","\xf5\xf0","\xeb\xe7","\xf1\xf9","\xfe\xed","\xe1\xdf",
28             "\xef\xf4","\xe1\xe3","\xec\xe7","\xdc\xf4","\xf3\xe8","\xdf\xef","\xf3\xf5",
29             "\xe1\x2c","\xef\xf3","\xef\xf0","\xe9\xe4","\xe5\xf6","\xf1\xdc","\xf5\xec",
30             "\xe4\xef","\xe9\xec","\xea\xf1","\xed\xf9","\xec\xe9","\xf0\xeb","\xf4\xfc",
31             "\xde\xf3","\xf2\x2e","\xe3\xf9","\xf0\xdc","\xf6\xe7","\xf3\xec","\xf4\xdd",
32             "\xf7\xef","\xdc\xeb","\xe9\xf0","\xe3\xef","\xe1\xe8","\xe8\xe7","\xea\xdc",
33             "\xdf\xf3","\xe5\xec","\xed\xe9","\xe2\xdc","\xdf\xe4","\xe4\xe1","\xfc\xf4",
34             "\xe9\xf4","\xf9\xf3","\xec\xdc","\xf4\xdf","\xf7\xf1","\xe7\xf4","\xeb\xdc",
35             "\xea\xfc","\xec\xdf","\xf5\xe4","\xf1\xfc","\xea\xeb","\xeb\xdd","\xe4\xf1",
36             "\xf1\xe7","\xe9\xeb","\xef\xdf","\xed\xe4","\xf6\xe1","\xe5\xe3","\xe7\x2c",
37             "\xeb\xf9","\xea\xf4","\xeb\xdf","\xe9\xf1","\xf3\xea","\xfc\xec","\xe5\xf7",
38             "\xf3\xf3","\xe9\xe5","\xe7\xf1","\xfc\xeb","\xe5\xfd","\xe1\xf6","\xf0\xf4",
39             "\xe5\xee","\xf5\xea","\xf4\xde","\xe9\x2e","\xdd\xeb","\xf3\xdf","\xf1\xe3",
40             "\xf3\xf9","\xfd\xed","\xe3\xe9","\xe4\xe7","\xdc\xf1","\xea\xfd","\xfd\xf3",
41             "\xeb\xfc","\xed\xb7","\xdd\xe3","\xf0\xf5","\xe7\xeb","\xdf\xf9","\xf0\xf9",
42             "\xf9\xf1","\xef\xe4","\xf7\xe1","\xf5\xe3","\xea\xe7","\xf6\xef","\xe2\xe1",
43             "\xe8\xef","\xec\xfc","\xf9\xec","\xc2\xc3","\xe5\xf9","\xca\xe1","\xeb\xfd",
44             "\xea\xe9","\xe5\xe4","\xc1\xc2","\xf5\xf7","\xfc\xf3","\xf0\xdd","\xf1\xdd",
45             "\xec\xf0","\xfd\xf4","\xf1\xf7","\xe2\xef","\xf1\xf5","\xfc\xf2","\xe4\xdd",
46             "\xf9\xf4","\xdd\xf3","\xf5\xeb","\xe9\xe8","\xdd\xf4","\xed\xfc","\xf1\xde",
47             "\xeb\xde","\xeb\xf5","\xfd\xec","\xed\xdd","\xee\xe1","\xe5\xe1","\xe4\xdf",
48             "\xef\xe3","\xdf\xe6","\xe6\xe5","\xe9\xdc","\xed\xdc","\xe5\xef","\xfd\xea",
49             "\xe8\xdd","\xf0\xdf","\xed\xe8","\xee\xe5","\xf0\xe7","\xf6\xe9","\xde\xf2",
50             "\xf5\xe8","\xe4\xf9","\xef\xea","\xe9\xe3","\xdf\xf4","\xe6\xef","\xf3\xf7",
51             "\xc5\xc6","\xed\xdf","\xfc\xe3","\xe7\xea","\xf6\xe5","\xe1\xee","\xea\xf9",
52             "\xf7\xf9","\xf6\xdd","\xf3\xf0","\xe1\xf7","\xec\xf9","\xe3\xf1","\xe7\xe3",
53             "\xe7\x2e","\xe9\xf7","\xdc\xe4","\xe8\xde","\xf6\xf1","\xea\xde","\xdc\xec",
54             "\xf2\xb7","\xe3\xdf","\xf1\xfe","\xe2\xeb","\xe5\xe8","\xe3\xdc","\xe7\xe4",
55             "\xe8\xf9","\xe9\xe2","\xde\xec","\xdf\xe3","\xe1\x2e","\xe1\xe4","\xef\xf6",
56             "\xfd\xeb","\xe8\xf1","\xec\xec","\xdf\xf1","\xf5\x2c","\xec\xde","\xf1\xf4",
57             "\xf3\xf6","\xe4\xfd","\xf1\xec","\xea\xfe","\xde\xed","\xf4\xf5","\xdf\xe7",
58             "\xfc\xf0","\xf7\xe8","\xec\xed","\xed\xde","\xf5\xf6","\xde\xea","\xe4\xfc",
59             "\xdd\xf7","\xfd\xf2","\xdd\xf2","\xe3\xe7","\xdc\xe8","\xec\xf6","\xe3\xed",
60             "\xfd\xef","\xe3\xdd","\xfe\xf1","\xdd\xf0","\xed\xf5","\xc3\xc4","\xe5\x2c",
61             "\xdf\xec","\xe9\xf9","\xf7\xe7","\xea\xdd","\xdd\xec","\xe3\xea","\xf4\xf4",
62             "\xc4\xc5","\xdc\xe3","\xf5\x2e","\xe3\xe3","\xf3\xfd","\xe4\xf5","\xdf\xea",
63             "\xef\xf7","\xe9\xfc","\xe7\xe8","\xe8\xf5","\xe3\xfe","\xfd\xf1","\xfe\xf3",
64             "\xf6\xf5","\xfd\xe3","\xe9\xb7","\x32\x30","\xe2\xf1","\xdf\xeb","\xe9\xfe",
65             "\xdf\xf0","\xdd\xef","\xf4\xfe","\xfc\xf1","\xea\xf5","\xe2\xe9","\xec\xe2",
66             "\xe3\xde","\xc7\xc8","\xef\xe8","\xe6\xf9","\xde\xf1","\xdc\xea","\xed\xfe",
67             "\xf9\xf0","\xdc\xf0","\xc6\xc7","\xfd\xe8","\xe1\xfd","\xf5\xf8","\xe9\xe6",
68             "\xdc\xe6","\xef\x2c","\xc4\xe9","\xf9\x2c","\xdf\xf2","\xfe\xec","\xe8\xdc",
69             "\xee\xef","\xf8\xef","\xe9\xe7","\xdd\xe1","\xfe\xf4","\xe1\xe2","\xf6\xf9",
70             "\xf6\xdc","\xf8\xe7","\xe1\xe5","\xd4\xef","\xe8\xe9","\xde\xf4","\xf8\xe5",
71             "\xf1\xe8","\xdf\xf7","\xde\xeb","\xdc\xf7","\xe1\xef","\xdd\xea","\xf3\xdd",
72             "\xfc\xe4","\xf5\xe1","\xe1\xb7","\xfd\xe5","\xf7\xfe","\xc1\xf1","\xe3\xec",
73             "\xe5\xe2","\xf4\x92","\x6f\x72","\xf6\xfc","\xdd\xe4","\xe4\xde","\xf6\xdf",
74             "\xe9\x3b","\xe2\xe5","\xf7\xdf","\xe3\xfc","\xed\x3b","\xe3\xf5","\xf7\xdd",
75             "\xea\x92","\xe4\xdc","\xe1\xe6","\x31\x30","\x74\x74","\xef\xe2","\xf6\xe8",
76             "\xf9\x2e","\xe2\xe7","\x68\x74","\xfe\xf0","\xee\xe7","\xfd\xf0","\xf7\xe9",
77             "\xf3\xdc","\x30\x30","\xf7\xde","\xdc\xee","\xdd\xf9","\xde\xe8","\xe7\xf0",
78             "\xd0\xe1","\xed\xed","\xf3\xfc","\xe9\xde","\xd7\xef","\xeb\xe8","\xf2\x3a",
79             "\xf2\x3b","\xdc\x2c","\xf8\xe1","\xf1\xea","\xdf\xe8","\xf1\xf1","\xf8\xf5",
80             "\xdf\xe5","\xe7\xb7","\xfc\xea","\xf1\xed","\xf1\x2e","\xf6\xfd","\xcf\xf5",
81             "\x31\x39","\xf9\xea","\xf0\xf0","\xc2\xe9","\xea\xdf","\xc1\xeb","\x69\x6b",
82             "\x77\x77","\xe3\xeb","\xc1\xed","\xf0\xed","\x72\x67","\xf0\xde","\x6f\x6e",
83             "\xc6\xc8","\x73\x3a","\x3a\x2f","\xee\xe9","\xe9\xee","\x74\x70","\xed\xfd",
84             "\x2f\x2f","\xdd\xf6","\xf7\xed","\x2e\x6f","\xc2\xe1","\xdf\x2c","\xc1\xf0",
85             "\xc5\xe9",
86             );
87              
88             my $DIST_RATIO = '0.92';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x81", # undef
92             "\x88", # ^
93             "\x8a", # ^
94             "\x8c", # ^
95             "\x8d", # ^
96             "\x8e", # ^
97             "\x8f", # ^
98             "\x90", # ^
99             "\x98", # ^
100             "\x9a", # ^
101             "\x9c", # ^
102             "\x9d", # ^
103             "\x9e", # ^
104             "\x9f", # ^
105             "\xaa", # ^
106             "\xa0", # NBSP
107             "\xff", # undef
108             );
109              
110             my %IGNORE = (
111             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
112             %CHARSET_SPACE_SET,
113             );
114              
115             sub ignore {
116              
117 548864     548864 0 883653 my ($self, $byte) = @_;
118              
119 548864         1427478 return exists $IGNORE{ $byte };
120              
121             }
122              
123             sub freq_bigram {
124              
125 387306     387306 0 616683 my ($self, $bigram) = @_;
126              
127 387306         993469 return exists $CP1253_FREQS{ $bigram };
128              
129             }
130              
131 0     0 0 0 sub dist_ratio { $DIST_RATIO };
132              
133 67     67 0 205 sub encoding { 'CP1253' }
134              
135             1;