File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1254.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1254;
2 18     18   357 use 5.016;
  18         72  
3             our $VERSION = '2.03';
4 18     18   108 use strict;
  18         33  
  18         523  
5 18     18   80 use warnings;
  18         53  
  18         1163  
6              
7 18     18   102 use parent 'EBook::Ishmael::CharDet::CP';
  18         32  
  18         130  
8              
9             # Generated from contrib/512-bigram.pl using various Turkish texts from
10             # Wikisource.
11             my %CP1254_FREQS = map { $_ => 1 } (
12             "\x69\x6e","\x61\x72","\x61\x6e","\x65\x72","\x6c\x61","\x6c\x65","\x64\x65",
13             "\xfd\x6e","\x64\x61","\x65\x6e","\x69\x6c","\x6c\x69","\x69\x72","\x6e\x64",
14             "\x6d\x61","\x72\x69","\x64\x69","\x79\x61","\x72\x61","\x61\x6b","\x6d\x65",
15             "\x61\x6c","\x73\x69","\x65\x74","\x6b\x61","\x6e\x69","\x62\x69","\x61\x79",
16             "\x75\x6e","\x76\x65","\x65\x6c","\x72\x65","\x6e\xfd","\x6e\x65","\x74\x61",
17             "\x74\x65","\x74\x69","\x65\x6b","\x79\x65","\x6f\x6c","\x72\xfd","\x6b\x69",
18             "\x73\x61","\x6e\x61","\x65\x73","\x69\x79","\x61\x73","\x69\x6d","\x6f\x72",
19             "\x68\x61","\x62\x75","\x61\x6d","\x75\x72","\x65\x6d","\xf0\x69","\x61\x64",
20             "\x6e\x75","\x61\x74","\x69\x6b","\x65\x64","\x6c\xfd","\x73\xfd","\x64\xfd",
21             "\x6d\x69","\x72\x64","\x65\x79","\x61\xfe","\x69\x73","\x6e\x6c","\x72\x75",
22             "\xfd\x6c","\x75\x6c","\x74\xfd","\x6c\x64","\x62\x61","\x72\x2e","\x6b\x6c",
23             "\x6f\x6e","\x73\x65","\x79\x6f","\x6c\x6d","\x63\x65","\x69\x7a","\x67\x65",
24             "\x72\x6c","\xf0\xfd","\x6c\x75","\x69\xfe","\x6b\x65","\x64\x75","\xfc\x6e",
25             "\x6c\x6c","\x75\x6d","\xfd\x72","\x79\xfd","\x65\x63","\x61\x68","\xfc\x72",
26             "\x69\xf0","\x76\x61","\xfd\xf0","\xfd\x6b","\x62\x65","\x79\x6c","\xfd\x6d",
27             "\x65\xf0","\x6b\x74","\x61\x7a","\x6d\xfd","\x79\x69","\x7a\x65","\x61\x62",
28             "\x73\x6f","\xfe\x74","\x61\x70","\x6b\xfd","\xf6\x72","\x72\x73","\x73\x74",
29             "\x72\x74","\xf0\x75","\x6b\x75","\x69\xe7","\x69\x74","\x72\x6b","\x6d\x75",
30             "\x67\xf6","\x6b\x6f","\x63\x61","\x67\x69","\x75\xf0","\x68\x69","\x74\xfc",
31             "\x69\x2c","\x6f\x6b","\xfd\x7a","\xfd\xfe","\x65\x76","\xe7\x69","\x6d\x6c",
32             "\x6c\xfc","\xf6\x6e","\x73\x75","\x42\x75","\x74\x74","\xfd\x79","\xfe\xfd",
33             "\xfc\x7a","\xfe\x61","\xfc\x6c","\x64\xfc","\xfd\x2c","\x7a\x61","\x61\xf0",
34             "\x69\x68","\x68\x65","\x67\xfc","\x73\x6c","\x6e\x73","\x6e\x63","\xfe\x65",
35             "\x6e\x6d","\x65\x70","\x72\x6d","\xfe\x6d","\x74\x6c","\x75\x7a","\x32\x30",
36             "\x2e\x2e","\x66\x61","\xfe\x69","\x69\x70","\x64\x6f","\x54\x61","\xfc\x6d",
37             "\xf6\x79","\x41\x6c","\x6c\x6b","\x75\xfe","\x74\x6d","\x6e\x2c","\x61\x63",
38             "\xf6\x7a","\x7a\x2e","\x72\x2c","\x75\x79","\x52\x65","\xe7\x65","\x63\x69",
39             "\x69\x64","\x6f\xf0","\x6e\xfc","\x72\xfc","\xfe\x6c","\x75\x6b","\xfc\x6b",
40             "\x70\x61","\x4d\x65","\x7a\x69","\x65\x2c","\x4b\x61","\x42\x61","\xe7\xfd",
41             "\x75\x73","\x74\x75","\x69\x66","\xfc\x73","\xfc\xfe","\x7a\xfd","\x45\x72",
42             "\x6b\xfc","\x4b\x75","\x61\x76","\x30\x30","\xfc\x79","\x7a\x6c","\x73\xf6",
43             "\x61\x66","\x6d\xfc","\x79\xfc","\xfd\x73","\xfe\x6b","\x74\x6f","\x65\x7a",
44             "\x68\x75","\xf0\x61","\x6f\x70","\x72\x67","\xf0\x72","\x65\xfe","\x75\x74",
45             "\x42\x65","\x65\x62","\x61\xe7","\x75\x2c","\x70\x6c","\xfc\x74","\xe7\x61",
46             "\x73\xfc","\x65\x66","\x79\x79","\x27\x6e","\x79\x6e","\x54\xfc","\x4d\x61",
47             "\x63\xfd","\xf0\x65","\x42\x69","\x64\x64","\x6b\x2c","\x6e\x74","\xfd\x64",
48             "\x6e\x72","\x6d\x2e","\x61\x2c","\x61\x69","\x66\x65","\x6c\x67","\x27\x64",
49             "\x6d\x64","\x70\xfd","\x6b\x2e","\x69\x63","\x69\x62","\x6d\x73","\x44\x65",
50             "\x2d\x69","\x67\x61","\x65\xe7","\x79\x72","\x6b\x72","\x48\x61","\x61\x61",
51             "\x7a\x64","\x6e\x27","\x6b\x73","\x6b\x6b","\xe7\x6f","\x27\xfd","\xfd\x74",
52             "\xfc\xf0","\xfe\xfc","\x79\x75","\x65\x68","\x70\x74","\x4d\x69","\x72\x6f",
53             "\x69\x2e","\x72\xfe","\x72\x6e","\x6c\x74","\x6c\xe2","\xf0\xfc","\x22\x2c",
54             "\x76\x69","\x68\x6d","\x6f\x6d","\x6e\x67","\x54\x65","\x59\x61","\xfe\x75",
55             "\x6d\x2c","\x68\x74","\xf6\xf0","\xf0\x6c","\x79\x64","\x72\x62","\x75\x76",
56             "\x69\x76","\xfe\x62","\xdd\x73","\x2d\xfd","\x4c\x55","\x2e\x32","\x41\x6e",
57             "\x53\x65","\x53\x61","\x79\x67","\x2e\x30","\x74\x72","\x7a\x75","\x62\xfc",
58             "\x4d\xfc","\x66\xfd","\x63\x75","\x48\xfc","\x62\xfd","\x76\x6c","\x69\x3a",
59             "\xe2\x6c","\x66\x6f","\x7a\x67","\x6d\x68","\x56\x69","\x76\x72","\xfc\x70",
60             "\xfc\x64","\x6e\x6f","\x2e\x31","\x41\x52","\x79\xf6","\x74\x2d","\x43\x75",
61             "\x62\xf6","\x45\x6b","\x6e\x6b","\x4d\x75","\x27\x69","\x97\x52","\x6f\x73",
62             "\x70\x68","\xe2\x6e","\x6b\x6d","\x65\x27","\x47\x65","\x50\x61","\x68\xfc",
63             "\x68\xfd","\x27\x61","\x64\xf6","\x44\x61","\x75\x64","\x75\x68","\x47\xf6",
64             "\x41\x64","\x6f\x74","\x7a\xfc","\x67\xfd","\x31\x2e","\x6d\x6f","\x74\x73",
65             "\x77\x69","\x30\x32","\x30\x37","\x6f\x79","\x70\x65","\x72\x63","\x4b\x41",
66             "\x66\x69","\x33\x2e","\x6e\x2e","\x68\x6c","\x7a\x2c","\x6e\x62","\x48\x65",
67             "\x2c\x5b","\x70\x73","\xdd\x68","\x4b\x55","\x68\x73","\xde\x75","\x74\x6b",
68             "\x52\x61","\x41\x44","\x46\x69","\x67\x75","\x54\x4c","\x6d\x6d","\x68\xe2",
69             "\xfe\xf6","\x47\xd6","\xfd\x2e","\x61\x27","\xf6\x6b","\x73\x6d","\x69\x27",
70             "\x6c\x73","\xf6\x6c","\x68\x6b","\xf6\x73","\x75\x70","\xe7\x6c","\x53\x69",
71             "\x6e\x79","\x70\x72","\x72\xe7","\x41\x4c","\xfc\xe7","\x55\x54","\x41\x72",
72             "\xd6\x52","\x65\x2e","\x52\xdc","\x72\x68","\xdc\x4d","\x73\x6b","\xfe\x2c",
73             "\x72\x7a","\x75\x66","\x75\x62","\x63\x6c","\x2e\x94","\x73\x79","\x31\x39",
74             "\x74\x70","\x2e\x6f","\x3a\x2f","\x30\x31","\x6b\xf6","\x75\x67","\x30\x35",
75             "\x2f\x2f","\x41\xd0","\xe2\x74","\xfc\x63","\x43\x65","\x6f\x75","\x55\x27",
76             "\x61\x75","\x31\x30","\x59\x65","\x27\xfc","\x6c\x70","\x6d\xe2","\x4d\x27",
77             "\xe2\x68","\x63\x72","\xd0\x4c","\xfd\x63","\x6b\x68","\x41\x6d","\x41\x79",
78             "\x56\x65","\x52\x41","\x72\x27","\x68\x72","\x4b\x47","\x74\x2c","\x6b\xe7",
79             "\x47\x42","\x56\x4b","\x6c\x6f","\x41\x68","\x63\x6f","\x47\x61","\x62\x6c",
80             "\x41\x48","\x76\x75","\x69\x61","\x4c\x69","\x48\xdd","\x44\x41","\x6b\xe2",
81             "\x59\xf6","\x7a\xe2","\x6f\xfe","\x6c\x2d","\x4b\x65","\x41\x42","\x75\x2e",
82             "\x62\x6f","\xe7\x6d","\xe7\x74","\x7a\x6d","\x45\xf0","\x50\x41","\xfd\x66",
83             "\x74\x27","\x41\x74","\x6d\x63","\x70\x69","\x7a\x6f","\x6c\x63","\x61\x22",
84             "\xf0\x64","\x75\x63","\xdd\x6c","\x4f\xd0","\x65\x2d","\x2e\x22","\x6f\x64",
85             "\xe2\x6d",
86             );
87              
88             my $DIST_RATIO = '0.94';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x81", # Alt
92             "\x8d", # ^
93             "\x8e", # ^
94             "\x8f", # ^
95             "\x90", # ^
96             "\x9d", # ^
97             "\x9e", # ^
98             "\xa0", # NBSP
99             );
100              
101             my %IGNORE = (
102             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
103             %CHARSET_SPACE_SET,
104             );
105              
106             sub ignore {
107              
108 548864     548864 0 875345 my ($self, $byte) = @_;
109              
110 548864         1427837 return exists $IGNORE{ $byte };
111              
112             }
113              
114             sub freq_bigram {
115              
116 404829     404829 0 652006 my ($self, $bigram) = @_;
117              
118 404829         1100809 return exists $CP1254_FREQS{ $bigram };
119              
120             }
121              
122 0     0 0 0 sub dist_ratio { $DIST_RATIO };
123              
124 67     67 0 209 sub encoding { 'CP1254' }
125              
126             1;