File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1250.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1250;
2 18     18   307 use 5.016;
  18         71  
3             our $VERSION = '2.03';
4 18     18   104 use strict;
  18         35  
  18         440  
5 18     18   82 use warnings;
  18         60  
  18         1084  
6              
7 18     18   591 use parent 'EBook::Ishmael::CharDet::CP';
  18         424  
  18         135  
8              
9             # Generated from contrib/512-bigram.pl using various Polish, Czech, and
10             # Hungarian texts from Wikisource.
11             my %CP1250_FREQS = map { $_ => 1 } (
12             "\x69\x65","\x6e\x69","\x6e\x61","\x73\x7a","\x7a\x65","\x73\x74","\x63\x7a",
13             "\x70\x6f","\x63\x68","\x72\x7a","\x77\x69","\x61\x6e","\x72\x61","\x7a\x61",
14             "\x65\x6e","\x74\x61","\x7a\x79","\x65\x6d","\x6f\x77","\x61\xb3","\x6d\x69",
15             "\x72\x6f","\x6b\x69","\x74\x65","\x61\x6b","\x65\x67","\x73\x69","\x70\x72",
16             "\x64\x7a","\x63\x69","\x6a\x65","\x6c\x65","\xb3\x61","\x6e\x65","\x65\x72",
17             "\x69\xea","\x69\x61","\x6b\x6f","\x61\x72","\x64\x6f","\x7a\x69","\x61\x6c",
18             "\x65\x6c","\x6f\x64","\x74\x6f","\x6f\x6e","\x61\x6d","\x65\x6a","\x67\x6f",
19             "\x65\x73","\x6c\x69","\x77\x61","\x6f\x73","\x6b\x61","\x6e\x79","\x65\x64",
20             "\x6a\x61","\x6c\x61","\x61\x64","\x6f\x72","\x73\x6b","\x61\x74","\x61\x6a",
21             "\x61\x63","\x6f\x6c","\x69\x63","\x6d\x61","\x6f\x62","\x62\x79","\xb3\x6f",
22             "\x72\x65","\x6e\x6f","\x65\x6b","\x65\x63","\x74\x72","\x61\x73","\x74\x79",
23             "\x70\x61","\x77\x79","\x65\x74","\x61\x77","\x69\x6e","\x69\x73","\x79\x63",
24             "\xbf\x65","\x7a\x6e","\x61\x7a","\x64\x61","\x6d\x6f","\x63\x65","\x6f\x6d",
25             "\x67\x79","\x61\x2c","\x6f\x74","\x68\x6f","\x62\x69","\x79\x6d","\x77\x6f",
26             "\x65\x2c","\x73\x65","\x79\x73","\x7a\x6f","\x68\x61","\x6f\x7a","\x64\x65",
27             "\x64\x6e","\x6d\x65","\xf3\x77","\x6f\x9c","\x77\x73","\x6f\x67","\x6a\xb9",
28             "\x65\x7a","\x70\x69","\x62\x61","\x64\x79","\x2e\x2e","\x6e\x74","\x6f\x6b",
29             "\x69\x2c","\x6f\x63","\x61\x2e","\x75\x73","\x69\xb3","\x69\x6b","\x72\x79",
30             "\x6b\x74","\x74\x75","\x69\x6c","\xb9\x63","\x9c\x63","\x73\x61","\x79\xb3",
31             "\x61\x67","\x62\x65","\x76\x61","\x6f\x6a","\x77\x65","\x72\x74","\x6d\x75",
32             "\x73\x70","\x62\x6f","\x65\x2e","\x6e\xb9","\x61\x70","\x69\x74","\x7a\x75",
33             "\x74\xf3","\x79\x2c","\x6c\x6f","\x73\x6f","\x72\x75","\xb3\x79","\x74\x74",
34             "\x6d\x6e","\x6e\x6e","\x61\xe6","\x69\xb9","\x76\x65","\x6b\x75","\x75\x64",
35             "\xf3\x72","\x67\x61","\x6b\x72","\x75\x63","\x6c\x6e","\x65\x77","\x6d\x2c",
36             "\x6d\x79","\x69\x2e","\x6f\x70","\x69\x6d","\x62\x72","\x7a\x74","\x74\x6b",
37             "\x75\x74","\x6f\x2c","\xe1\x72","\x70\x65","\x63\x61","\x77\x6e","\x7a\x63",
38             "\x73\xb3","\x61\x62","\xe1\x6e","\xe1\x74","\x63\x6f","\x74\x69","\x74\x77",
39             "\x64\x72","\x61\xbf","\x74\x6e","\x6c\x6b","\x72\xf3","\x74\x2c","\x72\x64",
40             "\x6c\x79","\x6e\x64","\xe1\x6c","\x75\x6a","\x6c\x75","\x67\xb3","\x79\x74",
41             "\x79\x77","\x79\x6e","\x69\x6f","\x75\x72","\x65\x62","\x79\x6c","\x67\x72",
42             "\x70\x75","\x63\x79","\x64\x75","\x69\x77","\x63\x73","\xbf\x79","\x75\x6d",
43             "\x65\xbf","\x69\x64","\x7a\x77","\x6f\x76","\x79\x2e","\x79\x6a","\x7a\xb9",
44             "\x65\x70","\x7a\x64","\x72\x73","\x6a\x73","\x72\xe1","\x6f\x75","\x6f\xbf",
45             "\x7a\x6b","\x6c\x74","\x75\x62","\x6e\x6b","\x72\x6e","\x6b\x2c","\x50\x6f",
46             "\x69\xe6","\x6a\x75","\xb3\x65","\x76\xe1","\x6b\x65","\x6c\x6c","\x7a\xea",
47             "\x75\xbf","\xea\x63","\x6a\x69","\x4e\x69","\x67\x65","\x64\x6c","\x67\x69",
48             "\x73\x77","\x75\x6b","\x6e\x63","\x79\x6b","\x67\x64","\xb3\x75","\x75\x6c",
49             "\x9c\x6c","\x75\x2c","\x62\x6c","\x69\x72","\x6d\x2e","\x6c\xe1","\xea\x64",
50             "\xe9\x72","\xea\x2c","\x54\x6f","\x6e\xed","\x63\x6b","\x9c\xe6","\x64\x69",
51             "\xe1\x73","\x6f\xb3","\x63\x6a","\x73\x73","\x72\x63","\x74\xe1","\x68\x65",
52             "\x6e\x2c","\x6c\x73","\x79\x9c","\x6b\xe9","\x73\x75","\x6f\x2e","\x69\x67",
53             "\x79\x62","\x6e\xe9","\x65\x9c","\x6c\x2c","\xb9\x64","\x79\x65","\x72\x69",
54             "\x75\x2e","\x66\x65","\xe9\x6e","\x73\x63","\x76\xe9","\xe9\x73","\x73\x6e",
55             "\x75\x6e","\x6d\xf3","\xe9\x67","\x7a\xb3","\x6e\x75","\x76\x6f","\x4a\x61",
56             "\x6b\x6e","\x72\x77","\x6e\xe1","\x50\x72","\x6b\xb9","\x79\xe6","\x76\x69",
57             "\x64\xb3","\xb9\x2c","\x6a\x6f","\xbf\x61","\x69\x75","\x7a\x6d","\x75\x70",
58             "\x6a\xea","\x6e\xea","\x6d\xe1","\x65\xb3","\x4e\x61","\x6b\x6c","\xea\x6b",
59             "\x72\xea","\xe9\x74","\x74\xe9","\xf1\x73","\x66\x6f","\xe9\x6c","\x68\x2c",
60             "\x66\x69","\x7a\xe1","\x9c\x6d","\xe6\x2c","\xf3\x6c","\x77\x72","\x73\x79",
61             "\x4d\x69","\x69\x7a","\x6b\x73","\x79\x70","\x6a\xe1","\x74\xea","\x67\x6e",
62             "\x6c\x64","\x62\x75","\x7a\x72","\xbf\x6e","\x6e\xec","\xb9\xb3","\x67\x6c",
63             "\x75\x67","\x77\x63","\x69\x9c","\x79\x61","\xea\x2e","\x6c\xe9","\xea\x74",
64             "\x72\x6d","\x61\x9c","\x77\xb3","\x50\x61","\x6b\xb3","\xb9\x2e","\x52\x6f",
65             "\x64\x6a","\x61\x75","\x68\x77","\x53\x7a","\x61\x69","\x69\x76","\x7a\x62",
66             "\x6b\x2e","\x74\x2e","\x74\xb9","\x6d\xe9","\x62\x62","\x6e\x73","\x6a\x6e",
67             "\x9e\x65","\x65\x68","\xb3\x6e","\x4c\x69","\xe6\x2e","\x70\x6c","\x6f\x68",
68             "\x66\x61","\x77\x64","\x73\x2c","\x7a\xf3","\x70\xf8","\x65\xf1","\x65\x76",
69             "\x4a\x65","\x64\x77","\x61\x76","\x6f\x69","\x6b\xf6","\x57\x69","\x7a\xe9",
70             "\x9c\x77","\xfc\x6c","\xf8\x65","\x6a\x2c","\xf6\x72","\x9c\x6e","\x73\xe9",
71             "\x54\x61","\x67\xf3","\x7a\x70","\xe1\x6d","\x6b\xf3","\x77\xb9","\x68\xe1",
72             "\x79\x64","\xe1\x67","\x7a\x67","\x79\x6f","\x4d\x6f","\xed\x74","\xb3\x2c",
73             "\x65\xe6","\x48\x61","\x68\x63","\x6e\x67","\x61\x3a","\x6b\xea","\x77\x2c",
74             "\x79\x72","\xf3\x64","\x62\xea","\x79\x7a","\x5a\x61","\x4d\x61","\xed\x6d",
75             "\x64\xe1","\x7a\x6c","\x76\xec","\x6d\x62","\x77\xf3","\x6b\xe1","\xe9\x6b",
76             "\x72\xb9","\x73\x76","\x72\x6b","\x68\x75","\x68\x6e","\x70\x79","\xf1\x63",
77             "\x74\x76","\x7a\x2c","\x54\x65","\x73\xe1","\xbf\x6f","\x6c\x6a","\x64\x63",
78             "\x64\x6b","\x41\x6c","\x79\x67","\xe1\x62","\x61\xf1","\x75\xb3","\xea\xbf",
79             "\x73\xb9","\x6c\xea","\x62\x6e","\x6e\x2e","\x68\x6c","\x72\x67","\x77\x7a",
80             "\xe9\x70","\x73\x6c","\xb3\xb9","\x72\x6a","\x43\x7a","\x74\x6a","\x6c\x6d",
81             "\x44\x65","\x6a\x64","\xf3\x6a","\x64\x70","\x6a\x6d","\x4e\x65","\xe1\x7a",
82             "\x73\x6d","\x68\x69","\xf6\x6c","\xea\xb3","\x44\x6f","\x67\x75","\x75\x77",
83             "\xf3\xb3","\x48\x6f","\xf6\x6e","\x69\xbf","\x9a\x65","\x77\x2e","\xb3\x2e",
84             "\x68\x79","\x6c\x2e","\x4d\x65","\x68\x2e","\x74\xec","\x72\xe9","\x4b\x6f",
85             "\x64\x73",
86             );
87              
88             my $DIST_RATIO = '0.90';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x81", # Alt
92             "\x83", # ^
93             "\x88", # ^
94             "\x90", # ^
95             "\x98", # ^
96             "\xa0", # NBSP
97             );
98              
99             my %IGNORE = (
100             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
101             %CHARSET_SPACE_SET,
102             );
103              
104             sub ignore {
105              
106 548864     548864 0 882239 my ($self, $byte) = @_;
107              
108 548864         1428983 return exists $IGNORE{ $byte };
109              
110             }
111              
112             sub freq_bigram {
113              
114 406408     406408 0 645195 my ($self, $bigram) = @_;
115              
116 406408         1086735 return exists $CP1250_FREQS{ $bigram };
117              
118             }
119              
120 0     0 0 0 sub dist_ratio { $DIST_RATIO };
121              
122 67     67 0 335 sub encoding { 'CP1250' }
123              
124             1;