File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1252.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1252;
2 18     18   316 use 5.016;
  18         73  
3             our $VERSION = '2.03';
4 18     18   114 use strict;
  18         37  
  18         653  
5 18     18   177 use warnings;
  18         64  
  18         1240  
6              
7 18     18   233 use parent 'EBook::Ishmael::CharDet::CP';
  18         48  
  18         119  
8              
9             # Generated from contrib/512-bigrams.pl, trained from various public-domain
10             # texts from Project Gutenberg (https://www.gutenberg.org/).
11             my %CP1252_FREQS = map { $_ => 1 } (
12             "\x65\x72","\x65\x6e","\x68\x65","\x69\x6e","\x61\x6e","\x64\x65","\x65\x73",
13             "\x74\x68","\x72\x65","\x74\x65","\x6e\x64","\x6f\x6e","\x61\x73","\x61\x72",
14             "\x73\x65","\x73\x74","\x63\x68","\x6f\x72","\x6e\x74","\x72\x61","\x6c\x61",
15             "\x75\x65","\x6c\x65","\x68\x61","\x74\x6f","\x65\x6c","\x6f\x73","\x61\x6c",
16             "\x69\x65","\x71\x75","\x61\x74","\x69\x74","\x69\x73","\x6d\x65","\x63\x6f",
17             "\x65\x64","\x75\x6e","\x74\x61","\x72\x6f","\x6e\x65","\x6e\x67","\x64\x6f",
18             "\x6c\x6c","\x6f\x75","\x6e\x6f","\x72\x69","\x68\x69","\x74\x69","\x69\x63",
19             "\x61\x64","\x76\x65","\x64\x69","\x6c\x6f","\x62\x65","\x6d\x61","\x65\x69",
20             "\x64\x61","\x73\x69","\x6c\x69","\x6f\x6d","\x63\x65","\x63\x61","\x67\x65",
21             "\x74\x72","\x73\x61","\x73\x6f","\x61\x62","\x75\x73","\x68\x6f","\x6f\x66",
22             "\x6e\x61","\x75\x72","\x65\x6d","\x61\x63","\x6d\x69","\x70\x6f","\x6d\x6f",
23             "\x70\x65","\x70\x61","\x65\x74","\x63\x69","\x65\x61","\x65\x63","\x69\x64",
24             "\x69\x6c","\x73\x2c","\x69\x6d","\x69\x6f","\x69\x72","\x65\x2c","\x61\x6d",
25             "\x72\x74","\x75\x74","\x6e\x63","\x6e\x73","\x6f\x74","\x6e\x69","\x77\x61",
26             "\x6f\x6c","\x73\x75","\x70\x72","\x61\x69","\x69\x67","\x77\x69","\x2e\x2e",
27             "\x73\x73","\x73\x63","\x72\x73","\x62\x61","\x61\x75","\x69\x61","\x6f\x2c",
28             "\x76\x69","\x77\x65","\x61\x2c","\x66\x6f","\x68\x74","\x75\x6c","\x6e\x2c",
29             "\x65\x65","\x77\x68","\x6c\x79","\x6f\x77","\x67\x61","\x72\x64","\x74\x75",
30             "\x75\x69","\x73\x68","\x62\x72","\x73\x70","\x63\x75","\x66\x65","\x65\x67",
31             "\x72\x72","\x61\x67","\x6b\x65","\x76\x6f","\x65\x2e","\x6f\x64","\x79\x6f",
32             "\xed\x61","\x61\x76","\x6c\x64","\x6d\x70","\x73\x2e","\x62\x6c","\x2d\x2d",
33             "\x74\x74","\x66\x69","\x65\x76","\x6f\x63","\x6f\x6f","\x67\x6f","\x75\x63",
34             "\x72\x2c","\x76\x61","\x61\x79","\x67\x72","\x74\x2c","\x75\x6d","\x66\x61",
35             "\x63\x74","\x67\x75","\x6d\x75","\x72\x6d","\x61\x70","\x75\x61","\x67\x68",
36             "\x6c\x74","\x72\x6e","\x70\x69","\x62\x75","\x77\x6f","\x70\x6c","\x6e\x6e",
37             "\x6e\x2e","\x70\x75","\x66\x72","\x69\x76","\x68\x72","\x6f\x76","\x62\x6f",
38             "\x62\x69","\x64\x2c","\x6a\x6f","\x63\x6b","\x72\x75","\x72\x67","\x74\x73",
39             "\x65\x70","\x72\x2e","\x64\x72","\x6c\x75","\x61\x2e","\x6f\x70","\x65\x66",
40             "\x72\x79","\x66\x75","\x69\xf3","\x75\x67","\x6f\x2e","\x69\x66","\x63\x72",
41             "\x6f\x62","\x65\x75","\x6d\x62","\x72\x63","\x67\x69","\x75\x64","\x74\x2e",
42             "\x64\x75","\x54\x68","\x65\x78","\x72\x6c","\xf3\x6e","\x7a\x75","\x75\x70",
43             "\x79\x2c","\x6d\x6d","\x6e\x75","\x65\x62","\x69\x62","\x6a\x65","\x65\x79",
44             "\x65\x68","\x92\x73","\xe1\x73","\x7a\x61","\x6c\x73","\x66\x66","\x74\x6c",
45             "\x6f\x69","\x75\x66","\x65\x77","\x48\x65","\x68\x75","\x66\x74","\x69\x6a",
46             "\x63\x6c","\x4d\x61","\x6a\x61","\x64\x2e","\x65\x7a","\x79\x65","\x70\x70",
47             "\x72\x6b","\x6f\x6b","\x75\x62","\x74\x79","\x7a\x65","\x6e\x6b","\x61\x66",
48             "\x67\x6c","\x6c\x2c","\x3d\x2c","\x61\x6b","\x6e\x66","\x73\x6d","\x6d\xe1",
49             "\x6b\x69","\x41\x6e","\x62\x79","\x6e\x7a","\x6d\x79","\x61\x7a","\x79\x61",
50             "\x61\x71","\x6c\x66","\x75\xe9","\x61\x68","\x65\x6a","\x50\x72","\x6e\x6c",
51             "\x65\x6f","\x69\x7a","\x69\x68","\x64\x73","\x66\x6c","\x6e\x79","\x6f\x67",
52             "\x68\x2c","\x72\xed","\x68\x6c","\x51\x75","\x72\x62","\x77\x6e","\x68\x6e",
53             "\x44\x65","\x6d\x73","\xf1\x6f","\xe3\x6f","\x67\x2c","\x70\x74","\x6a\x75",
54             "\x61\x77","\x44\x69","\x79\x2e","\x42\x65","\x6c\x67","\x69\x70","\x2e\x94",
55             "\x47\x65","\x6d\x2c","\x48\x61","\x6d\x2e","\x6b\x6e","\x4e\x6f","\x53\x61",
56             "\x53\x69","\x50\x65","\x69\x6b","\x72\xe1","\x67\x74","\x6f\x79","\x65\xf1",
57             "\xe1\x6e","\x53\x74","\x73\x6b","\x7a\x6f","\x72\x76","\x74\x7a","\x2c\x94",
58             "\x6e\x76","\x6c\x6d","\x63\x63","\x62\xed","\x67\x6e","\x67\x2e","\xf1\x61",
59             "\x73\x3b","\x43\x61","\x6c\x76","\x6f\x6a","\x72\x70","\x57\x65","\x6f\x61",
60             "\x53\x63","\x74\xe1","\x43\x6f","\x73\xed","\xfc\x72","\x73\x6c","\x72\x66",
61             "\x61\x6a","\x65\x3b","\x6e\x68","\xe9\x73","\x53\x65","\x42\x75","\x6e\x62",
62             "\x67\x73","\x61\xf1","\x4e\x61","\xe9\x6e","\x6b\x6f","\x68\x6d","\x6c\x2e",
63             "\x74\x77","\x45\x73","\x57\x68","\x44\x61","\x6e\x92","\x79\x73","\x46\x72",
64             "\x6f\x3d","\x64\x6c","\xe9\x6c","\x4d\x69","\x4c\x61","\x62\x73","\x72\x71",
65             "\x41\x6c","\x78\x70","\x75\x79","\x45\x72","\x72\x7a","\x6b\x73","\x72\xe9",
66             "\x61\xdf","\x64\xed","\x69\xe9","\x74\xf3","\x21\x94","\x6f\x3b","\x41\x75",
67             "\x6c\x63","\x53\x6f","\x61\x3b","\x61\x3d","\x4c\x65","\x63\xed","\x55\x6e",
68             "\x49\x49","\x64\x64","\x6b\x61","\x50\x61","\x6e\xed","\x56\x65","\x49\x6e",
69             "\x6f\x68","\x52\x65","\x6c\x68","\x75\x76","\x70\x73","\x59\x6f","\x45\x6e",
70             "\x6c\x62","\x50\x6f","\x4a\x61","\x47\x75","\x64\x79","\x4d\x65","\x68\x2e",
71             "\x74\x63","\x70\x68","\x66\xfc","\x52\x6f","\x72\x68","\x45\x6c","\x72\x3d",
72             "\x49\x74","\x7a\x69","\x46\x6f","\x76\x75","\x4d\x6f","\x31\x38","\x93\x49",
73             "\xfc\x62","\x6d\xed","\x6b\x74","\x42\x61","\x65\x21","\x44\x6f","\x6b\x2c",
74             "\x92\x74","\x57\x69","\x6b\x6c","\x6f\x65","\x72\xfc","\x56\x6f","\x6c\x6b",
75             "\x68\x79","\x50\x69","\xdf\x65","\x54\x6f","\x6e\x71","\x65\x6b","\x65\x2d",
76             "\xe9\x74","\x74\x92","\x4c\x69","\x79\x69","\xed\x73","\x41\x72","\x65\x71",
77             "\x69\x75","\x6c\xed","\x78\x74","\x4c\x6f","\x54\x65","\x78\x69","\x73\x21",
78             "\x61\x65","\x73\x77","\x6c\x6e","\x68\x73","\x75\xed","\x78\x63","\x47\x72",
79             "\x6e\x6d","\x6e\x3b","\x48\x6f","\x42\x6f","\x68\x77","\x6f\x7a","\xfc\x63",
80             "\x77\x2c","\xe4\x6e","\x74\xe9","\x2e\x2c","\x6e\xe3","\xe9\x72","\x62\x74",
81             "\x72\x77","\xed\x6e","\x2d\x6d","\x48\x69","\x7a\x74","\x54\x61","\x93\x57",
82             "\xbb\x2e","\x74\xed","\x63\xf3","\x66\x2e","\x75\x6a","\x72\xf3","\x6f\x3a",
83             "\x52\x61","\x2d\x61","\xfa\x6e","\x6c\x27","\x66\x2c","\x27\x61","\x70\x66",
84             "\x47\x6f","\x49\x6c","\x6b\x2e","\x65\x92","\x6c\xe1","\x74\xfa","\x53\x68",
85             "\x57\x61",
86             );
87              
88             my $DIST_RATIO = '0.94';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x81", # undefined
92             "\x8d", # ^
93             "\x8f", # ^
94             "\x90", # ^
95             "\x9d", # ^
96             "\xa0", # NBSP
97             );
98              
99             my %IGNORE = (
100             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
101             %CHARSET_SPACE_SET,
102             );
103              
104             sub ignore {
105              
106 548864     548864 0 874323 my ($self, $byte) = @_;
107              
108 548864         1432660 return exists $IGNORE{ $byte };
109              
110             }
111              
112             sub freq_bigram {
113              
114 407315     407315 0 647726 my ($self, $bigram) = @_;
115              
116 407315         1073428 return exists $CP1252_FREQS{ $bigram };
117              
118             }
119              
120 0     0 0 0 sub dist_ratio { $DIST_RATIO }
121              
122 67     67 0 202 sub encoding { 'CP1252' }
123              
124             1;
125