| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1252; |
|
2
|
18
|
|
|
18
|
|
316
|
use 5.016; |
|
|
18
|
|
|
|
|
73
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
114
|
use strict; |
|
|
18
|
|
|
|
|
37
|
|
|
|
18
|
|
|
|
|
653
|
|
|
5
|
18
|
|
|
18
|
|
177
|
use warnings; |
|
|
18
|
|
|
|
|
64
|
|
|
|
18
|
|
|
|
|
1240
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
233
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
48
|
|
|
|
18
|
|
|
|
|
119
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Generated from contrib/512-bigrams.pl, trained from various public-domain |
|
10
|
|
|
|
|
|
|
# texts from Project Gutenberg (https://www.gutenberg.org/). |
|
11
|
|
|
|
|
|
|
my %CP1252_FREQS = map { $_ => 1 } ( |
|
12
|
|
|
|
|
|
|
"\x65\x72","\x65\x6e","\x68\x65","\x69\x6e","\x61\x6e","\x64\x65","\x65\x73", |
|
13
|
|
|
|
|
|
|
"\x74\x68","\x72\x65","\x74\x65","\x6e\x64","\x6f\x6e","\x61\x73","\x61\x72", |
|
14
|
|
|
|
|
|
|
"\x73\x65","\x73\x74","\x63\x68","\x6f\x72","\x6e\x74","\x72\x61","\x6c\x61", |
|
15
|
|
|
|
|
|
|
"\x75\x65","\x6c\x65","\x68\x61","\x74\x6f","\x65\x6c","\x6f\x73","\x61\x6c", |
|
16
|
|
|
|
|
|
|
"\x69\x65","\x71\x75","\x61\x74","\x69\x74","\x69\x73","\x6d\x65","\x63\x6f", |
|
17
|
|
|
|
|
|
|
"\x65\x64","\x75\x6e","\x74\x61","\x72\x6f","\x6e\x65","\x6e\x67","\x64\x6f", |
|
18
|
|
|
|
|
|
|
"\x6c\x6c","\x6f\x75","\x6e\x6f","\x72\x69","\x68\x69","\x74\x69","\x69\x63", |
|
19
|
|
|
|
|
|
|
"\x61\x64","\x76\x65","\x64\x69","\x6c\x6f","\x62\x65","\x6d\x61","\x65\x69", |
|
20
|
|
|
|
|
|
|
"\x64\x61","\x73\x69","\x6c\x69","\x6f\x6d","\x63\x65","\x63\x61","\x67\x65", |
|
21
|
|
|
|
|
|
|
"\x74\x72","\x73\x61","\x73\x6f","\x61\x62","\x75\x73","\x68\x6f","\x6f\x66", |
|
22
|
|
|
|
|
|
|
"\x6e\x61","\x75\x72","\x65\x6d","\x61\x63","\x6d\x69","\x70\x6f","\x6d\x6f", |
|
23
|
|
|
|
|
|
|
"\x70\x65","\x70\x61","\x65\x74","\x63\x69","\x65\x61","\x65\x63","\x69\x64", |
|
24
|
|
|
|
|
|
|
"\x69\x6c","\x73\x2c","\x69\x6d","\x69\x6f","\x69\x72","\x65\x2c","\x61\x6d", |
|
25
|
|
|
|
|
|
|
"\x72\x74","\x75\x74","\x6e\x63","\x6e\x73","\x6f\x74","\x6e\x69","\x77\x61", |
|
26
|
|
|
|
|
|
|
"\x6f\x6c","\x73\x75","\x70\x72","\x61\x69","\x69\x67","\x77\x69","\x2e\x2e", |
|
27
|
|
|
|
|
|
|
"\x73\x73","\x73\x63","\x72\x73","\x62\x61","\x61\x75","\x69\x61","\x6f\x2c", |
|
28
|
|
|
|
|
|
|
"\x76\x69","\x77\x65","\x61\x2c","\x66\x6f","\x68\x74","\x75\x6c","\x6e\x2c", |
|
29
|
|
|
|
|
|
|
"\x65\x65","\x77\x68","\x6c\x79","\x6f\x77","\x67\x61","\x72\x64","\x74\x75", |
|
30
|
|
|
|
|
|
|
"\x75\x69","\x73\x68","\x62\x72","\x73\x70","\x63\x75","\x66\x65","\x65\x67", |
|
31
|
|
|
|
|
|
|
"\x72\x72","\x61\x67","\x6b\x65","\x76\x6f","\x65\x2e","\x6f\x64","\x79\x6f", |
|
32
|
|
|
|
|
|
|
"\xed\x61","\x61\x76","\x6c\x64","\x6d\x70","\x73\x2e","\x62\x6c","\x2d\x2d", |
|
33
|
|
|
|
|
|
|
"\x74\x74","\x66\x69","\x65\x76","\x6f\x63","\x6f\x6f","\x67\x6f","\x75\x63", |
|
34
|
|
|
|
|
|
|
"\x72\x2c","\x76\x61","\x61\x79","\x67\x72","\x74\x2c","\x75\x6d","\x66\x61", |
|
35
|
|
|
|
|
|
|
"\x63\x74","\x67\x75","\x6d\x75","\x72\x6d","\x61\x70","\x75\x61","\x67\x68", |
|
36
|
|
|
|
|
|
|
"\x6c\x74","\x72\x6e","\x70\x69","\x62\x75","\x77\x6f","\x70\x6c","\x6e\x6e", |
|
37
|
|
|
|
|
|
|
"\x6e\x2e","\x70\x75","\x66\x72","\x69\x76","\x68\x72","\x6f\x76","\x62\x6f", |
|
38
|
|
|
|
|
|
|
"\x62\x69","\x64\x2c","\x6a\x6f","\x63\x6b","\x72\x75","\x72\x67","\x74\x73", |
|
39
|
|
|
|
|
|
|
"\x65\x70","\x72\x2e","\x64\x72","\x6c\x75","\x61\x2e","\x6f\x70","\x65\x66", |
|
40
|
|
|
|
|
|
|
"\x72\x79","\x66\x75","\x69\xf3","\x75\x67","\x6f\x2e","\x69\x66","\x63\x72", |
|
41
|
|
|
|
|
|
|
"\x6f\x62","\x65\x75","\x6d\x62","\x72\x63","\x67\x69","\x75\x64","\x74\x2e", |
|
42
|
|
|
|
|
|
|
"\x64\x75","\x54\x68","\x65\x78","\x72\x6c","\xf3\x6e","\x7a\x75","\x75\x70", |
|
43
|
|
|
|
|
|
|
"\x79\x2c","\x6d\x6d","\x6e\x75","\x65\x62","\x69\x62","\x6a\x65","\x65\x79", |
|
44
|
|
|
|
|
|
|
"\x65\x68","\x92\x73","\xe1\x73","\x7a\x61","\x6c\x73","\x66\x66","\x74\x6c", |
|
45
|
|
|
|
|
|
|
"\x6f\x69","\x75\x66","\x65\x77","\x48\x65","\x68\x75","\x66\x74","\x69\x6a", |
|
46
|
|
|
|
|
|
|
"\x63\x6c","\x4d\x61","\x6a\x61","\x64\x2e","\x65\x7a","\x79\x65","\x70\x70", |
|
47
|
|
|
|
|
|
|
"\x72\x6b","\x6f\x6b","\x75\x62","\x74\x79","\x7a\x65","\x6e\x6b","\x61\x66", |
|
48
|
|
|
|
|
|
|
"\x67\x6c","\x6c\x2c","\x3d\x2c","\x61\x6b","\x6e\x66","\x73\x6d","\x6d\xe1", |
|
49
|
|
|
|
|
|
|
"\x6b\x69","\x41\x6e","\x62\x79","\x6e\x7a","\x6d\x79","\x61\x7a","\x79\x61", |
|
50
|
|
|
|
|
|
|
"\x61\x71","\x6c\x66","\x75\xe9","\x61\x68","\x65\x6a","\x50\x72","\x6e\x6c", |
|
51
|
|
|
|
|
|
|
"\x65\x6f","\x69\x7a","\x69\x68","\x64\x73","\x66\x6c","\x6e\x79","\x6f\x67", |
|
52
|
|
|
|
|
|
|
"\x68\x2c","\x72\xed","\x68\x6c","\x51\x75","\x72\x62","\x77\x6e","\x68\x6e", |
|
53
|
|
|
|
|
|
|
"\x44\x65","\x6d\x73","\xf1\x6f","\xe3\x6f","\x67\x2c","\x70\x74","\x6a\x75", |
|
54
|
|
|
|
|
|
|
"\x61\x77","\x44\x69","\x79\x2e","\x42\x65","\x6c\x67","\x69\x70","\x2e\x94", |
|
55
|
|
|
|
|
|
|
"\x47\x65","\x6d\x2c","\x48\x61","\x6d\x2e","\x6b\x6e","\x4e\x6f","\x53\x61", |
|
56
|
|
|
|
|
|
|
"\x53\x69","\x50\x65","\x69\x6b","\x72\xe1","\x67\x74","\x6f\x79","\x65\xf1", |
|
57
|
|
|
|
|
|
|
"\xe1\x6e","\x53\x74","\x73\x6b","\x7a\x6f","\x72\x76","\x74\x7a","\x2c\x94", |
|
58
|
|
|
|
|
|
|
"\x6e\x76","\x6c\x6d","\x63\x63","\x62\xed","\x67\x6e","\x67\x2e","\xf1\x61", |
|
59
|
|
|
|
|
|
|
"\x73\x3b","\x43\x61","\x6c\x76","\x6f\x6a","\x72\x70","\x57\x65","\x6f\x61", |
|
60
|
|
|
|
|
|
|
"\x53\x63","\x74\xe1","\x43\x6f","\x73\xed","\xfc\x72","\x73\x6c","\x72\x66", |
|
61
|
|
|
|
|
|
|
"\x61\x6a","\x65\x3b","\x6e\x68","\xe9\x73","\x53\x65","\x42\x75","\x6e\x62", |
|
62
|
|
|
|
|
|
|
"\x67\x73","\x61\xf1","\x4e\x61","\xe9\x6e","\x6b\x6f","\x68\x6d","\x6c\x2e", |
|
63
|
|
|
|
|
|
|
"\x74\x77","\x45\x73","\x57\x68","\x44\x61","\x6e\x92","\x79\x73","\x46\x72", |
|
64
|
|
|
|
|
|
|
"\x6f\x3d","\x64\x6c","\xe9\x6c","\x4d\x69","\x4c\x61","\x62\x73","\x72\x71", |
|
65
|
|
|
|
|
|
|
"\x41\x6c","\x78\x70","\x75\x79","\x45\x72","\x72\x7a","\x6b\x73","\x72\xe9", |
|
66
|
|
|
|
|
|
|
"\x61\xdf","\x64\xed","\x69\xe9","\x74\xf3","\x21\x94","\x6f\x3b","\x41\x75", |
|
67
|
|
|
|
|
|
|
"\x6c\x63","\x53\x6f","\x61\x3b","\x61\x3d","\x4c\x65","\x63\xed","\x55\x6e", |
|
68
|
|
|
|
|
|
|
"\x49\x49","\x64\x64","\x6b\x61","\x50\x61","\x6e\xed","\x56\x65","\x49\x6e", |
|
69
|
|
|
|
|
|
|
"\x6f\x68","\x52\x65","\x6c\x68","\x75\x76","\x70\x73","\x59\x6f","\x45\x6e", |
|
70
|
|
|
|
|
|
|
"\x6c\x62","\x50\x6f","\x4a\x61","\x47\x75","\x64\x79","\x4d\x65","\x68\x2e", |
|
71
|
|
|
|
|
|
|
"\x74\x63","\x70\x68","\x66\xfc","\x52\x6f","\x72\x68","\x45\x6c","\x72\x3d", |
|
72
|
|
|
|
|
|
|
"\x49\x74","\x7a\x69","\x46\x6f","\x76\x75","\x4d\x6f","\x31\x38","\x93\x49", |
|
73
|
|
|
|
|
|
|
"\xfc\x62","\x6d\xed","\x6b\x74","\x42\x61","\x65\x21","\x44\x6f","\x6b\x2c", |
|
74
|
|
|
|
|
|
|
"\x92\x74","\x57\x69","\x6b\x6c","\x6f\x65","\x72\xfc","\x56\x6f","\x6c\x6b", |
|
75
|
|
|
|
|
|
|
"\x68\x79","\x50\x69","\xdf\x65","\x54\x6f","\x6e\x71","\x65\x6b","\x65\x2d", |
|
76
|
|
|
|
|
|
|
"\xe9\x74","\x74\x92","\x4c\x69","\x79\x69","\xed\x73","\x41\x72","\x65\x71", |
|
77
|
|
|
|
|
|
|
"\x69\x75","\x6c\xed","\x78\x74","\x4c\x6f","\x54\x65","\x78\x69","\x73\x21", |
|
78
|
|
|
|
|
|
|
"\x61\x65","\x73\x77","\x6c\x6e","\x68\x73","\x75\xed","\x78\x63","\x47\x72", |
|
79
|
|
|
|
|
|
|
"\x6e\x6d","\x6e\x3b","\x48\x6f","\x42\x6f","\x68\x77","\x6f\x7a","\xfc\x63", |
|
80
|
|
|
|
|
|
|
"\x77\x2c","\xe4\x6e","\x74\xe9","\x2e\x2c","\x6e\xe3","\xe9\x72","\x62\x74", |
|
81
|
|
|
|
|
|
|
"\x72\x77","\xed\x6e","\x2d\x6d","\x48\x69","\x7a\x74","\x54\x61","\x93\x57", |
|
82
|
|
|
|
|
|
|
"\xbb\x2e","\x74\xed","\x63\xf3","\x66\x2e","\x75\x6a","\x72\xf3","\x6f\x3a", |
|
83
|
|
|
|
|
|
|
"\x52\x61","\x2d\x61","\xfa\x6e","\x6c\x27","\x66\x2c","\x27\x61","\x70\x66", |
|
84
|
|
|
|
|
|
|
"\x47\x6f","\x49\x6c","\x6b\x2e","\x65\x92","\x6c\xe1","\x74\xfa","\x53\x68", |
|
85
|
|
|
|
|
|
|
"\x57\x61", |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $DIST_RATIO = '0.94'; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
my %CHARSET_SPACE_SET = map { $_ => 1 } ( |
|
91
|
|
|
|
|
|
|
"\x81", # undefined |
|
92
|
|
|
|
|
|
|
"\x8d", # ^ |
|
93
|
|
|
|
|
|
|
"\x8f", # ^ |
|
94
|
|
|
|
|
|
|
"\x90", # ^ |
|
95
|
|
|
|
|
|
|
"\x9d", # ^ |
|
96
|
|
|
|
|
|
|
"\xa0", # NBSP |
|
97
|
|
|
|
|
|
|
); |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my %IGNORE = ( |
|
100
|
|
|
|
|
|
|
%EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET, |
|
101
|
|
|
|
|
|
|
%CHARSET_SPACE_SET, |
|
102
|
|
|
|
|
|
|
); |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub ignore { |
|
105
|
|
|
|
|
|
|
|
|
106
|
548864
|
|
|
548864
|
0
|
874323
|
my ($self, $byte) = @_; |
|
107
|
|
|
|
|
|
|
|
|
108
|
548864
|
|
|
|
|
1432660
|
return exists $IGNORE{ $byte }; |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub freq_bigram { |
|
113
|
|
|
|
|
|
|
|
|
114
|
407315
|
|
|
407315
|
0
|
647726
|
my ($self, $bigram) = @_; |
|
115
|
|
|
|
|
|
|
|
|
116
|
407315
|
|
|
|
|
1073428
|
return exists $CP1252_FREQS{ $bigram }; |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
0
|
|
|
0
|
0
|
0
|
sub dist_ratio { $DIST_RATIO } |
|
121
|
|
|
|
|
|
|
|
|
122
|
67
|
|
|
67
|
0
|
202
|
sub encoding { 'CP1252' } |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
1; |
|
125
|
|
|
|
|
|
|
|