| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1251; |
|
2
|
18
|
|
|
18
|
|
330
|
use 5.016; |
|
|
18
|
|
|
|
|
76
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
114
|
use strict; |
|
|
18
|
|
|
|
|
39
|
|
|
|
18
|
|
|
|
|
478
|
|
|
5
|
18
|
|
|
18
|
|
79
|
use warnings; |
|
|
18
|
|
|
|
|
32
|
|
|
|
18
|
|
|
|
|
965
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
100
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
78
|
|
|
|
18
|
|
|
|
|
129
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Generated from contrib/512-bigrams.pl, trained from various public-domain |
|
10
|
|
|
|
|
|
|
# texts from Project Gutenberg (https://www.gutenberg.org/). |
|
11
|
|
|
|
|
|
|
my %CP1251_FREQS = map { $_ => 1 } ( |
|
12
|
|
|
|
|
|
|
"\xf1\xf2","\xf2\xee","\xe5\xed","\xed\xee","\xed\xe8","\xed\xe0","\xf0\xe0", |
|
13
|
|
|
|
|
|
|
"\xed\xe5","\xea\xee","\xe3\xee","\xee\xe2","\xf0\xee","\xef\xee","\xe5\xf0", |
|
14
|
|
|
|
|
|
|
"\xee\xf1","\xf0\xe5","\xe2\xee","\xef\xf0","\xeb\xe8","\xee\xf0","\xe0\xeb", |
|
15
|
|
|
|
|
|
|
"\xee\xf2","\xe5\xeb","\xe2\xe5","\xe5\xf2","\xe0\xed","\xe5\xf1","\xf2\xe5", |
|
16
|
|
|
|
|
|
|
"\xee\xeb","\xea\xe0","\xf2\xe0","\xee\xec","\xeb\xee","\xe2\xe0","\xf1\xea", |
|
17
|
|
|
|
|
|
|
"\xee\xe4","\xf0\xe8","\xeb\xe5","\xf2\xfc","\xee\xe3","\xf2\xe8","\xe4\xe5", |
|
18
|
|
|
|
|
|
|
"\xee\xed","\xe0\xf2","\xe5\xec","\xeb\xe0","\xec\xe5","\xed\xfb","\xe8\xf2", |
|
19
|
|
|
|
|
|
|
"\xeb\xfc","\xf7\xe5","\xe4\xe0","\xe0\xea","\xee\xe1","\xee\xe9","\xe8\xf1", |
|
20
|
|
|
|
|
|
|
"\xf2\xe2","\xe4\xee","\xe8\xeb","\xe8\xed","\xe8\xec","\xe7\xe0","\xed\xed", |
|
21
|
|
|
|
|
|
|
"\xe5\xe4","\xe0\xf1","\xec\xee","\xe2\xe8","\xf2\xf0","\xf1\xee","\xe8\xe5", |
|
22
|
|
|
|
|
|
|
"\xf1\xff","\xe0\xf0","\xea\xe8","\xe6\xe5","\xe8\xe7","\xe0\xe2","\xe0\xec", |
|
23
|
|
|
|
|
|
|
"\xe0\xe7","\xec\xe8","\xf1\xe5","\xe5\xe3","\xe8\xff","\xe1\xfb","\xec\xe0", |
|
24
|
|
|
|
|
|
|
"\xee\xe5","\xf1\xeb","\xf0\xf3","\xe5\xe9","\xf7\xf2","\xe4\xe8","\xe1\xee", |
|
25
|
|
|
|
|
|
|
"\xe2\xf1","\xe8\xea","\xf7\xe0","\xe0\x2c","\xf1\xfc","\xe8\xf5","\xe5\xea", |
|
26
|
|
|
|
|
|
|
"\xef\xe5","\xe8\x2c","\xe0\xff","\xe2\xfb","\xe8\xe2","\xe8\xf7","\xe8\xe9", |
|
27
|
|
|
|
|
|
|
"\xf8\xe5","\xe5\x2c","\xee\xea","\xf3\xe4","\xe1\xe5","\xf1\xe2","\xe4\xf3", |
|
28
|
|
|
|
|
|
|
"\xec\xf3","\xeb\xff","\xe0\xe4","\xe5\xe2","\xf2\xf1","\xe4\xed","\xfd\xf2", |
|
29
|
|
|
|
|
|
|
"\xf9\xe5","\xee\x2c","\xe8\xe8","\xfb\xeb","\xfb\xe9","\xee\xe6","\xe0\xe5", |
|
30
|
|
|
|
|
|
|
"\xf1\xe0","\xfc\xed","\xf7\xe8","\xe7\xed","\xf1\xe8","\xee\xef","\xf5\xee", |
|
31
|
|
|
|
|
|
|
"\xe5\xe5","\xef\xe8","\xed\xf3","\xf6\xe5","\xee\xf7","\xf0\xfb","\xe8\xe4", |
|
32
|
|
|
|
|
|
|
"\xff\x2c","\xe5\xf7","\xff\xf2","\xf3\xf2","\xea\xf0","\xf2\xed","\xfb\xe5", |
|
33
|
|
|
|
|
|
|
"\xed\xff","\xf1\xef","\xfb\xf5","\xec\xed","\xe6\xe8","\xfb\xec","\xf8\xe8", |
|
34
|
|
|
|
|
|
|
"\xe8\xf0","\xec\x2c","\xee\xe7","\xe1\xf0","\xe2\xeb","\xf2\xf3","\xf3\xfe", |
|
35
|
|
|
|
|
|
|
"\xe5\xe7","\xf6\xe8","\xe3\xe0","\xfc\x2c","\xe7\xe2","\xec\xfb","\xea\xf3", |
|
36
|
|
|
|
|
|
|
"\xe4\xf0","\xee\xe8","\xf2\xfb","\xeb\xf3","\xef\xe0","\xeb\xfe","\xe3\xf0", |
|
37
|
|
|
|
|
|
|
"\xf3\xf1","\xe2\xed","\xe9\x2c","\xf0\xed","\x31\x38","\xf1\xf1","\xe0\xf5", |
|
38
|
|
|
|
|
|
|
"\xe5\xe1","\xe6\xe4","\xf1\xed","\xe1\xeb","\xe0\xfe","\xe7\xe4","\xe0\xe6", |
|
39
|
|
|
|
|
|
|
"\xe3\xeb","\xe5\xe6","\xf3\xec","\xf0\xff","\xf1\xf3","\xed\xfc","\xf3\xe6", |
|
40
|
|
|
|
|
|
|
"\xed\xf2","\xfb\xe2","\xe6\xe0","\xe3\xe8","\xf3\xf7","\xe6\xed","\xe0\xf7", |
|
41
|
|
|
|
|
|
|
"\xe3\xe4","\xf1\xec","\xe1\xf3","\xe2\xf0","\xfc\xf1","\xe0\xef","\xe0\x2e", |
|
42
|
|
|
|
|
|
|
"\xf3\x2c","\xed\xf1","\xec\x2e","\xe8\xf6","\xf3\xe3","\xf9\xe8","\xe0\xe1", |
|
43
|
|
|
|
|
|
|
"\xe4\xe2","\xf3\xf0","\xe2\xf3","\xea\xf2","\xe5\xef","\xf7\xed","\xf2\x2c", |
|
44
|
|
|
|
|
|
|
"\xeb\xf1","\xee\xf8","\xe8\x2e","\xfb\x2c","\xfe\xf2","\xe4\xeb","\xf3\xea", |
|
45
|
|
|
|
|
|
|
"\xe0\xe3","\xe7\xee","\xf3\xe1","\xf0\xf6","\xf0\xf2","\xee\xfe","\xe7\xe8", |
|
46
|
|
|
|
|
|
|
"\xef\xeb","\xe0\xf8","\xc3\xe5","\xe1\xe8","\xfb\xf1","\xe4\xfb","\xed\xf6", |
|
47
|
|
|
|
|
|
|
"\xe5\x2e","\xee\xff","\xe8\xe3","\xfc\xea","\xea\xeb","\xe8\xe1","\xef\xf3", |
|
48
|
|
|
|
|
|
|
"\xf6\xe0","\xe4\xf1","\xf8\xe0","\xff\xed","\xe8\xfe","\xe9\xf1","\xe2\xf8", |
|
49
|
|
|
|
|
|
|
"\xfe\xf9","\xfb\xf2","\xe3\xf3","\xf3\xf8","\xf3\xef","\xe3\xe5","\xf5\xe0", |
|
50
|
|
|
|
|
|
|
"\x29\x2e","\xe1\xe0","\xcf\xee","\xfe\x2c","\xe5\xf9","\xe7\xfb","\xe8\xe0", |
|
51
|
|
|
|
|
|
|
"\xea\xe5","\xf0\xf1","\xe5\xee","\xbb\x2c","\xe7\xec","\xf0\xe3","\xff\xf1", |
|
52
|
|
|
|
|
|
|
"\xf0\xfc","\xfc\xe5","\xeb\x2c","\xea\xed","\xf3\xeb","\xe7\xe5","\xcf\xf0", |
|
53
|
|
|
|
|
|
|
"\xe2\x2c","\xed\xe4","\xe5\xf8","\xfc\xec","\xeb\xed","\xf0\xe2","\xe5\xf5", |
|
54
|
|
|
|
|
|
|
"\xff\x2e","\xe0\xe9","\xf3\xe2","\xff\xec","\x31\x37","\xeb\xfb","\xf2\xea", |
|
55
|
|
|
|
|
|
|
"\xf0\x2e","\xf2\xff","\xf5\x2c","\x29\x2c","\xec\xff","\xca\xe0","\xf7\xf3", |
|
56
|
|
|
|
|
|
|
"\x65\x72","\xe8\xee","\x2e\x29","\xe3\x2e","\xf3\xf9","\xf4\xe8","\xee\x2e", |
|
57
|
|
|
|
|
|
|
"\xff\xeb","\xea\xf1","\xbb\x2e","\xed\x2c","\xff\xe7","\xff\xe2","\xf9\xe0", |
|
58
|
|
|
|
|
|
|
"\x97\x31","\xcd\xee","\xe9\x2e","\xf2\x2e","\xe5\xf6","\x65\x6e","\xf5\xe8", |
|
59
|
|
|
|
|
|
|
"\x69\x73","\x38\x33","\xe8\xe6","\xf0\xf5","\xfc\xfe","\xea\x2c","\xf3\xed", |
|
60
|
|
|
|
|
|
|
"\xe1\xf9","\xf3\xf5","\xf0\xec","\xcd\xe0","\xe1\xed","\x6f\x6e","\xf0\xe6", |
|
61
|
|
|
|
|
|
|
"\xe2\xff","\xf3\xe7","\xe9\xed","\xff\xf9","\x28\x31","\xcf\xe5","\xcd\xe5", |
|
62
|
|
|
|
|
|
|
"\xfb\xf8","\xeb\xe6","\x27\x27","\xf1\xfb","\x2d\xf2","\x69\x6e","\xe7\xf0", |
|
63
|
|
|
|
|
|
|
"\xe8\xf8","\xee\xf5","\xfc\xff","\xfe\xe4","\xff\xe5","\xf0\xe4","\xe2\xe7", |
|
64
|
|
|
|
|
|
|
"\xe4\xfc","\xe7\xf3","\xe8\xef","\xf4\xf0","\x74\x65","\xe4\x2e","\xe2\xf2", |
|
65
|
|
|
|
|
|
|
"\x64\x65","\xe2\x2e","\xf5\xf0","\xfc\x2e","\xcc\xee","\x6f\x72","\xe0\xbb", |
|
66
|
|
|
|
|
|
|
"\xe5\xfe","\xea\xe2","\xf3\xe5","\xeb\xeb","\xf0\xea","\xd1\xf2","\xec\xeb", |
|
67
|
|
|
|
|
|
|
"\xf3\x2e","\xce\xed","\xee\xf4","\xe4\xff","\xc7\xe0","\x65\x73","\xfe\xe1", |
|
68
|
|
|
|
|
|
|
"\xe0\xf4","\x72\x65","\xe5\xff","\xc3\xee","\xfc\xf8","\xfb\xf0","\xe3\xed", |
|
69
|
|
|
|
|
|
|
"\xe6\xf3","\xe1\xff","\xff\xe4","\xfb\x2e","\xe1\xf1","\xee\x2d","\xee\xee", |
|
70
|
|
|
|
|
|
|
"\xd0\xe5","\xf8\xed","\xca\xee","\xf4\xee","\x75\x72","\x49\x49","\xf8\xeb", |
|
71
|
|
|
|
|
|
|
"\x6c\x65","\xe7\xeb","\x74\x69","\xff\xf5","\xc0\x2e","\xe7\xff","\xcc\xe0", |
|
72
|
|
|
|
|
|
|
"\xf1\xf7","\xc2\xee","\xe8\xbb","\xe0\xf9","\x6c\x69","\xf4\xe0","\x31\x39", |
|
73
|
|
|
|
|
|
|
"\xe0\xf6","\xef\xfb","\xd2\xe0","\xed\xe3","\x61\x6e","\xd0\xe0","\xee\xf6", |
|
74
|
|
|
|
|
|
|
"\xeb\xea","\xf1\xf0","\xf0\x2c","\xdd\xf2","\x72\x69","\xf0\xf8","\xed\xea", |
|
75
|
|
|
|
|
|
|
"\xeb\x2e","\xed\x2e","\xe7\xfc","\x28\xf1","\xe2\xea","\xec\xef","\x61\x74", |
|
76
|
|
|
|
|
|
|
"\xfb\xea","\xe2\xef","\xf8\xfc","\xc2\xe5","\xcf\xe0","\x6f\x75","\xf1\xf5", |
|
77
|
|
|
|
|
|
|
"\x69\x65","\xf4\xe5","\xe5\xe8","\xe8\xf9","\x63\x65","\x63\x68","\x73\x74", |
|
78
|
|
|
|
|
|
|
"\xf1\xe4","\xff\xea","\xc2\xe8","\xfb\xed","\xe7\xe3","\xf8\xf3","\xfe\x2e", |
|
79
|
|
|
|
|
|
|
"\xcc\xe5","\x69\x74","\xf7\xfc","\xf8\xea","\x68\x65","\xf2\xeb","\x28\xab", |
|
80
|
|
|
|
|
|
|
"\x6e\x65","\xf5\x2e","\xf8\xee","\xfe\xf7","\x61\x72","\xe0\xf3","\xe0\xe8", |
|
81
|
|
|
|
|
|
|
"\xf3\xe0","\xe5\xe0","\xe5\xbb","\xbb\x29","\x6f\x6d","\x6e\x73","\xe9\xf8", |
|
82
|
|
|
|
|
|
|
"\xec\xf1","\xff\xfe","\xc0\xeb","\x73\x65","\xec\xec","\xed\xf7","\xf6\xee", |
|
83
|
|
|
|
|
|
|
"\xee\x3a","\xee\xf9","\xf7\xea","\x61\x6c","\xc4\xe0","\x31\x36","\xab\xcf", |
|
84
|
|
|
|
|
|
|
"\xfc\xe7","\xe1\xfa","\xe2\xfc","\xf6\xf3","\xe5\x3b","\x6d\x65","\xf0\xe1", |
|
85
|
|
|
|
|
|
|
"\xe0\x3b", |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Value handed back verbatim by dist_ratio(). It is stored as a string literal.
# NOTE(review): presumably a ratio/threshold consumed by the parent detector
# class (EBook::Ishmael::CharDet::CP) -- confirm its exact meaning there.
my $DIST_RATIO = '0.91';
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Lookup set (byte => 1) of the non-ASCII byte values that this charset
# contributes to %IGNORE, on top of the shared ASCII space set.
my %CHARSET_SPACE_SET = (
    "\x98" => 1, # Alt
    "\xa0" => 1, # NBSP
);
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Union of every byte value that ignore() reports as skippable: the shared
# ASCII space set followed by the charset-specific %CHARSET_SPACE_SET.
# The sets are merged in order, so a key present in both takes its value
# from the later set.
# NOTE(review): no `use EBook::Ishmael::CharDet::Constants` is visible in
# this file; presumably the parent class loads it before this runs -- confirm.
my %IGNORE;
for my $set (
    \%EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
    \%CHARSET_SPACE_SET,
) {
    @IGNORE{ keys %{$set} } = values %{$set};
}
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# ignore($byte)
#
# Method. Tells the caller whether $byte is a key of the file-scoped %IGNORE
# set (ASCII space set plus the charset-specific space bytes).
#
#   $byte - the value to look up; used directly as a hash key.
#
# Returns the result of `exists`: true when the key is present, false
# otherwise. The invocant is accepted but not consulted.
sub ignore {
    my $self = shift;
    my $byte = shift;

    return exists $IGNORE{$byte};
}
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# freq_bigram($bigram)
#
# Method. Tells the caller whether $bigram is one of the keys of the
# file-scoped %CP1251_FREQS table (the generated list of two-byte strings).
#
#   $bigram - the string to look up; used directly as a hash key.
#
# Returns the result of `exists`: true when the key is present, false
# otherwise. The invocant is accepted but not consulted.
sub freq_bigram {
    my $self   = shift;
    my $bigram = shift;

    return exists $CP1251_FREQS{$bigram};
}
|
115
|
|
|
|
|
|
|
|
|
116
|
0
|
|
|
0
|
0
|
0
|
# dist_ratio()
#
# Accessor for the file-scoped $DIST_RATIO value. Takes no meaningful
# arguments (any invocant is ignored) and returns the stored value unchanged.
sub dist_ratio {
    return $DIST_RATIO;
}
|
117
|
|
|
|
|
|
|
|
|
118
|
67
|
|
|
67
|
0
|
246
|
# encoding()
#
# Returns the constant string 'CP1251'. Any arguments, including the
# invocant, are ignored.
sub encoding {
    return 'CP1251';
}
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
1; |
|
121
|
|
|
|
|
|
|
|