| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1250; |
|
2
|
18
|
|
|
18
|
|
307
|
use 5.016; |
|
|
18
|
|
|
|
|
71
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
104
|
use strict; |
|
|
18
|
|
|
|
|
35
|
|
|
|
18
|
|
|
|
|
440
|
|
|
5
|
18
|
|
|
18
|
|
82
|
use warnings; |
|
|
18
|
|
|
|
|
60
|
|
|
|
18
|
|
|
|
|
1084
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
591
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
424
|
|
|
|
18
|
|
|
|
|
135
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Lookup set of two-byte strings (bigrams). The map below turns every listed
# string into a hash key whose value is 1; freq_bigram() only ever tests key
# existence, so the values themselves carry no meaning.
# NOTE(review): the generator script name suggests the list holds 512
# entries, and the high bytes are presumably CP1250 code points -- confirm
# against contrib/512-bigram.pl before regenerating or editing by hand.
# Generated from contrib/512-bigram.pl using various Polish, Czech, and |
|
10
|
|
|
|
|
|
|
# Hungarian texts from Wikisource. |
|
11
|
|
|
|
|
|
|
my %CP1250_FREQS = map { $_ => 1 } ( |
|
12
|
|
|
|
|
|
|
"\x69\x65","\x6e\x69","\x6e\x61","\x73\x7a","\x7a\x65","\x73\x74","\x63\x7a", |
|
13
|
|
|
|
|
|
|
"\x70\x6f","\x63\x68","\x72\x7a","\x77\x69","\x61\x6e","\x72\x61","\x7a\x61", |
|
14
|
|
|
|
|
|
|
"\x65\x6e","\x74\x61","\x7a\x79","\x65\x6d","\x6f\x77","\x61\xb3","\x6d\x69", |
|
15
|
|
|
|
|
|
|
"\x72\x6f","\x6b\x69","\x74\x65","\x61\x6b","\x65\x67","\x73\x69","\x70\x72", |
|
16
|
|
|
|
|
|
|
"\x64\x7a","\x63\x69","\x6a\x65","\x6c\x65","\xb3\x61","\x6e\x65","\x65\x72", |
|
17
|
|
|
|
|
|
|
"\x69\xea","\x69\x61","\x6b\x6f","\x61\x72","\x64\x6f","\x7a\x69","\x61\x6c", |
|
18
|
|
|
|
|
|
|
"\x65\x6c","\x6f\x64","\x74\x6f","\x6f\x6e","\x61\x6d","\x65\x6a","\x67\x6f", |
|
19
|
|
|
|
|
|
|
"\x65\x73","\x6c\x69","\x77\x61","\x6f\x73","\x6b\x61","\x6e\x79","\x65\x64", |
|
20
|
|
|
|
|
|
|
"\x6a\x61","\x6c\x61","\x61\x64","\x6f\x72","\x73\x6b","\x61\x74","\x61\x6a", |
|
21
|
|
|
|
|
|
|
"\x61\x63","\x6f\x6c","\x69\x63","\x6d\x61","\x6f\x62","\x62\x79","\xb3\x6f", |
|
22
|
|
|
|
|
|
|
"\x72\x65","\x6e\x6f","\x65\x6b","\x65\x63","\x74\x72","\x61\x73","\x74\x79", |
|
23
|
|
|
|
|
|
|
"\x70\x61","\x77\x79","\x65\x74","\x61\x77","\x69\x6e","\x69\x73","\x79\x63", |
|
24
|
|
|
|
|
|
|
"\xbf\x65","\x7a\x6e","\x61\x7a","\x64\x61","\x6d\x6f","\x63\x65","\x6f\x6d", |
|
25
|
|
|
|
|
|
|
"\x67\x79","\x61\x2c","\x6f\x74","\x68\x6f","\x62\x69","\x79\x6d","\x77\x6f", |
|
26
|
|
|
|
|
|
|
"\x65\x2c","\x73\x65","\x79\x73","\x7a\x6f","\x68\x61","\x6f\x7a","\x64\x65", |
|
27
|
|
|
|
|
|
|
"\x64\x6e","\x6d\x65","\xf3\x77","\x6f\x9c","\x77\x73","\x6f\x67","\x6a\xb9", |
|
28
|
|
|
|
|
|
|
"\x65\x7a","\x70\x69","\x62\x61","\x64\x79","\x2e\x2e","\x6e\x74","\x6f\x6b", |
|
29
|
|
|
|
|
|
|
"\x69\x2c","\x6f\x63","\x61\x2e","\x75\x73","\x69\xb3","\x69\x6b","\x72\x79", |
|
30
|
|
|
|
|
|
|
"\x6b\x74","\x74\x75","\x69\x6c","\xb9\x63","\x9c\x63","\x73\x61","\x79\xb3", |
|
31
|
|
|
|
|
|
|
"\x61\x67","\x62\x65","\x76\x61","\x6f\x6a","\x77\x65","\x72\x74","\x6d\x75", |
|
32
|
|
|
|
|
|
|
"\x73\x70","\x62\x6f","\x65\x2e","\x6e\xb9","\x61\x70","\x69\x74","\x7a\x75", |
|
33
|
|
|
|
|
|
|
"\x74\xf3","\x79\x2c","\x6c\x6f","\x73\x6f","\x72\x75","\xb3\x79","\x74\x74", |
|
34
|
|
|
|
|
|
|
"\x6d\x6e","\x6e\x6e","\x61\xe6","\x69\xb9","\x76\x65","\x6b\x75","\x75\x64", |
|
35
|
|
|
|
|
|
|
"\xf3\x72","\x67\x61","\x6b\x72","\x75\x63","\x6c\x6e","\x65\x77","\x6d\x2c", |
|
36
|
|
|
|
|
|
|
"\x6d\x79","\x69\x2e","\x6f\x70","\x69\x6d","\x62\x72","\x7a\x74","\x74\x6b", |
|
37
|
|
|
|
|
|
|
"\x75\x74","\x6f\x2c","\xe1\x72","\x70\x65","\x63\x61","\x77\x6e","\x7a\x63", |
|
38
|
|
|
|
|
|
|
"\x73\xb3","\x61\x62","\xe1\x6e","\xe1\x74","\x63\x6f","\x74\x69","\x74\x77", |
|
39
|
|
|
|
|
|
|
"\x64\x72","\x61\xbf","\x74\x6e","\x6c\x6b","\x72\xf3","\x74\x2c","\x72\x64", |
|
40
|
|
|
|
|
|
|
"\x6c\x79","\x6e\x64","\xe1\x6c","\x75\x6a","\x6c\x75","\x67\xb3","\x79\x74", |
|
41
|
|
|
|
|
|
|
"\x79\x77","\x79\x6e","\x69\x6f","\x75\x72","\x65\x62","\x79\x6c","\x67\x72", |
|
42
|
|
|
|
|
|
|
"\x70\x75","\x63\x79","\x64\x75","\x69\x77","\x63\x73","\xbf\x79","\x75\x6d", |
|
43
|
|
|
|
|
|
|
"\x65\xbf","\x69\x64","\x7a\x77","\x6f\x76","\x79\x2e","\x79\x6a","\x7a\xb9", |
|
44
|
|
|
|
|
|
|
"\x65\x70","\x7a\x64","\x72\x73","\x6a\x73","\x72\xe1","\x6f\x75","\x6f\xbf", |
|
45
|
|
|
|
|
|
|
"\x7a\x6b","\x6c\x74","\x75\x62","\x6e\x6b","\x72\x6e","\x6b\x2c","\x50\x6f", |
|
46
|
|
|
|
|
|
|
"\x69\xe6","\x6a\x75","\xb3\x65","\x76\xe1","\x6b\x65","\x6c\x6c","\x7a\xea", |
|
47
|
|
|
|
|
|
|
"\x75\xbf","\xea\x63","\x6a\x69","\x4e\x69","\x67\x65","\x64\x6c","\x67\x69", |
|
48
|
|
|
|
|
|
|
"\x73\x77","\x75\x6b","\x6e\x63","\x79\x6b","\x67\x64","\xb3\x75","\x75\x6c", |
|
49
|
|
|
|
|
|
|
"\x9c\x6c","\x75\x2c","\x62\x6c","\x69\x72","\x6d\x2e","\x6c\xe1","\xea\x64", |
|
50
|
|
|
|
|
|
|
"\xe9\x72","\xea\x2c","\x54\x6f","\x6e\xed","\x63\x6b","\x9c\xe6","\x64\x69", |
|
51
|
|
|
|
|
|
|
"\xe1\x73","\x6f\xb3","\x63\x6a","\x73\x73","\x72\x63","\x74\xe1","\x68\x65", |
|
52
|
|
|
|
|
|
|
"\x6e\x2c","\x6c\x73","\x79\x9c","\x6b\xe9","\x73\x75","\x6f\x2e","\x69\x67", |
|
53
|
|
|
|
|
|
|
"\x79\x62","\x6e\xe9","\x65\x9c","\x6c\x2c","\xb9\x64","\x79\x65","\x72\x69", |
|
54
|
|
|
|
|
|
|
"\x75\x2e","\x66\x65","\xe9\x6e","\x73\x63","\x76\xe9","\xe9\x73","\x73\x6e", |
|
55
|
|
|
|
|
|
|
"\x75\x6e","\x6d\xf3","\xe9\x67","\x7a\xb3","\x6e\x75","\x76\x6f","\x4a\x61", |
|
56
|
|
|
|
|
|
|
"\x6b\x6e","\x72\x77","\x6e\xe1","\x50\x72","\x6b\xb9","\x79\xe6","\x76\x69", |
|
57
|
|
|
|
|
|
|
"\x64\xb3","\xb9\x2c","\x6a\x6f","\xbf\x61","\x69\x75","\x7a\x6d","\x75\x70", |
|
58
|
|
|
|
|
|
|
"\x6a\xea","\x6e\xea","\x6d\xe1","\x65\xb3","\x4e\x61","\x6b\x6c","\xea\x6b", |
|
59
|
|
|
|
|
|
|
"\x72\xea","\xe9\x74","\x74\xe9","\xf1\x73","\x66\x6f","\xe9\x6c","\x68\x2c", |
|
60
|
|
|
|
|
|
|
"\x66\x69","\x7a\xe1","\x9c\x6d","\xe6\x2c","\xf3\x6c","\x77\x72","\x73\x79", |
|
61
|
|
|
|
|
|
|
"\x4d\x69","\x69\x7a","\x6b\x73","\x79\x70","\x6a\xe1","\x74\xea","\x67\x6e", |
|
62
|
|
|
|
|
|
|
"\x6c\x64","\x62\x75","\x7a\x72","\xbf\x6e","\x6e\xec","\xb9\xb3","\x67\x6c", |
|
63
|
|
|
|
|
|
|
"\x75\x67","\x77\x63","\x69\x9c","\x79\x61","\xea\x2e","\x6c\xe9","\xea\x74", |
|
64
|
|
|
|
|
|
|
"\x72\x6d","\x61\x9c","\x77\xb3","\x50\x61","\x6b\xb3","\xb9\x2e","\x52\x6f", |
|
65
|
|
|
|
|
|
|
"\x64\x6a","\x61\x75","\x68\x77","\x53\x7a","\x61\x69","\x69\x76","\x7a\x62", |
|
66
|
|
|
|
|
|
|
"\x6b\x2e","\x74\x2e","\x74\xb9","\x6d\xe9","\x62\x62","\x6e\x73","\x6a\x6e", |
|
67
|
|
|
|
|
|
|
"\x9e\x65","\x65\x68","\xb3\x6e","\x4c\x69","\xe6\x2e","\x70\x6c","\x6f\x68", |
|
68
|
|
|
|
|
|
|
"\x66\x61","\x77\x64","\x73\x2c","\x7a\xf3","\x70\xf8","\x65\xf1","\x65\x76", |
|
69
|
|
|
|
|
|
|
"\x4a\x65","\x64\x77","\x61\x76","\x6f\x69","\x6b\xf6","\x57\x69","\x7a\xe9", |
|
70
|
|
|
|
|
|
|
"\x9c\x77","\xfc\x6c","\xf8\x65","\x6a\x2c","\xf6\x72","\x9c\x6e","\x73\xe9", |
|
71
|
|
|
|
|
|
|
"\x54\x61","\x67\xf3","\x7a\x70","\xe1\x6d","\x6b\xf3","\x77\xb9","\x68\xe1", |
|
72
|
|
|
|
|
|
|
"\x79\x64","\xe1\x67","\x7a\x67","\x79\x6f","\x4d\x6f","\xed\x74","\xb3\x2c", |
|
73
|
|
|
|
|
|
|
"\x65\xe6","\x48\x61","\x68\x63","\x6e\x67","\x61\x3a","\x6b\xea","\x77\x2c", |
|
74
|
|
|
|
|
|
|
"\x79\x72","\xf3\x64","\x62\xea","\x79\x7a","\x5a\x61","\x4d\x61","\xed\x6d", |
|
75
|
|
|
|
|
|
|
"\x64\xe1","\x7a\x6c","\x76\xec","\x6d\x62","\x77\xf3","\x6b\xe1","\xe9\x6b", |
|
76
|
|
|
|
|
|
|
"\x72\xb9","\x73\x76","\x72\x6b","\x68\x75","\x68\x6e","\x70\x79","\xf1\x63", |
|
77
|
|
|
|
|
|
|
"\x74\x76","\x7a\x2c","\x54\x65","\x73\xe1","\xbf\x6f","\x6c\x6a","\x64\x63", |
|
78
|
|
|
|
|
|
|
"\x64\x6b","\x41\x6c","\x79\x67","\xe1\x62","\x61\xf1","\x75\xb3","\xea\xbf", |
|
79
|
|
|
|
|
|
|
"\x73\xb9","\x6c\xea","\x62\x6e","\x6e\x2e","\x68\x6c","\x72\x67","\x77\x7a", |
|
80
|
|
|
|
|
|
|
"\xe9\x70","\x73\x6c","\xb3\xb9","\x72\x6a","\x43\x7a","\x74\x6a","\x6c\x6d", |
|
81
|
|
|
|
|
|
|
"\x44\x65","\x6a\x64","\xf3\x6a","\x64\x70","\x6a\x6d","\x4e\x65","\xe1\x7a", |
|
82
|
|
|
|
|
|
|
"\x73\x6d","\x68\x69","\xf6\x6c","\xea\xb3","\x44\x6f","\x67\x75","\x75\x77", |
|
83
|
|
|
|
|
|
|
"\xf3\xb3","\x48\x6f","\xf6\x6e","\x69\xbf","\x9a\x65","\x77\x2e","\xb3\x2e", |
|
84
|
|
|
|
|
|
|
"\x68\x79","\x6c\x2e","\x4d\x65","\x68\x2e","\x74\xec","\x72\xe9","\x4b\x6f", |
|
85
|
|
|
|
|
|
|
"\x64\x73", |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Ratio handed back verbatim by dist_ratio(). Stored as the string '0.90'
# (not a bare number), so the trailing zero survives stringification.
# NOTE(review): how the caller interprets this ratio is not visible in this
# file -- confirm against EBook::Ishmael::CharDet::CP.
my $DIST_RATIO = q{0.90};
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Single high bytes that are treated like whitespace for this charset. Each
# byte becomes a key mapped to 1; the set is later folded into %IGNORE.
# NOTE(review): the original annotations label 0x81 as "Alt" and the next
# four bytes as "^" (same as above). These look like the code points CP1250
# leaves unassigned -- confirm against the code page table.
my %CHARSET_SPACE_SET;
{
    my @space_like_bytes = (
        "\x81",    # Alt
        "\x83",    # ^
        "\x88",    # ^
        "\x90",    # ^
        "\x98",    # ^
        "\xa0",    # NBSP
    );

    # Hash-slice assignment: one value of 1 per listed byte.
    @CHARSET_SPACE_SET{@space_like_bytes} = (1) x @space_like_bytes;
}
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Union of the shared ASCII whitespace set and the charset-specific bytes in
# %CHARSET_SPACE_SET; ignore() tests membership against it. The charset
# entries are applied second, so they win on any key present in both sets.
# NOTE(review): this file never loads EBook::Ishmael::CharDet::Constants
# itself. If nothing else (e.g. the parent class) has loaded it by the time
# this statement runs, the ASCII set is silently empty -- confirm.
my %IGNORE = %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET;
@IGNORE{ keys %CHARSET_SPACE_SET } = values %CHARSET_SPACE_SET;
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# ignore($byte)
#
# Method. Reports whether $byte is a key of %IGNORE, i.e. one of the
# ASCII or charset-specific whitespace-like bytes collected above.
# Returns the result of exists(): true when the key is present, false
# otherwise. The invocant is accepted but not used.
sub ignore {
    my $self = shift;
    my ($byte) = @_;

    return exists $IGNORE{$byte};
}
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# freq_bigram($bigram)
#
# Method. Reports whether $bigram is one of the strings listed in
# %CP1250_FREQS. Returns the result of exists(): true when the key is
# present, false otherwise. The invocant is accepted but not used.
sub freq_bigram {
    my $self = shift;
    my ($bigram) = @_;

    return exists $CP1250_FREQS{$bigram};
}
|
119
|
|
|
|
|
|
|
|
|
120
|
0
|
|
|
0
|
0
|
0
|
# dist_ratio()
#
# Returns the file-level $DIST_RATIO value unchanged. Takes no arguments
# beyond the (unused) invocant.
sub dist_ratio {
    return $DIST_RATIO;
}
|
121
|
|
|
|
|
|
|
|
|
122
|
67
|
|
|
67
|
0
|
335
|
# encoding()
#
# Returns the constant string 'CP1250', the encoding name this package
# reports for itself. Any arguments, including the invocant, are ignored.
sub encoding {
    return 'CP1250';
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
1; |