| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1253; |
|
2
|
18
|
|
|
18
|
|
316
|
use 5.016; |
|
|
18
|
|
|
|
|
89
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
108
|
use strict; |
|
|
18
|
|
|
|
|
39
|
|
|
|
18
|
|
|
|
|
478
|
|
|
5
|
18
|
|
|
18
|
|
83
|
use warnings; |
|
|
18
|
|
|
|
|
37
|
|
|
|
18
|
|
|
|
|
1027
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
120
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
43
|
|
|
|
18
|
|
|
|
|
122
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Generated from contrib/512-bigram.pl using various Greek texts from |
|
10
|
|
|
|
|
|
|
# Wikisource. |
|
11
|
|
|
|
|
|
|
my %CP1253_FREQS = map { $_ => 1 } ( |
|
12
|
|
|
|
|
|
|
"\xf4\xef","\xef\xf5","\xe1\xe9","\xea\xe1","\xef\xed","\xe5\xe9","\xf4\xe1", |
|
13
|
|
|
|
|
|
|
"\xf9\xed","\xe1\xed","\xef\xf2","\xef\xe9","\xe5\xed","\xf1\xef","\xe9\xed", |
|
14
|
|
|
|
|
|
|
"\xf4\xe7","\xf3\xf4","\xf0\xef","\xed\xe1","\xed\xef","\xec\xe5","\xf4\xe9", |
|
15
|
|
|
|
|
|
|
"\xf1\xe1","\xf4\xe5","\xe9\xf2","\xf4\xf9","\xe1\xf4","\xed\xf4","\xe4\xe5", |
|
16
|
|
|
|
|
|
|
"\xe1\xf1","\xe7\xed","\xe5\xf1","\xf5\xf4","\xe1\xf2","\xec\xe1","\xf0\xf1", |
|
17
|
|
|
|
|
|
|
"\xed\x2c","\xe9\xf3","\xe7\xf2","\xe9\xe1","\xf0\xe5","\xf3\xe9","\xe9\xea", |
|
18
|
|
|
|
|
|
|
"\xe1\xeb","\xeb\xef","\xf1\xe9","\xe1\xf0","\xe5\xf3","\xf9\xf2","\xe5\xf0", |
|
19
|
|
|
|
|
|
|
"\xf5\xf2","\xe5\xdf","\xeb\xeb","\xf0\xe1","\xf3\xe1","\xe5\xf4","\xe4\xe9", |
|
20
|
|
|
|
|
|
|
"\xec\xef","\xe1\xf5","\xf2\x2c","\xf3\xe5","\xec\xdd","\xf5\xed","\xf4\xf1", |
|
21
|
|
|
|
|
|
|
"\xe1\xf3","\xe9\xef","\xea\xef","\xeb\xe5","\xeb\xe1","\xdd\xed","\xed\xe5", |
|
22
|
|
|
|
|
|
|
"\xef\xfd","\xef\xeb","\xf3\xe7","\xdf\xe1","\xe5\xf5","\xe8\xe5","\xf1\xe5", |
|
23
|
|
|
|
|
|
|
"\xf0\xe9","\xe1\xec","\xf5\xf3","\xfc\xed","\xef\xec","\xe5\xea","\xef\xf1", |
|
24
|
|
|
|
|
|
|
"\xe5\xeb","\xea\xe5","\xe5\xf2","\xf0\xfc","\xe3\xe1","\xf5\xf1","\xf1\xdf", |
|
25
|
|
|
|
|
|
|
"\xdc\xf3","\xe7\xf3","\xeb\xe9","\xe3\xe5","\xe8\xe1","\xed\x2e","\xf3\xef", |
|
26
|
|
|
|
|
|
|
"\xf7\xe5","\xf4\xdc","\xe1\xea","\xe7\xec","\xed\xe7","\xdd\xf1","\xdc\xed", |
|
27
|
|
|
|
|
|
|
"\xe9\x2c","\xdf\xed","\xf5\xf0","\xeb\xe7","\xf1\xf9","\xfe\xed","\xe1\xdf", |
|
28
|
|
|
|
|
|
|
"\xef\xf4","\xe1\xe3","\xec\xe7","\xdc\xf4","\xf3\xe8","\xdf\xef","\xf3\xf5", |
|
29
|
|
|
|
|
|
|
"\xe1\x2c","\xef\xf3","\xef\xf0","\xe9\xe4","\xe5\xf6","\xf1\xdc","\xf5\xec", |
|
30
|
|
|
|
|
|
|
"\xe4\xef","\xe9\xec","\xea\xf1","\xed\xf9","\xec\xe9","\xf0\xeb","\xf4\xfc", |
|
31
|
|
|
|
|
|
|
"\xde\xf3","\xf2\x2e","\xe3\xf9","\xf0\xdc","\xf6\xe7","\xf3\xec","\xf4\xdd", |
|
32
|
|
|
|
|
|
|
"\xf7\xef","\xdc\xeb","\xe9\xf0","\xe3\xef","\xe1\xe8","\xe8\xe7","\xea\xdc", |
|
33
|
|
|
|
|
|
|
"\xdf\xf3","\xe5\xec","\xed\xe9","\xe2\xdc","\xdf\xe4","\xe4\xe1","\xfc\xf4", |
|
34
|
|
|
|
|
|
|
"\xe9\xf4","\xf9\xf3","\xec\xdc","\xf4\xdf","\xf7\xf1","\xe7\xf4","\xeb\xdc", |
|
35
|
|
|
|
|
|
|
"\xea\xfc","\xec\xdf","\xf5\xe4","\xf1\xfc","\xea\xeb","\xeb\xdd","\xe4\xf1", |
|
36
|
|
|
|
|
|
|
"\xf1\xe7","\xe9\xeb","\xef\xdf","\xed\xe4","\xf6\xe1","\xe5\xe3","\xe7\x2c", |
|
37
|
|
|
|
|
|
|
"\xeb\xf9","\xea\xf4","\xeb\xdf","\xe9\xf1","\xf3\xea","\xfc\xec","\xe5\xf7", |
|
38
|
|
|
|
|
|
|
"\xf3\xf3","\xe9\xe5","\xe7\xf1","\xfc\xeb","\xe5\xfd","\xe1\xf6","\xf0\xf4", |
|
39
|
|
|
|
|
|
|
"\xe5\xee","\xf5\xea","\xf4\xde","\xe9\x2e","\xdd\xeb","\xf3\xdf","\xf1\xe3", |
|
40
|
|
|
|
|
|
|
"\xf3\xf9","\xfd\xed","\xe3\xe9","\xe4\xe7","\xdc\xf1","\xea\xfd","\xfd\xf3", |
|
41
|
|
|
|
|
|
|
"\xeb\xfc","\xed\xb7","\xdd\xe3","\xf0\xf5","\xe7\xeb","\xdf\xf9","\xf0\xf9", |
|
42
|
|
|
|
|
|
|
"\xf9\xf1","\xef\xe4","\xf7\xe1","\xf5\xe3","\xea\xe7","\xf6\xef","\xe2\xe1", |
|
43
|
|
|
|
|
|
|
"\xe8\xef","\xec\xfc","\xf9\xec","\xc2\xc3","\xe5\xf9","\xca\xe1","\xeb\xfd", |
|
44
|
|
|
|
|
|
|
"\xea\xe9","\xe5\xe4","\xc1\xc2","\xf5\xf7","\xfc\xf3","\xf0\xdd","\xf1\xdd", |
|
45
|
|
|
|
|
|
|
"\xec\xf0","\xfd\xf4","\xf1\xf7","\xe2\xef","\xf1\xf5","\xfc\xf2","\xe4\xdd", |
|
46
|
|
|
|
|
|
|
"\xf9\xf4","\xdd\xf3","\xf5\xeb","\xe9\xe8","\xdd\xf4","\xed\xfc","\xf1\xde", |
|
47
|
|
|
|
|
|
|
"\xeb\xde","\xeb\xf5","\xfd\xec","\xed\xdd","\xee\xe1","\xe5\xe1","\xe4\xdf", |
|
48
|
|
|
|
|
|
|
"\xef\xe3","\xdf\xe6","\xe6\xe5","\xe9\xdc","\xed\xdc","\xe5\xef","\xfd\xea", |
|
49
|
|
|
|
|
|
|
"\xe8\xdd","\xf0\xdf","\xed\xe8","\xee\xe5","\xf0\xe7","\xf6\xe9","\xde\xf2", |
|
50
|
|
|
|
|
|
|
"\xf5\xe8","\xe4\xf9","\xef\xea","\xe9\xe3","\xdf\xf4","\xe6\xef","\xf3\xf7", |
|
51
|
|
|
|
|
|
|
"\xc5\xc6","\xed\xdf","\xfc\xe3","\xe7\xea","\xf6\xe5","\xe1\xee","\xea\xf9", |
|
52
|
|
|
|
|
|
|
"\xf7\xf9","\xf6\xdd","\xf3\xf0","\xe1\xf7","\xec\xf9","\xe3\xf1","\xe7\xe3", |
|
53
|
|
|
|
|
|
|
"\xe7\x2e","\xe9\xf7","\xdc\xe4","\xe8\xde","\xf6\xf1","\xea\xde","\xdc\xec", |
|
54
|
|
|
|
|
|
|
"\xf2\xb7","\xe3\xdf","\xf1\xfe","\xe2\xeb","\xe5\xe8","\xe3\xdc","\xe7\xe4", |
|
55
|
|
|
|
|
|
|
"\xe8\xf9","\xe9\xe2","\xde\xec","\xdf\xe3","\xe1\x2e","\xe1\xe4","\xef\xf6", |
|
56
|
|
|
|
|
|
|
"\xfd\xeb","\xe8\xf1","\xec\xec","\xdf\xf1","\xf5\x2c","\xec\xde","\xf1\xf4", |
|
57
|
|
|
|
|
|
|
"\xf3\xf6","\xe4\xfd","\xf1\xec","\xea\xfe","\xde\xed","\xf4\xf5","\xdf\xe7", |
|
58
|
|
|
|
|
|
|
"\xfc\xf0","\xf7\xe8","\xec\xed","\xed\xde","\xf5\xf6","\xde\xea","\xe4\xfc", |
|
59
|
|
|
|
|
|
|
"\xdd\xf7","\xfd\xf2","\xdd\xf2","\xe3\xe7","\xdc\xe8","\xec\xf6","\xe3\xed", |
|
60
|
|
|
|
|
|
|
"\xfd\xef","\xe3\xdd","\xfe\xf1","\xdd\xf0","\xed\xf5","\xc3\xc4","\xe5\x2c", |
|
61
|
|
|
|
|
|
|
"\xdf\xec","\xe9\xf9","\xf7\xe7","\xea\xdd","\xdd\xec","\xe3\xea","\xf4\xf4", |
|
62
|
|
|
|
|
|
|
"\xc4\xc5","\xdc\xe3","\xf5\x2e","\xe3\xe3","\xf3\xfd","\xe4\xf5","\xdf\xea", |
|
63
|
|
|
|
|
|
|
"\xef\xf7","\xe9\xfc","\xe7\xe8","\xe8\xf5","\xe3\xfe","\xfd\xf1","\xfe\xf3", |
|
64
|
|
|
|
|
|
|
"\xf6\xf5","\xfd\xe3","\xe9\xb7","\x32\x30","\xe2\xf1","\xdf\xeb","\xe9\xfe", |
|
65
|
|
|
|
|
|
|
"\xdf\xf0","\xdd\xef","\xf4\xfe","\xfc\xf1","\xea\xf5","\xe2\xe9","\xec\xe2", |
|
66
|
|
|
|
|
|
|
"\xe3\xde","\xc7\xc8","\xef\xe8","\xe6\xf9","\xde\xf1","\xdc\xea","\xed\xfe", |
|
67
|
|
|
|
|
|
|
"\xf9\xf0","\xdc\xf0","\xc6\xc7","\xfd\xe8","\xe1\xfd","\xf5\xf8","\xe9\xe6", |
|
68
|
|
|
|
|
|
|
"\xdc\xe6","\xef\x2c","\xc4\xe9","\xf9\x2c","\xdf\xf2","\xfe\xec","\xe8\xdc", |
|
69
|
|
|
|
|
|
|
"\xee\xef","\xf8\xef","\xe9\xe7","\xdd\xe1","\xfe\xf4","\xe1\xe2","\xf6\xf9", |
|
70
|
|
|
|
|
|
|
"\xf6\xdc","\xf8\xe7","\xe1\xe5","\xd4\xef","\xe8\xe9","\xde\xf4","\xf8\xe5", |
|
71
|
|
|
|
|
|
|
"\xf1\xe8","\xdf\xf7","\xde\xeb","\xdc\xf7","\xe1\xef","\xdd\xea","\xf3\xdd", |
|
72
|
|
|
|
|
|
|
"\xfc\xe4","\xf5\xe1","\xe1\xb7","\xfd\xe5","\xf7\xfe","\xc1\xf1","\xe3\xec", |
|
73
|
|
|
|
|
|
|
"\xe5\xe2","\xf4\x92","\x6f\x72","\xf6\xfc","\xdd\xe4","\xe4\xde","\xf6\xdf", |
|
74
|
|
|
|
|
|
|
"\xe9\x3b","\xe2\xe5","\xf7\xdf","\xe3\xfc","\xed\x3b","\xe3\xf5","\xf7\xdd", |
|
75
|
|
|
|
|
|
|
"\xea\x92","\xe4\xdc","\xe1\xe6","\x31\x30","\x74\x74","\xef\xe2","\xf6\xe8", |
|
76
|
|
|
|
|
|
|
"\xf9\x2e","\xe2\xe7","\x68\x74","\xfe\xf0","\xee\xe7","\xfd\xf0","\xf7\xe9", |
|
77
|
|
|
|
|
|
|
"\xf3\xdc","\x30\x30","\xf7\xde","\xdc\xee","\xdd\xf9","\xde\xe8","\xe7\xf0", |
|
78
|
|
|
|
|
|
|
"\xd0\xe1","\xed\xed","\xf3\xfc","\xe9\xde","\xd7\xef","\xeb\xe8","\xf2\x3a", |
|
79
|
|
|
|
|
|
|
"\xf2\x3b","\xdc\x2c","\xf8\xe1","\xf1\xea","\xdf\xe8","\xf1\xf1","\xf8\xf5", |
|
80
|
|
|
|
|
|
|
"\xdf\xe5","\xe7\xb7","\xfc\xea","\xf1\xed","\xf1\x2e","\xf6\xfd","\xcf\xf5", |
|
81
|
|
|
|
|
|
|
"\x31\x39","\xf9\xea","\xf0\xf0","\xc2\xe9","\xea\xdf","\xc1\xeb","\x69\x6b", |
|
82
|
|
|
|
|
|
|
"\x77\x77","\xe3\xeb","\xc1\xed","\xf0\xed","\x72\x67","\xf0\xde","\x6f\x6e", |
|
83
|
|
|
|
|
|
|
"\xc6\xc8","\x73\x3a","\x3a\x2f","\xee\xe9","\xe9\xee","\x74\x70","\xed\xfd", |
|
84
|
|
|
|
|
|
|
"\x2f\x2f","\xdd\xf6","\xf7\xed","\x2e\x6f","\xc2\xe1","\xdf\x2c","\xc1\xf0", |
|
85
|
|
|
|
|
|
|
"\xc5\xe9", |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $DIST_RATIO = '0.92'; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
my %CHARSET_SPACE_SET = map { $_ => 1 } ( |
|
91
|
|
|
|
|
|
|
"\x81", # undef |
|
92
|
|
|
|
|
|
|
"\x88", # ^ |
|
93
|
|
|
|
|
|
|
"\x8a", # ^ |
|
94
|
|
|
|
|
|
|
"\x8c", # ^ |
|
95
|
|
|
|
|
|
|
"\x8d", # ^ |
|
96
|
|
|
|
|
|
|
"\x8e", # ^ |
|
97
|
|
|
|
|
|
|
"\x8f", # ^ |
|
98
|
|
|
|
|
|
|
"\x90", # ^ |
|
99
|
|
|
|
|
|
|
"\x98", # ^ |
|
100
|
|
|
|
|
|
|
"\x9a", # ^ |
|
101
|
|
|
|
|
|
|
"\x9c", # ^ |
|
102
|
|
|
|
|
|
|
"\x9d", # ^ |
|
103
|
|
|
|
|
|
|
"\x9e", # ^ |
|
104
|
|
|
|
|
|
|
"\x9f", # ^ |
|
105
|
|
|
|
|
|
|
"\xaa", # ^ |
|
106
|
|
|
|
|
|
|
"\xa0", # NBSP |
|
107
|
|
|
|
|
|
|
"\xff", # undef |
|
108
|
|
|
|
|
|
|
); |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
my %IGNORE = ( |
|
111
|
|
|
|
|
|
|
%EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET, |
|
112
|
|
|
|
|
|
|
%CHARSET_SPACE_SET, |
|
113
|
|
|
|
|
|
|
); |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub ignore { |
|
116
|
|
|
|
|
|
|
|
|
117
|
548864
|
|
|
548864
|
0
|
883653
|
my ($self, $byte) = @_; |
|
118
|
|
|
|
|
|
|
|
|
119
|
548864
|
|
|
|
|
1427478
|
return exists $IGNORE{ $byte }; |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub freq_bigram { |
|
124
|
|
|
|
|
|
|
|
|
125
|
387306
|
|
|
387306
|
0
|
616683
|
my ($self, $bigram) = @_; |
|
126
|
|
|
|
|
|
|
|
|
127
|
387306
|
|
|
|
|
993469
|
return exists $CP1253_FREQS{ $bigram }; |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
0
|
|
|
0
|
0
|
0
|
sub dist_ratio { $DIST_RATIO }; |
|
132
|
|
|
|
|
|
|
|
|
133
|
67
|
|
|
67
|
0
|
205
|
sub encoding { 'CP1253' } |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
1; |