| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1254; |
|
2
|
18
|
|
|
18
|
|
357
|
use 5.016; |
|
|
18
|
|
|
|
|
72
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
108
|
use strict; |
|
|
18
|
|
|
|
|
33
|
|
|
|
18
|
|
|
|
|
523
|
|
|
5
|
18
|
|
|
18
|
|
80
|
use warnings; |
|
|
18
|
|
|
|
|
53
|
|
|
|
18
|
|
|
|
|
1163
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
102
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
32
|
|
|
|
18
|
|
|
|
|
130
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Set of two-byte sequences (bigrams) treated as "frequent" for this code
# page; freq_bigram() answers membership queries against it.
# The list was generated by contrib/512-bigram.pl from various Turkish texts
# taken from Wikisource. Only the keys matter - every value is simply 1.
my %CP1254_FREQS;
$CP1254_FREQS{$_} = 1 for (
    "\x69\x6e","\x61\x72","\x61\x6e","\x65\x72","\x6c\x61","\x6c\x65","\x64\x65",
    "\xfd\x6e","\x64\x61","\x65\x6e","\x69\x6c","\x6c\x69","\x69\x72","\x6e\x64",
    "\x6d\x61","\x72\x69","\x64\x69","\x79\x61","\x72\x61","\x61\x6b","\x6d\x65",
    "\x61\x6c","\x73\x69","\x65\x74","\x6b\x61","\x6e\x69","\x62\x69","\x61\x79",
    "\x75\x6e","\x76\x65","\x65\x6c","\x72\x65","\x6e\xfd","\x6e\x65","\x74\x61",
    "\x74\x65","\x74\x69","\x65\x6b","\x79\x65","\x6f\x6c","\x72\xfd","\x6b\x69",
    "\x73\x61","\x6e\x61","\x65\x73","\x69\x79","\x61\x73","\x69\x6d","\x6f\x72",
    "\x68\x61","\x62\x75","\x61\x6d","\x75\x72","\x65\x6d","\xf0\x69","\x61\x64",
    "\x6e\x75","\x61\x74","\x69\x6b","\x65\x64","\x6c\xfd","\x73\xfd","\x64\xfd",
    "\x6d\x69","\x72\x64","\x65\x79","\x61\xfe","\x69\x73","\x6e\x6c","\x72\x75",
    "\xfd\x6c","\x75\x6c","\x74\xfd","\x6c\x64","\x62\x61","\x72\x2e","\x6b\x6c",
    "\x6f\x6e","\x73\x65","\x79\x6f","\x6c\x6d","\x63\x65","\x69\x7a","\x67\x65",
    "\x72\x6c","\xf0\xfd","\x6c\x75","\x69\xfe","\x6b\x65","\x64\x75","\xfc\x6e",
    "\x6c\x6c","\x75\x6d","\xfd\x72","\x79\xfd","\x65\x63","\x61\x68","\xfc\x72",
    "\x69\xf0","\x76\x61","\xfd\xf0","\xfd\x6b","\x62\x65","\x79\x6c","\xfd\x6d",
    "\x65\xf0","\x6b\x74","\x61\x7a","\x6d\xfd","\x79\x69","\x7a\x65","\x61\x62",
    "\x73\x6f","\xfe\x74","\x61\x70","\x6b\xfd","\xf6\x72","\x72\x73","\x73\x74",
    "\x72\x74","\xf0\x75","\x6b\x75","\x69\xe7","\x69\x74","\x72\x6b","\x6d\x75",
    "\x67\xf6","\x6b\x6f","\x63\x61","\x67\x69","\x75\xf0","\x68\x69","\x74\xfc",
    "\x69\x2c","\x6f\x6b","\xfd\x7a","\xfd\xfe","\x65\x76","\xe7\x69","\x6d\x6c",
    "\x6c\xfc","\xf6\x6e","\x73\x75","\x42\x75","\x74\x74","\xfd\x79","\xfe\xfd",
    "\xfc\x7a","\xfe\x61","\xfc\x6c","\x64\xfc","\xfd\x2c","\x7a\x61","\x61\xf0",
    "\x69\x68","\x68\x65","\x67\xfc","\x73\x6c","\x6e\x73","\x6e\x63","\xfe\x65",
    "\x6e\x6d","\x65\x70","\x72\x6d","\xfe\x6d","\x74\x6c","\x75\x7a","\x32\x30",
    "\x2e\x2e","\x66\x61","\xfe\x69","\x69\x70","\x64\x6f","\x54\x61","\xfc\x6d",
    "\xf6\x79","\x41\x6c","\x6c\x6b","\x75\xfe","\x74\x6d","\x6e\x2c","\x61\x63",
    "\xf6\x7a","\x7a\x2e","\x72\x2c","\x75\x79","\x52\x65","\xe7\x65","\x63\x69",
    "\x69\x64","\x6f\xf0","\x6e\xfc","\x72\xfc","\xfe\x6c","\x75\x6b","\xfc\x6b",
    "\x70\x61","\x4d\x65","\x7a\x69","\x65\x2c","\x4b\x61","\x42\x61","\xe7\xfd",
    "\x75\x73","\x74\x75","\x69\x66","\xfc\x73","\xfc\xfe","\x7a\xfd","\x45\x72",
    "\x6b\xfc","\x4b\x75","\x61\x76","\x30\x30","\xfc\x79","\x7a\x6c","\x73\xf6",
    "\x61\x66","\x6d\xfc","\x79\xfc","\xfd\x73","\xfe\x6b","\x74\x6f","\x65\x7a",
    "\x68\x75","\xf0\x61","\x6f\x70","\x72\x67","\xf0\x72","\x65\xfe","\x75\x74",
    "\x42\x65","\x65\x62","\x61\xe7","\x75\x2c","\x70\x6c","\xfc\x74","\xe7\x61",
    "\x73\xfc","\x65\x66","\x79\x79","\x27\x6e","\x79\x6e","\x54\xfc","\x4d\x61",
    "\x63\xfd","\xf0\x65","\x42\x69","\x64\x64","\x6b\x2c","\x6e\x74","\xfd\x64",
    "\x6e\x72","\x6d\x2e","\x61\x2c","\x61\x69","\x66\x65","\x6c\x67","\x27\x64",
    "\x6d\x64","\x70\xfd","\x6b\x2e","\x69\x63","\x69\x62","\x6d\x73","\x44\x65",
    "\x2d\x69","\x67\x61","\x65\xe7","\x79\x72","\x6b\x72","\x48\x61","\x61\x61",
    "\x7a\x64","\x6e\x27","\x6b\x73","\x6b\x6b","\xe7\x6f","\x27\xfd","\xfd\x74",
    "\xfc\xf0","\xfe\xfc","\x79\x75","\x65\x68","\x70\x74","\x4d\x69","\x72\x6f",
    "\x69\x2e","\x72\xfe","\x72\x6e","\x6c\x74","\x6c\xe2","\xf0\xfc","\x22\x2c",
    "\x76\x69","\x68\x6d","\x6f\x6d","\x6e\x67","\x54\x65","\x59\x61","\xfe\x75",
    "\x6d\x2c","\x68\x74","\xf6\xf0","\xf0\x6c","\x79\x64","\x72\x62","\x75\x76",
    "\x69\x76","\xfe\x62","\xdd\x73","\x2d\xfd","\x4c\x55","\x2e\x32","\x41\x6e",
    "\x53\x65","\x53\x61","\x79\x67","\x2e\x30","\x74\x72","\x7a\x75","\x62\xfc",
    "\x4d\xfc","\x66\xfd","\x63\x75","\x48\xfc","\x62\xfd","\x76\x6c","\x69\x3a",
    "\xe2\x6c","\x66\x6f","\x7a\x67","\x6d\x68","\x56\x69","\x76\x72","\xfc\x70",
    "\xfc\x64","\x6e\x6f","\x2e\x31","\x41\x52","\x79\xf6","\x74\x2d","\x43\x75",
    "\x62\xf6","\x45\x6b","\x6e\x6b","\x4d\x75","\x27\x69","\x97\x52","\x6f\x73",
    "\x70\x68","\xe2\x6e","\x6b\x6d","\x65\x27","\x47\x65","\x50\x61","\x68\xfc",
    "\x68\xfd","\x27\x61","\x64\xf6","\x44\x61","\x75\x64","\x75\x68","\x47\xf6",
    "\x41\x64","\x6f\x74","\x7a\xfc","\x67\xfd","\x31\x2e","\x6d\x6f","\x74\x73",
    "\x77\x69","\x30\x32","\x30\x37","\x6f\x79","\x70\x65","\x72\x63","\x4b\x41",
    "\x66\x69","\x33\x2e","\x6e\x2e","\x68\x6c","\x7a\x2c","\x6e\x62","\x48\x65",
    "\x2c\x5b","\x70\x73","\xdd\x68","\x4b\x55","\x68\x73","\xde\x75","\x74\x6b",
    "\x52\x61","\x41\x44","\x46\x69","\x67\x75","\x54\x4c","\x6d\x6d","\x68\xe2",
    "\xfe\xf6","\x47\xd6","\xfd\x2e","\x61\x27","\xf6\x6b","\x73\x6d","\x69\x27",
    "\x6c\x73","\xf6\x6c","\x68\x6b","\xf6\x73","\x75\x70","\xe7\x6c","\x53\x69",
    "\x6e\x79","\x70\x72","\x72\xe7","\x41\x4c","\xfc\xe7","\x55\x54","\x41\x72",
    "\xd6\x52","\x65\x2e","\x52\xdc","\x72\x68","\xdc\x4d","\x73\x6b","\xfe\x2c",
    "\x72\x7a","\x75\x66","\x75\x62","\x63\x6c","\x2e\x94","\x73\x79","\x31\x39",
    "\x74\x70","\x2e\x6f","\x3a\x2f","\x30\x31","\x6b\xf6","\x75\x67","\x30\x35",
    "\x2f\x2f","\x41\xd0","\xe2\x74","\xfc\x63","\x43\x65","\x6f\x75","\x55\x27",
    "\x61\x75","\x31\x30","\x59\x65","\x27\xfc","\x6c\x70","\x6d\xe2","\x4d\x27",
    "\xe2\x68","\x63\x72","\xd0\x4c","\xfd\x63","\x6b\x68","\x41\x6d","\x41\x79",
    "\x56\x65","\x52\x41","\x72\x27","\x68\x72","\x4b\x47","\x74\x2c","\x6b\xe7",
    "\x47\x42","\x56\x4b","\x6c\x6f","\x41\x68","\x63\x6f","\x47\x61","\x62\x6c",
    "\x41\x48","\x76\x75","\x69\x61","\x4c\x69","\x48\xdd","\x44\x41","\x6b\xe2",
    "\x59\xf6","\x7a\xe2","\x6f\xfe","\x6c\x2d","\x4b\x65","\x41\x42","\x75\x2e",
    "\x62\x6f","\xe7\x6d","\xe7\x74","\x7a\x6d","\x45\xf0","\x50\x41","\xfd\x66",
    "\x74\x27","\x41\x74","\x6d\x63","\x70\x69","\x7a\x6f","\x6c\x63","\x61\x22",
    "\xf0\x64","\x75\x63","\xdd\x6c","\x4f\xd0","\x65\x2d","\x2e\x22","\x6f\x64",
    "\xe2\x6d",
);
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Value handed back verbatim by dist_ratio(). It is stored as a string, so
# callers receive the exact text 0.94.
# NOTE(review): presumably a threshold consumed by the parent detector
# class - confirm against EBook::Ishmael::CharDet::CP.
my $DIST_RATIO = q{0.94};
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Code-page-specific single bytes that are folded into %IGNORE alongside the
# ASCII whitespace set. Keys are one-byte strings; every value is 1.
#   0x81 0x8d 0x8e 0x8f 0x90 0x9d 0x9e - annotated "Alt" by the original author
#       NOTE(review): these look like the byte values Windows-1254 leaves
#       unassigned - verify against the code page table.
#   0xa0 - NBSP
my %CHARSET_SPACE_SET = map { ( chr($_), 1 ) } (
    0x81, 0x8d, 0x8e, 0x8f, 0x90, 0x9d, 0x9e,
    0xa0,
);
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Union of the shared ASCII whitespace set and this code page's own extra
# bytes; ignore() tests membership in it. Code-page entries are merged last,
# so they win if a key occurs in both sets.
# NOTE(review): this file does not load EBook::Ishmael::CharDet::Constants
# itself - presumably the parent class does; confirm, otherwise the ASCII part
# is silently empty.
my %IGNORE = %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET;
@IGNORE{ keys %CHARSET_SPACE_SET } = values %CHARSET_SPACE_SET;
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# ignore($byte)
# Method: returns true when $byte is a key of %IGNORE (the ASCII whitespace
# set merged with the code-page-specific bytes), false otherwise.
sub ignore {
    my (undef, $octet) = @_;    # the invocant is not used

    return exists $IGNORE{$octet};
}
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# freq_bigram($bigram)
# Method: returns true when $bigram is one of the keys of %CP1254_FREQS,
# false otherwise.
sub freq_bigram {
    my $pair = $_[1];    # $_[0] is the invocant, which is not used

    return exists $CP1254_FREQS{$pair};
}
|
121
|
|
|
|
|
|
|
|
|
122
|
0
|
|
|
0
|
0
|
0
|
# dist_ratio()
# Method: returns the file-level $DIST_RATIO value unchanged.
sub dist_ratio {
    return $DIST_RATIO;
}
|
123
|
|
|
|
|
|
|
|
|
124
|
67
|
|
|
67
|
0
|
209
|
# encoding()
# Method: returns the name of the encoding this package handles.
sub encoding {
    return 'CP1254';
}
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
1; |