| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP1256; |
|
2
|
18
|
|
|
18
|
|
343
|
use 5.016; |
|
|
18
|
|
|
|
|
68
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
102
|
use strict; |
|
|
18
|
|
|
|
|
36
|
|
|
|
18
|
|
|
|
|
463
|
|
|
5
|
18
|
|
|
18
|
|
111
|
use warnings; |
|
|
18
|
|
|
|
|
35
|
|
|
|
18
|
|
|
|
|
1001
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
117
|
use parent 'EBook::Ishmael::CharDet::CP'; |
|
|
18
|
|
|
|
|
35
|
|
|
|
18
|
|
|
|
|
107
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Generated from contrib/512-bigram.pl from various Arabic texts from |
|
10
|
|
|
|
|
|
|
# Project Gutenberg and Wikisource. |
|
11
|
|
|
|
|
|
|
my %CP1256_FREQS = map { $_ => 1 } ( |
|
12
|
|
|
|
|
|
|
"\xc7\xe1","\xe1\xf3","\xf3\xc7","\xf3\xf8","\xe4\xf3","\xe6\xf3","\xf6\xed", |
|
13
|
|
|
|
|
|
|
"\xf3\xe1","\xe3\xf3","\xc3\xf3","\xf5\xe6","\xe1\xfa","\xe6\xc7","\xe5\xf5", |
|
14
|
|
|
|
|
|
|
"\xda\xf3","\xf3\xe4","\xed\xf3","\xf6\xe4","\xc7\xe4","\xca\xf3","\xe1\xe3", |
|
15
|
|
|
|
|
|
|
"\xdf\xf3","\xdd\xf3","\xd1\xf3","\xe1\xc7","\xe3\xf6","\xe1\xf6","\xf5\xe3", |
|
16
|
|
|
|
|
|
|
"\xe3\xfa","\xf3\xe3","\xed\xe4","\xf3\xed","\xc5\xf6","\xc8\xf6","\xe5\xf6", |
|
17
|
|
|
|
|
|
|
"\xe6\xe4","\xdd\xed","\xde\xf3","\xf3\xe5","\xf3\xd1","\xc8\xf3","\xe3\xc7", |
|
18
|
|
|
|
|
|
|
"\xe3\xe4","\xf3\xda","\xed\xfa","\xe3\xf5","\xf3\xca","\xf6\xe1","\xe5\xc7", |
|
19
|
|
|
|
|
|
|
"\xe5\xf3","\xdf\xf5","\xe4\xfa","\xed\xc9","\xe6\xe1","\xf3\xc8","\xf6\xf8", |
|
20
|
|
|
|
|
|
|
"\xe1\xe1","\xf8\xe5","\xfa\xe3","\xda\xe1","\xf3\xe6","\xf3\xdf","\xe1\xec", |
|
21
|
|
|
|
|
|
|
"\xf3\xcf","\xf3\xec","\xd3\xf3","\xfa\xca","\xc7\xd1","\xdd\xf6","\xf3\xde", |
|
22
|
|
|
|
|
|
|
"\xc8\xc7","\xd1\xf6","\xc3\xe4","\xe1\xed","\xed\xd1","\xf8\xc7","\xf3\xc9", |
|
23
|
|
|
|
|
|
|
"\xc7\xe3","\xe4\xc7","\xf6\xe5","\xf3\xc3","\xe4\xf6","\xfa\xd1","\xc7\xc8", |
|
24
|
|
|
|
|
|
|
"\xd1\xc7","\xe1\xf5","\xfa\xe4","\xcd\xf3","\xcf\xf3","\xc7\xca","\xfa\xe1", |
|
25
|
|
|
|
|
|
|
"\xd0\xf3","\xe6\xd1","\xca\xf5","\xd1\xed","\xe1\xc3","\xe1\xd3","\xe1\xcd", |
|
26
|
|
|
|
|
|
|
"\xed\xe5","\xf6\xe3","\xe1\xca","\xd1\xf5","\xc7\xcf","\xe1\xda","\xc7\xd3", |
|
27
|
|
|
|
|
|
|
"\xf6\xdf","\xe1\xe4","\xe6\xfa","\xda\xfa","\xfa\xe5","\xe1\xdf","\xca\xf6", |
|
28
|
|
|
|
|
|
|
"\xe1\xe5","\xed\xf5","\xdf\xc7","\xf3\xdd","\xcc\xf3","\xd1\xfa","\xf3\xd3", |
|
29
|
|
|
|
|
|
|
"\xf3\xc2","\xe4\xe5","\xc8\xed","\xf5\xf8","\xed\xc7","\xe4\xf5","\xc7\xc1", |
|
30
|
|
|
|
|
|
|
"\xed\xe3","\xd0\xf6","\xcf\xf6","\xf5\xe1","\xe4\xed","\xe6\xe3","\xe5\xe3", |
|
31
|
|
|
|
|
|
|
"\xc5\xe1","\xcf\xf5","\xf3\xd0","\xf0\xc7","\xce\xf3","\xde\xf5","\xde\xc7", |
|
32
|
|
|
|
|
|
|
"\xe1\xe6","\xd4\xf3","\xfa\xda","\xd3\xfa","\xed\xe1","\xe1\xc8","\xf3\xcc", |
|
33
|
|
|
|
|
|
|
"\xe4\xca","\xf3\xcd","\xe3\xed","\xe1\xde","\xf6\xd1","\xc1\xf3","\xf8\xe4", |
|
34
|
|
|
|
|
|
|
"\xed\xcf","\xca\xe5","\xe3\xda","\xc8\xf5","\xd3\xca","\xf5\xe4","\xe1\xdd", |
|
35
|
|
|
|
|
|
|
"\xda\xc7","\xde\xcf","\xfa\xc8","\xcf\xc7","\xd5\xf3","\xfa\xc3","\xdd\xc7", |
|
36
|
|
|
|
|
|
|
"\xf5\xd1","\xfa\xdf","\xd1\xc9","\xd3\xed","\xe5\xd0","\xc7\xda","\xd0\xc7", |
|
37
|
|
|
|
|
|
|
"\xcf\xfa","\xda\xe4","\xf8\xd0","\xd3\xc7","\xcf\xed","\xe1\xd4","\xda\xf6", |
|
38
|
|
|
|
|
|
|
"\xcd\xfa","\xe6\xde","\xf3\xc5","\xc8\xfa","\xe6\xe5","\xe1\xcf","\xe3\xe6", |
|
39
|
|
|
|
|
|
|
"\xe1\xd1","\xfa\xd3","\xc7\xe5","\xd3\xf6","\xc7\xc6","\xf5\xe5","\xda\xcf", |
|
40
|
|
|
|
|
|
|
"\xda\xf5","\xde\xf6","\xc8\xda","\xe3\xc9","\xd1\xc8","\xfa\xcf","\xe3\xd1", |
|
41
|
|
|
|
|
|
|
"\xc8\xe1","\xe3\xe1","\xed\xda","\xc3\xe3","\xe1\xcc","\xca\xed","\xc7\xcd", |
|
42
|
|
|
|
|
|
|
"\xdf\xf6","\xd1\xe6","\xf8\xe3","\xc9\xf6","\xd8\xf3","\xfa\xdd","\xfa\xde", |
|
43
|
|
|
|
|
|
|
"\xed\xde","\xc7\xdd","\xda\xd1","\xc8\xe5","\xde\xe6","\xc7\xde","\xf6\xc8", |
|
44
|
|
|
|
|
|
|
"\xf6\xd0","\xca\xfa","\xf3\xd5","\xe6\xcf","\xca\xe3","\xc3\xe6","\xca\xc7", |
|
45
|
|
|
|
|
|
|
"\xc8\xd1","\xde\xed","\xd3\xf5","\xcd\xf6","\xe5\xe6","\xdf\xe1","\xdf\xe6", |
|
46
|
|
|
|
|
|
|
"\xe6\xd3","\xe4\xd3","\xfa\xcd","\xe6\xca","\xdd\xd1","\xe3\xca","\xc7\xed", |
|
47
|
|
|
|
|
|
|
"\xca\xd1","\xe6\xdd","\xe6\xed","\xe1\xc9","\xdd\xfa","\xc7\xf0","\xc2\xc1", |
|
48
|
|
|
|
|
|
|
"\xf8\xed","\xd3\xe6","\xf3\xce","\xdb\xf3","\xcd\xc7","\xcf\xc9","\xd4\xd1", |
|
49
|
|
|
|
|
|
|
"\xed\xd3","\xdf\xd1","\xe6\xc3","\xe1\xd5","\xe4\xdd","\xe1\xd0","\xcc\xe3", |
|
50
|
|
|
|
|
|
|
"\xd0\xe1","\xf8\xdf","\xd2\xf3","\xca\xde","\xd5\xe1","\xe3\xd3","\xed\xca", |
|
51
|
|
|
|
|
|
|
"\xe6\xda","\xe6\xc8","\xd3\xe1","\xe1\xce","\xd1\xdf","\xf8\xc8","\xd6\xf3", |
|
52
|
|
|
|
|
|
|
"\xcc\xc7","\xdd\xf5","\xde\xe1","\xcd\xcf","\xda\xed","\xe4\xcf","\xe6\xdf", |
|
53
|
|
|
|
|
|
|
"\xcf\xe6","\xc3\xf5","\xcf\xe3","\xfa\xd6","\xdf\xe3","\xf3\xd4","\xc8\xe4", |
|
54
|
|
|
|
|
|
|
"\xf6\xda","\xf8\xe1","\xd0\xe5","\xe3\xcf","\xe3\xcd","\xed\xe6","\xd1\xe5", |
|
55
|
|
|
|
|
|
|
"\xca\xcd","\xcc\xfa","\xde\xfa","\xd8\xc7","\xe6\xcc","\xe1\xd8","\xc6\xf6", |
|
56
|
|
|
|
|
|
|
"\xc3\xed","\xc7\xcc","\xdf\xe4","\xfa\xed","\xcd\xed","\xe4\xc9","\xcf\xd1", |
|
57
|
|
|
|
|
|
|
"\xc7\xd8","\xcd\xd1","\xf6\xcf","\xde\xd1","\xcd\xe3","\xe4\xe6","\xd3\xe3", |
|
58
|
|
|
|
|
|
|
"\xc7\xe6","\xed\xc8","\xdf\xfa","\xca\xda","\xfa\xe6","\xdd\xe1","\xe5\xed", |
|
59
|
|
|
|
|
|
|
"\xf3\xd2","\xc5\xe4","\xcc\xf5","\xed\xdd","\xe5\xfa","\xce\xfa","\xca\xe6", |
|
60
|
|
|
|
|
|
|
"\xca\xe4","\xf3\xd8","\xcb\xc7","\xd1\xdd","\xca\xc8","\xd0\xed","\xed\xdf", |
|
61
|
|
|
|
|
|
|
"\xce\xe1","\xc3\xce","\xd4\xc7","\xf5\xdf","\xcb\xf3","\xd1\xd3","\xcd\xdf", |
|
62
|
|
|
|
|
|
|
"\xc3\xfa","\xc8\xcf","\xde\xc8","\xd5\xc7","\xf3\xd6","\xd6\xf6","\xd1\xe4", |
|
63
|
|
|
|
|
|
|
"\xdf\xed","\xd1\xcc","\xda\xe3","\xf3\xdb","\xc7\xdf","\xcb\xf5","\xcf\xe5", |
|
64
|
|
|
|
|
|
|
"\xe3\xe5","\xd4\xed","\xfa\xcc","\xce\xd1","\xcd\xca","\xd2\xf6","\xcc\xf6", |
|
65
|
|
|
|
|
|
|
"\xd5\xf6","\xf5\xd3","\xd3\xe4","\xc9\xf3","\xdb\xed","\xdd\xde","\xf8\xc9", |
|
66
|
|
|
|
|
|
|
"\xda\xc9","\xca\xe1","\xc8\xc9","\xc7\xd5","\xc8\xe6","\xf8\xe6","\xf6\xde", |
|
67
|
|
|
|
|
|
|
"\xda\xca","\xdf\xca","\xe1\xcb","\xce\xc7","\xc9\xa1","\xdd\xc9","\xdd\xd3", |
|
68
|
|
|
|
|
|
|
"\xcd\xf5","\xd5\xfa","\xf5\xcf","\xc3\xd1","\xd1\xca","\xcb\xe3","\xe3\xdf", |
|
69
|
|
|
|
|
|
|
"\xcd\xde","\xe3\xde","\xda\xc8","\xd5\xd1","\xf8\xd1","\xcc\xe6","\xe6\xcd", |
|
70
|
|
|
|
|
|
|
"\xc9\xf5","\xc7\xd4","\xdd\xca","\xe1\xc5","\xe4\xc8","\xf6\xd3","\xd6\xc7", |
|
71
|
|
|
|
|
|
|
"\xd5\xe6","\xd9\xf3","\xe1\xdb","\xf6\xca","\xf8\xcd","\xcb\xe1","\xd4\xfa", |
|
72
|
|
|
|
|
|
|
"\xe3\xd4","\xf3\xcb","\xf8\xca","\xd0\xfa","\xe4\xdf","\xc1\xf6","\xcd\xe1", |
|
73
|
|
|
|
|
|
|
"\xca\xdd","\xf8\xec","\xe5\xd1","\xda\xe5","\xd8\xe6","\xed\xcd","\xd3\xc8", |
|
74
|
|
|
|
|
|
|
"\xf6\xcd","\xc8\xca","\xce\xf6","\xc3\xda","\xed\xf6","\xcc\xcf","\xe6\xd6", |
|
75
|
|
|
|
|
|
|
"\xf5\xc4","\xe4\xd9","\xd0\xdf","\xc7\xd6","\xe3\xd5","\xc3\xcd","\xf8\xde", |
|
76
|
|
|
|
|
|
|
"\xda\xe6","\xf5\xc8","\xde\xc9","\xe3\xcb","\xe4\xde","\xed\xce","\xc4\xfa", |
|
77
|
|
|
|
|
|
|
"\xc8\xc3","\xc3\xd3","\xd1\xcf","\xc7\xce","\xd2\xc7","\xcd\xd3","\xf6\xdd", |
|
78
|
|
|
|
|
|
|
"\xcc\xe5","\xed\xed","\xe6\xd5","\xd1\xde","\xc8\xde","\xd2\xed","\xcc\xe1", |
|
79
|
|
|
|
|
|
|
"\xe6\xd8","\xd8\xe1","\xf5\xcd","\xc8\xe3","\xca\xdf","\xdb\xfa","\xf3\xc6", |
|
80
|
|
|
|
|
|
|
"\xdd\xda","\xe4\xd5","\xde\xca","\xda\xd6","\xd3\xd8","\xca\xd5","\xfa\xd8", |
|
81
|
|
|
|
|
|
|
"\xcc\xed","\xce\xed","\xca\xcf","\xfa\xce","\xd1\xd6","\xcc\xe4","\xf3\xd9", |
|
82
|
|
|
|
|
|
|
"\xdd\xe6","\xd8\xc8","\xd8\xd1","\xd6\xed","\xf5\xde","\xe4\xda","\xdd\xe5", |
|
83
|
|
|
|
|
|
|
"\xc8\xcd","\xd0\xf5","\xe1\xc2","\xc7\xa1","\xd3\xdf","\xed\xcb","\xe3\xcc", |
|
84
|
|
|
|
|
|
|
"\xe3\xc4","\xe5\xcf","\xc1\xf5","\xd8\xf6","\xfa\xd2","\xed\xd8","\xe6\xc9", |
|
85
|
|
|
|
|
|
|
"\xd8\xed", |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Value handed back verbatim by dist_ratio(); stored as a string literal.
my $DIST_RATIO = '0.91';

# Space-like bytes listed specifically for this charset.
my %CHARSET_SPACE_SET = (
    "\xa0" => 1, # NBSP
);

# Lookup set consulted by ignore(): the ASCII space set from the Constants
# package merged with the charset-specific set above.
# NOTE(review): this file has no visible 'use' of
# EBook::Ishmael::CharDet::Constants; presumably the parent class loads it
# before this statement runs -- confirm.
my %IGNORE = (
    %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
    %CHARSET_SPACE_SET,
);
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Tell whether a byte is in the ignore set.
#
# Arguments: the invocant (unused) and the byte to look up.
# Returns:   true if the byte is a key of %IGNORE, false otherwise.
sub ignore {

    my (undef, $octet) = @_;

    # Membership only; the stored value is never read.
    return exists $IGNORE{$octet};

}
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Tell whether a bigram appears in the CP1256 bigram table.
#
# Arguments: the invocant (unused) and the bigram to look up.
# Returns:   true if the bigram is a key of %CP1256_FREQS, false otherwise.
sub freq_bigram {

    my (undef, $pair) = @_;

    # Membership only; the stored value is never read.
    return exists $CP1256_FREQS{$pair};

}
|
114
|
|
|
|
|
|
|
|
|
115
|
0
|
|
|
0
|
0
|
0
|
# Accessor for the file-level $DIST_RATIO setting; takes no meaningful
# arguments and returns the stored value unchanged.
sub dist_ratio {
    return $DIST_RATIO;
}
|
116
|
|
|
|
|
|
|
|
|
117
|
67
|
|
|
67
|
0
|
251
|
# Name of the encoding this module handles; always the string 'CP1256'.
sub encoding {
    return 'CP1256';
}
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
1; |