File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1256.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1256;
2 18     18   343 use 5.016;
  18         68  
3             our $VERSION = '2.03';
4 18     18   102 use strict;
  18         36  
  18         463  
5 18     18   111 use warnings;
  18         35  
  18         1001  
6              
7 18     18   117 use parent 'EBook::Ishmael::CharDet::CP';
  18         35  
  18         107  
8              
9             # Generated from contrib/512-bigram.pl from various Arabic texts from
10             # Project Gutenberg and Wikisource.
11             my %CP1256_FREQS = map { $_ => 1 } (
12             "\xc7\xe1","\xe1\xf3","\xf3\xc7","\xf3\xf8","\xe4\xf3","\xe6\xf3","\xf6\xed",
13             "\xf3\xe1","\xe3\xf3","\xc3\xf3","\xf5\xe6","\xe1\xfa","\xe6\xc7","\xe5\xf5",
14             "\xda\xf3","\xf3\xe4","\xed\xf3","\xf6\xe4","\xc7\xe4","\xca\xf3","\xe1\xe3",
15             "\xdf\xf3","\xdd\xf3","\xd1\xf3","\xe1\xc7","\xe3\xf6","\xe1\xf6","\xf5\xe3",
16             "\xe3\xfa","\xf3\xe3","\xed\xe4","\xf3\xed","\xc5\xf6","\xc8\xf6","\xe5\xf6",
17             "\xe6\xe4","\xdd\xed","\xde\xf3","\xf3\xe5","\xf3\xd1","\xc8\xf3","\xe3\xc7",
18             "\xe3\xe4","\xf3\xda","\xed\xfa","\xe3\xf5","\xf3\xca","\xf6\xe1","\xe5\xc7",
19             "\xe5\xf3","\xdf\xf5","\xe4\xfa","\xed\xc9","\xe6\xe1","\xf3\xc8","\xf6\xf8",
20             "\xe1\xe1","\xf8\xe5","\xfa\xe3","\xda\xe1","\xf3\xe6","\xf3\xdf","\xe1\xec",
21             "\xf3\xcf","\xf3\xec","\xd3\xf3","\xfa\xca","\xc7\xd1","\xdd\xf6","\xf3\xde",
22             "\xc8\xc7","\xd1\xf6","\xc3\xe4","\xe1\xed","\xed\xd1","\xf8\xc7","\xf3\xc9",
23             "\xc7\xe3","\xe4\xc7","\xf6\xe5","\xf3\xc3","\xe4\xf6","\xfa\xd1","\xc7\xc8",
24             "\xd1\xc7","\xe1\xf5","\xfa\xe4","\xcd\xf3","\xcf\xf3","\xc7\xca","\xfa\xe1",
25             "\xd0\xf3","\xe6\xd1","\xca\xf5","\xd1\xed","\xe1\xc3","\xe1\xd3","\xe1\xcd",
26             "\xed\xe5","\xf6\xe3","\xe1\xca","\xd1\xf5","\xc7\xcf","\xe1\xda","\xc7\xd3",
27             "\xf6\xdf","\xe1\xe4","\xe6\xfa","\xda\xfa","\xfa\xe5","\xe1\xdf","\xca\xf6",
28             "\xe1\xe5","\xed\xf5","\xdf\xc7","\xf3\xdd","\xcc\xf3","\xd1\xfa","\xf3\xd3",
29             "\xf3\xc2","\xe4\xe5","\xc8\xed","\xf5\xf8","\xed\xc7","\xe4\xf5","\xc7\xc1",
30             "\xed\xe3","\xd0\xf6","\xcf\xf6","\xf5\xe1","\xe4\xed","\xe6\xe3","\xe5\xe3",
31             "\xc5\xe1","\xcf\xf5","\xf3\xd0","\xf0\xc7","\xce\xf3","\xde\xf5","\xde\xc7",
32             "\xe1\xe6","\xd4\xf3","\xfa\xda","\xd3\xfa","\xed\xe1","\xe1\xc8","\xf3\xcc",
33             "\xe4\xca","\xf3\xcd","\xe3\xed","\xe1\xde","\xf6\xd1","\xc1\xf3","\xf8\xe4",
34             "\xed\xcf","\xca\xe5","\xe3\xda","\xc8\xf5","\xd3\xca","\xf5\xe4","\xe1\xdd",
35             "\xda\xc7","\xde\xcf","\xfa\xc8","\xcf\xc7","\xd5\xf3","\xfa\xc3","\xdd\xc7",
36             "\xf5\xd1","\xfa\xdf","\xd1\xc9","\xd3\xed","\xe5\xd0","\xc7\xda","\xd0\xc7",
37             "\xcf\xfa","\xda\xe4","\xf8\xd0","\xd3\xc7","\xcf\xed","\xe1\xd4","\xda\xf6",
38             "\xcd\xfa","\xe6\xde","\xf3\xc5","\xc8\xfa","\xe6\xe5","\xe1\xcf","\xe3\xe6",
39             "\xe1\xd1","\xfa\xd3","\xc7\xe5","\xd3\xf6","\xc7\xc6","\xf5\xe5","\xda\xcf",
40             "\xda\xf5","\xde\xf6","\xc8\xda","\xe3\xc9","\xd1\xc8","\xfa\xcf","\xe3\xd1",
41             "\xc8\xe1","\xe3\xe1","\xed\xda","\xc3\xe3","\xe1\xcc","\xca\xed","\xc7\xcd",
42             "\xdf\xf6","\xd1\xe6","\xf8\xe3","\xc9\xf6","\xd8\xf3","\xfa\xdd","\xfa\xde",
43             "\xed\xde","\xc7\xdd","\xda\xd1","\xc8\xe5","\xde\xe6","\xc7\xde","\xf6\xc8",
44             "\xf6\xd0","\xca\xfa","\xf3\xd5","\xe6\xcf","\xca\xe3","\xc3\xe6","\xca\xc7",
45             "\xc8\xd1","\xde\xed","\xd3\xf5","\xcd\xf6","\xe5\xe6","\xdf\xe1","\xdf\xe6",
46             "\xe6\xd3","\xe4\xd3","\xfa\xcd","\xe6\xca","\xdd\xd1","\xe3\xca","\xc7\xed",
47             "\xca\xd1","\xe6\xdd","\xe6\xed","\xe1\xc9","\xdd\xfa","\xc7\xf0","\xc2\xc1",
48             "\xf8\xed","\xd3\xe6","\xf3\xce","\xdb\xf3","\xcd\xc7","\xcf\xc9","\xd4\xd1",
49             "\xed\xd3","\xdf\xd1","\xe6\xc3","\xe1\xd5","\xe4\xdd","\xe1\xd0","\xcc\xe3",
50             "\xd0\xe1","\xf8\xdf","\xd2\xf3","\xca\xde","\xd5\xe1","\xe3\xd3","\xed\xca",
51             "\xe6\xda","\xe6\xc8","\xd3\xe1","\xe1\xce","\xd1\xdf","\xf8\xc8","\xd6\xf3",
52             "\xcc\xc7","\xdd\xf5","\xde\xe1","\xcd\xcf","\xda\xed","\xe4\xcf","\xe6\xdf",
53             "\xcf\xe6","\xc3\xf5","\xcf\xe3","\xfa\xd6","\xdf\xe3","\xf3\xd4","\xc8\xe4",
54             "\xf6\xda","\xf8\xe1","\xd0\xe5","\xe3\xcf","\xe3\xcd","\xed\xe6","\xd1\xe5",
55             "\xca\xcd","\xcc\xfa","\xde\xfa","\xd8\xc7","\xe6\xcc","\xe1\xd8","\xc6\xf6",
56             "\xc3\xed","\xc7\xcc","\xdf\xe4","\xfa\xed","\xcd\xed","\xe4\xc9","\xcf\xd1",
57             "\xc7\xd8","\xcd\xd1","\xf6\xcf","\xde\xd1","\xcd\xe3","\xe4\xe6","\xd3\xe3",
58             "\xc7\xe6","\xed\xc8","\xdf\xfa","\xca\xda","\xfa\xe6","\xdd\xe1","\xe5\xed",
59             "\xf3\xd2","\xc5\xe4","\xcc\xf5","\xed\xdd","\xe5\xfa","\xce\xfa","\xca\xe6",
60             "\xca\xe4","\xf3\xd8","\xcb\xc7","\xd1\xdd","\xca\xc8","\xd0\xed","\xed\xdf",
61             "\xce\xe1","\xc3\xce","\xd4\xc7","\xf5\xdf","\xcb\xf3","\xd1\xd3","\xcd\xdf",
62             "\xc3\xfa","\xc8\xcf","\xde\xc8","\xd5\xc7","\xf3\xd6","\xd6\xf6","\xd1\xe4",
63             "\xdf\xed","\xd1\xcc","\xda\xe3","\xf3\xdb","\xc7\xdf","\xcb\xf5","\xcf\xe5",
64             "\xe3\xe5","\xd4\xed","\xfa\xcc","\xce\xd1","\xcd\xca","\xd2\xf6","\xcc\xf6",
65             "\xd5\xf6","\xf5\xd3","\xd3\xe4","\xc9\xf3","\xdb\xed","\xdd\xde","\xf8\xc9",
66             "\xda\xc9","\xca\xe1","\xc8\xc9","\xc7\xd5","\xc8\xe6","\xf8\xe6","\xf6\xde",
67             "\xda\xca","\xdf\xca","\xe1\xcb","\xce\xc7","\xc9\xa1","\xdd\xc9","\xdd\xd3",
68             "\xcd\xf5","\xd5\xfa","\xf5\xcf","\xc3\xd1","\xd1\xca","\xcb\xe3","\xe3\xdf",
69             "\xcd\xde","\xe3\xde","\xda\xc8","\xd5\xd1","\xf8\xd1","\xcc\xe6","\xe6\xcd",
70             "\xc9\xf5","\xc7\xd4","\xdd\xca","\xe1\xc5","\xe4\xc8","\xf6\xd3","\xd6\xc7",
71             "\xd5\xe6","\xd9\xf3","\xe1\xdb","\xf6\xca","\xf8\xcd","\xcb\xe1","\xd4\xfa",
72             "\xe3\xd4","\xf3\xcb","\xf8\xca","\xd0\xfa","\xe4\xdf","\xc1\xf6","\xcd\xe1",
73             "\xca\xdd","\xf8\xec","\xe5\xd1","\xda\xe5","\xd8\xe6","\xed\xcd","\xd3\xc8",
74             "\xf6\xcd","\xc8\xca","\xce\xf6","\xc3\xda","\xed\xf6","\xcc\xcf","\xe6\xd6",
75             "\xf5\xc4","\xe4\xd9","\xd0\xdf","\xc7\xd6","\xe3\xd5","\xc3\xcd","\xf8\xde",
76             "\xda\xe6","\xf5\xc8","\xde\xc9","\xe3\xcb","\xe4\xde","\xed\xce","\xc4\xfa",
77             "\xc8\xc3","\xc3\xd3","\xd1\xcf","\xc7\xce","\xd2\xc7","\xcd\xd3","\xf6\xdd",
78             "\xcc\xe5","\xed\xed","\xe6\xd5","\xd1\xde","\xc8\xde","\xd2\xed","\xcc\xe1",
79             "\xe6\xd8","\xd8\xe1","\xf5\xcd","\xc8\xe3","\xca\xdf","\xdb\xfa","\xf3\xc6",
80             "\xdd\xda","\xe4\xd5","\xde\xca","\xda\xd6","\xd3\xd8","\xca\xd5","\xfa\xd8",
81             "\xcc\xed","\xce\xed","\xca\xcf","\xfa\xce","\xd1\xd6","\xcc\xe4","\xf3\xd9",
82             "\xdd\xe6","\xd8\xc8","\xd8\xd1","\xd6\xed","\xf5\xde","\xe4\xda","\xdd\xe5",
83             "\xc8\xcd","\xd0\xf5","\xe1\xc2","\xc7\xa1","\xd3\xdf","\xed\xcb","\xe3\xcc",
84             "\xe3\xc4","\xe5\xcf","\xc1\xf5","\xd8\xf6","\xfa\xd2","\xed\xd8","\xe6\xc9",
85             "\xd8\xed",
86             );
87              
88             my $DIST_RATIO = '0.91';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\xa0", # NBSP
92             );
93              
94             my %IGNORE = (
95             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
96             %CHARSET_SPACE_SET,
97             );
98              
99             sub ignore {
100              
101 548864     548864 0 874777 my ($self, $byte) = @_;
102              
103 548864         1460973 return exists $IGNORE{ $byte };
104              
105             }
106              
107             sub freq_bigram {
108              
109 420149     420149 0 674618 my ($self, $bigram) = @_;
110              
111 420149         1090606 return exists $CP1256_FREQS{ $bigram };
112              
113             }
114              
115 0     0 0 0 sub dist_ratio { $DIST_RATIO };
116              
117 67     67 0 251 sub encoding { 'CP1256' }
118              
119             1;