File Coverage

blib/lib/EBook/Ishmael/CharDet/CP1255.pm
Criterion Covered Total %
statement 16 17 94.1
branch n/a
condition n/a
subroutine 7 8 87.5
pod 0 4 0.0
total 23 29 79.3


line stmt bran cond sub pod time code
1             package EBook::Ishmael::CharDet::CP1255;
2 18     18   397 use 5.016;
  18         67  
3             our $VERSION = '2.03';
4 18     18   115 use strict;
  18         34  
  18         516  
5 18     18   118 use warnings;
  18         37  
  18         1066  
6              
7 18     18   103 use parent 'EBook::Ishmael::CharDet::CP';
  18         31  
  18         115  
8              
9             # Generated from contrib/512-bigram.pl from various Hebrew texts from
10             # Project Gutenberg.
11             my %CP1255_FREQS = map { $_ => 1 } (
12             "\xe5\xfa","\xe9\xed","\xf0\xe9","\xec\xe9","\xe9\xe5","\xe4\xe9","\xe0\xe5",
13             "\xe5\xec","\xfa\xe9","\xf8\xe9","\xec\xe4","\xe1\xe9","\xe0\xfa","\xe5\xe0",
14             "\xe5\xf8","\xec\xe0","\xf8\xe5","\xf0\xe5","\xf2\xec","\xf9\xec","\xee\xe5",
15             "\xeb\xec","\xec\xe5","\xe0\xe9","\xee\xe9","\xe4\xe5","\xe5\xe1","\xee\xe4",
16             "\xe9\xfa","\xe9\xe4","\xf9\xe9","\xee\xf8","\xe1\xf8","\xe1\xe5","\xe7\xe5",
17             "\xf2\xe5","\xe9\xe9","\xf9\xe5","\xe0\xf0","\xe0\xec","\xe5\xe3","\xe9\xf8",
18             "\xe5\xee","\xe6\xe4","\xf9\xe4","\xe5\xe4","\xe9\xf0","\xe0\xe7","\xfa\xe5",
19             "\xe3\xe9","\xe9\xf9","\xe0\xee","\xe4\xee","\xe9\xec","\xf8\xe0","\xe4\x2e",
20             "\xe9\xef","\xe4\xe0","\xf0\xe4","\xe5\xf0","\xee\xf9","\xeb\xe5","\xe1\xe4",
21             "\xe1\xec","\xf7\xe5","\xe5\xf9","\xe9\xe3","\xe3\xe5","\xf9\xe1","\xf2\xe9",
22             "\xf8\xe4","\xeb\xe9","\xfa\xe4","\xee\xfa","\xe0\xf9","\xe7\xe9","\xe9\xe0",
23             "\xf9\xf8","\xe4\xf9","\xe1\xf2","\xf9\xee","\xee\xf2","\xe5\xeb","\xe5\xe9",
24             "\xf9\xe0","\xe4\x2c","\xf4\xe9","\xed\x2e","\xe4\xf8","\x74\x68","\xe5\xef",
25             "\xf8\xe1","\xe7\xf8","\xe4\xfa","\xe5\xed","\xe4\xf2","\xf4\xe5","\xe4\xed",
26             "\xe5\xf2","\xe0\xe1","\xf7\xf8","\xe1\xe0","\xf4\xf8","\xe5\xe7","\xf2\xf8",
27             "\xe4\xe7","\x74\x65","\xe1\xfa","\xe9\xe1","\xed\x2c","\xe9\xf2","\xec\xee",
28             "\xee\xe0","\x65\x72","\xfa\xf8","\xe7\xe3","\xe4\xe1","\xfa\x2e","\xf7\xe9",
29             "\xe0\xf8","\xf8\xfa","\xec\xe1","\x6f\x72","\xe5\xf7","\xe9\xeb","\x68\x65",
30             "\xe4\xf0","\xf6\xe9","\xf1\xe5","\xe0\xe4","\xe3\xf8","\xeb\xee","\xe1\xee",
31             "\x6f\x6e","\xeb\xe1","\xe5\xf4","\xec\xfa","\xf9\xf0","\x69\x6e","\xf1\xe9",
32             "\xe1\xf9","\xec\xeb","\xee\xe3","\xe9\xf4","\xf2\xfa","\xe4\xe6","\xe9\xee",
33             "\xf9\xfa","\x22\xe0","\xe8\xe5","\xe7\xfa","\xee\xf0","\xf2\xe3","\xee\xec",
34             "\x2e\x22","\xf6\xe5","\xfa\x2c","\xe4\xe2","\xe4\xeb","\xf4\xf0","\xe9\xe7",
35             "\x65\x6e","\xe9\xf7","\xe3\xe1","\xe7\xec","\xee\xe7","\xeb\xe4","\xe5\x2e",
36             "\xe4\xec","\xe9\x2e","\xfa\xe7","\xe1\xeb","\xeb\xf9","\xe2\xe5","\xe5\xf6",
37             "\xe0\xe3","\xe2\xe9","\x72\x65","\x72\x6f","\xe4\xf7","\xe4\xf4","\xe9\x2c",
38             "\xec\xf2","\xe8\xe9","\xe2\xe3","\x61\x74","\xf2\xf9","\xec\xf9","\xf4\xe4",
39             "\xec\xe7","\xf2\xed","\xee\xf6","\xec\xea","\x22\x2c","\xee\xf7","\xf9\xf2",
40             "\xf8\x2e","\x65\x63","\xf8\xf7","\xe6\xe5","\xe2\xec","\xeb\xfa","\xf2\xee",
41             "\xe1\xe7","\xee\xe1","\xf8\xe2","\x61\x6e","\xf2\xf0","\xe1\xe3","\xe9\xea",
42             "\xf4\xfa","\xe5\xf1","\xe0\xed","\xe1\xf0","\x69\x74","\xf9\xed","\xf6\xf8",
43             "\x63\x74","\x74\x69","\xeb\xef","\xe4\xe3","\x75\x74","\xef\x2e","\xe4\xf6",
44             "\xf2\xf6","\xe3\xe4","\xec\xf4","\xe3\xf2","\xe1\xf7","\xf0\xf9","\xf8\xe7",
45             "\xee\xee","\xfa\xf0","\xee\xef","\xf2\xe4","\xe5\xe2","\xe5\x2c","\x6f\x75",
46             "\xe4\xf1","\xe6\xf8","\xec\xed","\x65\x64","\xeb\xea","\xf2\xe1","\xec\xec",
47             "\xe5\xe5","\xf6\xe4","\xf6\xe0","\xeb\xf8","\xe9\xe8","\xe9\xf6","\xf0\xe7",
48             "\x69\x73","\xec\xe3","\xec\xf0","\xf9\xeb","\xee\xeb","\xf0\xf2","\x6e\x64",
49             "\xe0\xe6","\xf8\xf2","\xef\x2c","\xf1\xf4","\x62\x65","\xf0\xfa","\xf4\xf2",
50             "\x6f\x66","\xee\xf1","\xeb\xf0","\x69\x6f","\xf4\xf9","\xf4\xec","\xe7\xf9",
51             "\x61\x72","\xec\xf7","\xe3\xee","\xfa\xf4","\xf2\xeb","\xf8\x2c","\xee\xf4",
52             "\xfa\xed","\x21\x22","\xf9\xf4","\xe5\xe8","\xe7\xf0","\xe5\xea","\xeb\xe0",
53             "\xe5\xe6","\xe0\xf4","\xf0\xf4","\xe2\xed","\xf6\xee","\x63\x6f","\xe6\xe9",
54             "\x72\x67","\x65\x73","\xe9\xf1","\xf0\xe0","\xe7\xe4","\x22\xe4","\xf7\xe8",
55             "\xe7\xe6","\xf7\xe4","\xf8\xf9","\xe1\xe8","\xec\xe2","\xe5\xf3","\xee\xe6",
56             "\xe7\xee","\xfa\xee","\xe2\xe1","\xe3\xec","\xfa\xf7","\xf6\xec","\xe1\xe1",
57             "\x72\x6b","\x72\x69","\xfa\xe1","\x6e\x62","\xf9\xe7","\x73\x65","\x6e\x67",
58             "\xf8\xe3","\xf9\xf7","\x73\x74","\xe4\xef","\xf7\xf9","\xf8\xea","\x64\x69",
59             "\x50\x72","\x6e\x74","\x69\x63","\xe6\xee","\xfa\xe0","\xec\x2e","\xe3\xed",
60             "\xf0\xf1","\xf8\xf6","\xfa\xf2","\xe8\xf8","\x6a\x65","\xf4\xf1","\x61\x6c",
61             "\xe2\xf8","\x47\x75","\xf0\xf7","\x6f\x6a","\x74\x72","\xe7\xe1","\x63\x65",
62             "\x77\x6f","\xf8\xeb","\xf8\xf5","\xf6\xf2","\xee\xe8","\x6c\x65","\xf1\xfa",
63             "\x72\x61","\x74\x6f","\xe0\xeb","\xf4\xe8","\xf0\xf8","\xe4\x22","\xf1\xe1",
64             "\xe8\xe4","\x68\x69","\xf6\xe1","\x64\x65","\xf4\xe7","\xf7\xf0","\x77\x69",
65             "\xec\xf8","\xe1\xe2","\xe1\xf4","\xf0\xee","\xe6\xf7","\xe6\xe0","\xfa\xec",
66             "\xe9\xe6","\xe3\xfa","\x74\x61","\xe2\xf2","\x66\x6f","\x6d\x61","\xec\xe8",
67             "\xe0\xf1","\xf7\xe3","\x79\x6f","\xe3\xf7","\xf7\xec","\xf2\xe8","\xe1\xf1",
68             "\xec\x2c","\xea\x2c","\xe7\xeb","\xf7\xf6","\x75\x6e","\xea\x2e","\xe8\xe1",
69             "\x6f\x74","\xf0\xf0","\xfa\xeb","\xf7\xe1","\xe3\x2e","\xe1\xe6","\xe9\xe2",
70             "\xe1\xef","\x6e\x73","\xe1\x2e","\xf0\xe2","\xe3\xf9","\xe4\xe8","\xe4\xe4",
71             "\x65\x65","\xeb\xeb","\xe7\xf7","\xf0\xe1","\xfa\xef","\x76\x65","\xe3\xe0",
72             "\x6e\x69","\xf0\xeb","\x6d\x65","\x6c\x6c","\x72\x6d","\xec\xf6","\x63\x68",
73             "\xe2\xf9","\xee\xe2","\xf4\xf7","\xf1\xf8","\xf0\xe3","\xe1\xf6","\x70\x72",
74             "\xf6\xf4","\xe1\x2c","\xe0\xf6","\xfa\xea","\xf2\xf5","\xeb\xe7","\xeb\xe3",
75             "\x6f\x70","\xf1\xec","\x67\x99","\x6c\x61","\x65\x6d","\xf7\xef","\xeb\xed",
76             "\xe4\x21","\x61\x67","\x6c\x69","\xe2\xee","\xf9\xe8","\xe5\xf5","\x22\xee",
77             "\xf9\xf9","\x22\xec","\xe8\xec","\xe8\xf0","\xf8\xf4","\xe6\xeb","\xec\xf1",
78             "\x75\x73","\xed\x22","\x65\x6c","\x68\x61","\x6f\x6d","\xf6\xe3","\xfa\xf9",
79             "\xf7\xf4","\xe3\x2c","\xf8\xf0","\x22\xe6","\x73\x73","\xf6\xe7","\xe0\x2c",
80             "\x72\x74","\x6e\x6f","\xf1\xee","\xe7\xf1","\xe0\xf3","\xf8\xe8","\xe8\xef",
81             "\xeb\xf4","\x6e\x79","\xf6\xfa","\xf2\xf7","\x65\x61","\xf9\x2e","\xf1\xe3",
82             "\xf8\xee","\x6b\x73","\x6e\x65","\x69\x76","\xe3\xf0","\x64\x61","\x70\x61",
83             "\x63\x61","\x31\x2e","\xf1\xf7","\x70\x65","\xe0\xe2","\xf0\xed","\x67\x72",
84             "\x2e\x97","\x61\x63","\x73\x69","\xf7\xfa","\xe7\xf6","\xf2\x2e","\xe9\x22",
85             "\xe7\xf4",
86             );
87              
88             my $DIST_RATIO = '0.91';
89              
90             my %CHARSET_SPACE_SET = map { $_ => 1 } (
91             "\x81", # undef
92             "\x8a", # ^
93             "\x8c", # ^
94             "\x8d", # ^
95             "\x8e", # ^
96             "\x8f", # ^
97             "\x90", # ^
98             "\x9a", # ^
99             "\x9c", # ^
100             "\x9d", # ^
101             "\x9e", # ^
102             "\x9f", # ^
103             "\xaa", # ^
104             "\xa0", # NBSP
105             "\xd9", # undef
106             "\xda", # ^
107             "\xdb", # ^
108             "\xdc", # ^
109             "\xdd", # ^
110             "\xde", # ^
111             "\xfb", # ^
112             "\xfc", # ^
113             "\xff", # undef
114             );
115              
116             my %IGNORE = (
117             %EBook::Ishmael::CharDet::Constants::ASCII_SPACE_SET,
118             %CHARSET_SPACE_SET,
119             );
120              
121             sub ignore {
122              
123 548864     548864 0 899440 my ($self, $byte) = @_;
124              
125 548864         1437975 return exists $IGNORE{ $byte };
126              
127             }
128              
129             sub freq_bigram {
130              
131 372763     372763 0 604484 my ($self, $bigram) = @_;
132              
133 372763         999347 return exists $CP1255_FREQS{ $bigram };
134              
135             }
136              
137 0     0 0 0 sub dist_ratio { $DIST_RATIO };
138              
139 67     67 0 188 sub encoding { 'CP1255' }
140              
141             1;