| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::CharDet::CP; |
|
2
|
18
|
|
|
18
|
|
9422
|
use 5.016; |
|
|
18
|
|
|
|
|
93
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
18
|
|
|
18
|
|
123
|
use strict; |
|
|
18
|
|
|
|
|
54
|
|
|
|
18
|
|
|
|
|
555
|
|
|
5
|
18
|
|
|
18
|
|
85
|
use warnings; |
|
|
18
|
|
|
|
|
33
|
|
|
|
18
|
|
|
|
|
1041
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
18
|
|
|
18
|
|
96
|
use EBook::Ishmael::CharDet::Constants qw(:CONSTANTS); |
|
|
18
|
|
|
|
|
36
|
|
|
|
18
|
|
|
|
|
10277
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Constructor: returns a new object blessed into $class with all counters
# reset.
#
# Fields:
#   Freqs   - incremented by take() when freq_bigram() returns true.
#   Bigrams - incremented by take() for every adjacent pair it examines.
#   Prev    - previous non-ignored character seen by take(), or undef.
#   Total   - incremented by take() for every character processed.
sub new {
    my $class = shift;

    my %state = (
        Freqs   => 0,
        Bigrams => 0,
        Prev    => undef,
        Total   => 0,
    );

    return bless \%state, $class;
}
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Feed a chunk of data to the detector, one character at a time.
#
# For each character of $data:
#   * if ignore() is true for it, the remembered previous character is
#     cleared, so no bigram is formed across it;
#   * otherwise, if a previous character is remembered, the two-character
#     string (previous . current) is passed to freq_bigram(); a true result
#     bumps Freqs, and Bigrams is bumped either way;
#   * the current character then becomes the remembered previous one.
# Total is incremented once per character, ignored or not.
#
# Prev lives on the object, so a bigram can span two successive take() calls.
#
# Returns TAKE_OK.
sub take {
    my ($self, $data) = @_;

    my $last = length($data) - 1;

    CHAR:
    for my $pos (0 .. $last) {
        my $char = substr($data, $pos, 1);

        if ($self->ignore($char)) {
            $self->{Prev} = undef;
            next CHAR;
        }

        my $prev = $self->{Prev};
        if (defined $prev) {
            # freq_bigram() is consulted before any counter or Prev changes.
            $self->{Freqs}++ if $self->freq_bigram($prev . $char);
            $self->{Bigrams}++;
        }

        $self->{Prev} = $char;
    }
    continue {
        # Runs for ignored characters too (reached via 'next').
        $self->{Total}++;
    }

    return TAKE_OK;
}
|
46
|
|
|
|
|
|
|
|
|
47
|
0
|
|
|
0
|
0
|
0
|
# Placeholder that always dies. take() calls this with a single character
# and treats a true result as "skip this character and break the current
# bigram chain".
# NOTE(review): presumably meant to be overridden by subclasses - confirm.
sub ignore {
    die "ignore() not implemented";
}
|
48
|
|
|
|
|
|
|
|
|
49
|
0
|
|
|
0
|
0
|
0
|
# Placeholder that always dies. take() calls this with a two-character
# string (previous character followed by the current one) and counts a true
# result towards Freqs.
# NOTE(review): presumably meant to be overridden by subclasses - confirm.
sub freq_bigram {
    die "freq_bigram() not implemented";
}
|
50
|
|
|
|
|
|
|
|
|
51
|
0
|
|
|
0
|
0
|
0
|
# Placeholder that always dies. Nothing in this file calls it.
# NOTE(review): presumably a hook for subclasses; its expected return value
# is not visible here - confirm against the callers.
sub dist_ratio {
    die "dist_ratio() not implemented";
}
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Compute a confidence score from the counters accumulated by take().
#
# Returns:
#   0               if no characters have been processed (Total is 0);
#   0.99            if Freqs equals Bigrams;
#   Freqs / Bigrams otherwise.
sub confidence {
    my $self = shift;

    my ($total, $hits, $pairs) = @{$self}{qw(Total Freqs Bigrams)};

    return 0 if $total == 0;

    # take() only bumps Freqs immediately before bumping Bigrams, so when
    # Bigrams is 0 Freqs is 0 too and this branch is taken; the division
    # below therefore never sees a zero divisor for state built by take().
    # NOTE(review): this also means input that had characters but formed no
    # bigrams scores 0.99 - confirm that is intended.
    return 0.99 if $hits == $pairs;

    return $hits / $pairs;
}
|
68
|
|
|
|
|
|
|
|
|
69
|
0
|
|
|
0
|
0
|
|
# Always returns 0.
# NOTE(review): presumably signals that this detector has not been ruled
# out; how callers use the value is not visible here - confirm.
sub bad {
    return 0;
}
|
70
|
|
|
|
|
|
|
|
|
71
|
0
|
|
|
0
|
0
|
|
# Placeholder that always dies. Nothing in this file calls it.
# NOTE(review): presumably overridden by subclasses to report the encoding
# they detect - confirm.
sub encoding {
    die "encoding() not implemented";
}
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
1; |
|
74
|
|
|
|
|
|
|
|