line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Guess::Language; |
2
|
|
|
|
|
|
|
|
3
|
56
|
|
|
56
|
|
110361
|
use strict; |
|
56
|
|
|
|
|
137
|
|
|
56
|
|
|
|
|
1688
|
|
4
|
56
|
|
|
56
|
|
283
|
use warnings; |
|
56
|
|
|
|
|
115
|
|
|
56
|
|
|
|
|
2201
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
7
|
|
|
|
|
|
|
|
8
|
56
|
|
|
56
|
|
31204
|
use Unicode::Normalize; |
|
56
|
|
|
|
|
118959
|
|
|
56
|
|
|
|
|
4307
|
|
9
|
56
|
|
|
56
|
|
199844
|
use Text::Guess::Language::Words; |
|
56
|
|
|
|
|
236
|
|
|
56
|
|
|
|
|
14786
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub new { |
12
|
6
|
|
|
6
|
1
|
1065
|
my $class = shift; |
13
|
|
|
|
|
|
|
# uncoverable condition false |
14
|
6
|
100
|
66
|
|
|
50
|
bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class || $class; |
|
2
|
100
|
|
|
|
15
|
|
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub guess { |
18
|
47
|
|
|
47
|
1
|
40941
|
my ($self, $text) = @_; |
19
|
|
|
|
|
|
|
|
20
|
47
|
|
|
|
|
293
|
my $guesses = $self->guesses($text); |
21
|
|
|
|
|
|
|
|
22
|
47
|
|
|
|
|
888
|
return $guesses->[0]->[0]; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub guesses { |
26
|
47
|
|
|
47
|
0
|
129
|
my ($self, $text) = @_; |
27
|
|
|
|
|
|
|
|
28
|
47
|
|
|
|
|
29186
|
my $text_NFC = NFC(lc($text)); |
29
|
|
|
|
|
|
|
|
30
|
56
|
|
|
56
|
|
694
|
my @tokens = $text_NFC =~ m/([\p{Letter}\p{Mark}]+)/xmsg; |
|
56
|
|
|
|
|
136
|
|
|
56
|
|
|
|
|
930
|
|
|
47
|
|
|
|
|
224592
|
|
31
|
|
|
|
|
|
|
|
32
|
47
|
|
|
|
|
857
|
my $words = Text::Guess::Language::Words->words(); |
33
|
|
|
|
|
|
|
|
34
|
47
|
|
|
|
|
139
|
my $guesses = {}; |
35
|
|
|
|
|
|
|
|
36
|
47
|
|
|
|
|
1061
|
for my $token (@tokens) { |
37
|
68461
|
100
|
|
|
|
140660
|
if (exists $words->{$token}) { |
38
|
33370
|
|
|
|
|
41025
|
for my $lang (@{$words->{$token}}) { |
|
33370
|
|
|
|
|
70093
|
|
39
|
225396
|
|
|
|
|
319898
|
$guesses->{$lang}++; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $result = [ |
45
|
1491
|
|
|
|
|
3496
|
map { [ $_, $guesses->{$_}/scalar(@tokens) ] } |
46
|
47
|
|
|
|
|
733
|
sort { $guesses->{$b} <=> $guesses->{$a} } |
|
6142
|
|
|
|
|
8533
|
|
47
|
|
|
|
|
|
|
keys(%$guesses) |
48
|
|
|
|
|
|
|
]; |
49
|
47
|
|
|
|
|
4164
|
return $result; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub languages { |
53
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
54
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
my $languages = {}; |
56
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
my $words = Text::Guess::Language::Words->words(); |
58
|
|
|
|
|
|
|
|
59
|
0
|
|
|
|
|
|
for my $word (keys %{$words}) { |
|
0
|
|
|
|
|
|
|
60
|
0
|
|
|
|
|
|
map { $languages->{$_}++ } @{$words->{$word}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
} |
62
|
0
|
|
|
|
|
|
return (sort keys %{$languages}); |
|
0
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
1; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
__END__ |