line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::cdif::Command::Mecab; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
76704
|
use parent "App::cdif::Command"; |
|
1
|
|
|
|
|
307
|
|
|
1
|
|
|
|
|
6
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
55
|
use v5.14; |
|
1
|
|
|
|
|
3
|
|
6
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
37
|
|
7
|
1
|
|
|
1
|
|
5
|
use utf8; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
8
|
1
|
|
|
1
|
|
21
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
48
|
|
9
|
1
|
|
|
1
|
|
6
|
use Data::Dumper; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
476
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $debug; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub wordlist { |
14
|
0
|
|
|
0
|
0
|
|
my $obj = shift; |
15
|
0
|
|
|
|
|
|
my $text = shift; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
## |
18
|
|
|
|
|
|
|
## mecab ignores trailing spaces. |
19
|
|
|
|
|
|
|
## |
20
|
|
|
|
|
|
|
my $removeme = sub { |
21
|
0
|
|
|
0
|
|
|
local *_ = shift; |
22
|
0
|
0
|
|
|
|
|
return sub { 0 } unless /[ \t]+$/m; |
|
0
|
|
|
|
|
|
|
23
|
0
|
|
|
|
|
|
my $magic = "15570"."67583"; |
24
|
0
|
|
|
|
|
|
$magic++ while /$magic/; |
25
|
0
|
|
|
|
|
|
s/[ \t]+\K$/$magic/mg; |
26
|
0
|
|
|
|
|
|
sub { $_ eq $magic }; |
|
0
|
|
|
|
|
|
|
27
|
0
|
|
|
|
|
|
}->(\$text); |
28
|
|
|
|
|
|
|
|
29
|
0
|
|
|
|
|
|
my $eos = "EOS" . "000"; |
30
|
0
|
|
|
|
|
|
$eos++ while $text =~ /$eos/; |
31
|
0
|
|
|
0
|
|
|
my $is_newline = sub { $_ eq $eos }; |
|
0
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
|
33
|
0
|
|
|
|
|
|
my $mecab = [ 'mecab', '--node-format', '%M\\n', '--eos-format', "$eos\\n" ]; |
34
|
0
|
|
|
|
|
|
my $result = $obj->command($mecab)->setstdin($text)->update->data; |
35
|
0
|
0
|
|
|
|
|
warn $result =~ s/^/MECAB: /mgr if $debug; |
36
|
0
|
|
|
|
|
|
do { |
37
|
0
|
0
|
|
|
|
|
map { $is_newline->() ? "\n" : $_ } |
38
|
0
|
|
|
|
|
|
grep { not $removeme->() } |
39
|
0
|
|
|
|
|
|
grep { length } |
|
0
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$result =~ /^(\s*)(\S+)\n/amg; |
41
|
|
|
|
|
|
|
}; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
1; |