| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::cdif::Command::Mecab; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 79631 | use parent "App::cdif::Command"; | 
|  | 1 |  |  |  |  | 308 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 1 |  |  | 1 |  | 48 | use v5.14; | 
|  | 1 |  |  |  |  | 3 |  | 
| 6 | 1 |  |  | 1 |  | 9 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 37 |  | 
| 7 | 1 |  |  | 1 |  | 6 | use utf8; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 8 | 1 |  |  | 1 |  | 21 | use Carp; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 49 |  | 
| 9 | 1 |  |  | 1 |  | 5 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 473 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | our $debug; | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | sub wordlist { | 
| 14 | 0 |  |  | 0 | 0 |  | my $obj = shift; | 
| 15 | 0 |  |  |  |  |  | my $text = shift; | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | ## | 
| 18 |  |  |  |  |  |  | ## mecab ignores trailing spaces. | 
| 19 |  |  |  |  |  |  | ## | 
| 20 |  |  |  |  |  |  | my $removeme = sub { | 
| 21 | 0 |  |  | 0 |  |  | local *_ = shift; | 
| 22 | 0 | 0 |  |  |  |  | return sub { 0 } unless /[ \t]+$/m; | 
|  | 0 |  |  |  |  |  |  | 
| 23 | 0 |  |  |  |  |  | my $magic = "15570"."67583"; | 
| 24 | 0 |  |  |  |  |  | $magic++ while /$magic/; | 
| 25 | 0 |  |  |  |  |  | s/[ \t]+\K$/$magic/mg; | 
| 26 | 0 |  |  |  |  |  | sub { $_ eq $magic }; | 
|  | 0 |  |  |  |  |  |  | 
| 27 | 0 |  |  |  |  |  | }->(\$text); | 
| 28 |  |  |  |  |  |  |  | 
| 29 | 0 |  |  |  |  |  | my $eos = "EOS" . "000"; | 
| 30 | 0 |  |  |  |  |  | $eos++ while $text =~ /$eos/; | 
| 31 | 0 |  |  | 0 |  |  | my $is_newline = sub { $_ eq $eos }; | 
|  | 0 |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 0 |  |  |  |  |  | my $mecab = [ 'mecab', '--node-format', '%M\\n', '--eos-format', "$eos\\n" ]; | 
| 34 | 0 |  |  |  |  |  | my $result = $obj->command($mecab)->setstdin($text)->update->data; | 
| 35 | 0 | 0 |  |  |  |  | warn $result =~ s/^/MECAB: /mgr if $debug; | 
| 36 | 0 |  |  |  |  |  | do { | 
| 37 | 0 | 0 |  |  |  |  | map  { $is_newline->() ? "\n" : $_ } | 
| 38 | 0 |  |  |  |  |  | grep { not $removeme->() } | 
| 39 | 0 |  |  |  |  |  | grep { length } | 
|  | 0 |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | $result =~ /^(\s*)(\S+)\n/amg; | 
| 41 |  |  |  |  |  |  | }; | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | 1; |