line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::JA::Summarize::Extract::Plugin::Parser::NgramSimple; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
4
|
1
|
|
|
1
|
|
6
|
use base qw( Lingua::JA::Summarize::Extract::Plugin ); |
|
1
|
|
|
|
|
50
|
|
|
1
|
|
|
|
|
204
|
|
5
|
|
|
|
|
|
|
# Declare the `gram` accessor; parse() reads it to choose the n-gram length.
# NOTE(review): mk_accessors is not defined in this file -- presumably it is
# inherited through the base class (Class::Accessor-style); confirm.
__PACKAGE__->mk_accessors(qw/ gram /); |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# parse()
#
# Builds a frequency table of character n-gram sequences from $self->text.
#
# The text is scanned for maximal runs of characters that do NOT match
# \p{Common}.  Each run is cut into overlapping substrings of length
# $self->gram (2 when the accessor returns a false value), the substrings
# are joined with single spaces, and the joined string is used as ONE hash
# key whose count is incremented.  Runs shorter than the n-gram length
# yield no substrings and are skipped.
#
# Returns a hash reference mapping each joined n-gram string to the number
# of times it occurred.
sub parse {
    my $self = shift;

    my $size   = $self->gram || 2;
    my %count_of;
    my $source = $self->text;

    for my $run ($source =~ /([^\p{Common}]+)/g) {
        my @ngrams;
        my $offset = 0;

        # Slide a window of $size characters across the run, one step at a time.
        while ($offset + $size <= length $run) {
            push @ngrams, substr($run, $offset, $size);
            $offset++;
        }

        # A run shorter than the window produces nothing to record.
        next unless @ngrams;

        $count_of{ join ' ', @ngrams }++;
    }

    return \%count_of;
}
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
1; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
__END__ |