|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::JA::Summarize::Extract::Plugin::Parser::Ngram;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use base qw( Lingua::JA::Summarize::Extract::Plugin );  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
496
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __PACKAGE__->mk_accessors(qw/ latin_gram kana_gram han_gram /);  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse {  | 
| 
8
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
     my ($self) = @_;  | 
| 
9
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     my $latin_gram = $self->latin_gram || 2;  | 
| 
10
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     my $kana_gram = $self->kana_gram || 3;  | 
| 
11
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     my $han_gram = $self->han_gram || 2;  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $term_list = {};  | 
| 
14
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->_gram($term_list, 'Latin', $latin_gram);  | 
| 
15
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->_gram($term_list, 'Katakana', $kana_gram);  | 
| 
16
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->_gram($term_list, 'Han', $han_gram);  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $term_list;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _gram {  | 
| 
22
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my($self, $list, $block, $gram) = @_;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $text = $self->text;  | 
| 
25
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     while ($text =~ /(\p{$block}+)/g) {  | 
| 
26
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $word = $1;  | 
| 
27
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @part;  | 
| 
28
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         for (my $i = 0;$i + $gram <= length $word;$i++) {  | 
| 
29
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push @part, substr $word, $i, $gram;  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
31
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $list->{join ' ', @part}++ if @part;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |