| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::JA::Summarize; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
25567
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
124
|
|
|
4
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
278
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = 0.08; |
|
7
|
|
|
|
|
|
|
our @EXPORT_OK = |
|
8
|
|
|
|
|
|
|
qw(keyword_summary file_keyword_summary |
|
9
|
|
|
|
|
|
|
%LJS_Defaults %LJS_Defaults_keywords); |
|
10
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
|
11
|
|
|
|
|
|
|
all => \@EXPORT_OK, |
|
12
|
|
|
|
|
|
|
); |
|
13
|
|
|
|
|
|
|
|
|
14
|
3
|
|
|
3
|
|
34
|
use base qw(Exporter Class::Accessor::Fast Class::ErrorHandler); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
4311
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use Carp; |
|
17
|
|
|
|
|
|
|
use Encode; |
|
18
|
|
|
|
|
|
|
use File::Temp qw(:POSIX); |
|
19
|
|
|
|
|
|
|
use Jcode; |
|
20
|
|
|
|
|
|
|
use Lingua::JA::Summarize::Mecab; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Build the default table of "NG" (ignored) words.
#
# The table covers ASCII punctuation, several groups of multi-byte words
# (presumably EUC-JP counters, numerals and symbols -- the literals are kept
# exactly as they appear in this file) and common English stop words.
#
# Returns a freshly built hash reference mapping every ignored word to 1.
sub NG () {
    my @ignored = (
        '(', ')', '#', ',', '"', "'", '`',
        qw(! $ % & * + - . / : ; < = > ? @ [ \ ] ^ _ { | } ~),
        qw(¿Í Éà ʬ »þ Æü ·î ǯ ±ß ¥É¥ë),
        qw(°ì Æó »° »Í ¸Þ Ï» ¼· Ȭ ¶å ½½ É´ Àé Ëü ²¯ Ãû),
        qw(¢¬ ¢­ ¢« ¢ª ¢Í ¢Î ¡À ¡° ¡® ¡³),
        qw(a any the who he she i to and in you is you str this ago about and new as of for if or it have by into at on an are were was be my am your we them there their from all its),
    );

    # Duplicated entries simply collapse onto the same key.
    my %table;
    @table{@ignored} = (1) x @ignored;
    return \%table;
}
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Multiplier applied to the `default_cost` option when a word has no usable
# cost of its own (used by _calc_weight).
sub DEFAULT_COST_FACTOR () { 2_000 }
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Built-in option values. Every key listed here also becomes an accessor
# (see the mk_accessors call below) and can be overridden per instance
# through the hash reference given to new().
my %Defaults = (
    alnum_as_word     => 1,
    concat_nouns      => 1,
    charset           => 'euc',
    default_cost      => 1,
    jaascii_as_word   => 1,
    ng                => NG(),
    mecab             => 'mecab',
    mecab_charset     => 'euc',
    mecab_factory     => sub {
        return Lingua::JA::Summarize::Mecab->new(@_);
    },
    omit_number       => 1,
    singlechar_factor => 0.5,
    url_as_word       => 1,
);

# Environment overrides: option `foo` takes its value from $ENV{LJS_FOO}
# whenever that variable is defined.
our %LJS_Defaults = ();
for my $option (keys %Defaults) {
    my $env_name = 'LJS_' . uc($option);
    next unless defined $ENV{$env_name};
    $LJS_Defaults{$option} = $ENV{$env_name};
}
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Package-level lookup tables shared by _encode_ascii_word and
# _decode_ascii_word:
#   %LJS_ascii_words    placeholder token => original ASCII word
#   %LJS_encoded_words  original ASCII word => placeholder token
our %LJS_ascii_words   = ();
our %LJS_encoded_words = ();

# One accessor per option, plus accessors for the analysis results.
__PACKAGE__->mk_accessors((keys %Defaults), 'stats', 'wordcount');
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Constructor.
#
#   $proto  - class name, or an existing object whose class is reused
#   $fields - optional hash reference of option overrides
#
# Option precedence, lowest to highest: %Defaults, %LJS_Defaults (taken from
# the environment), then $fields. `wordcount` always starts at 0, whatever
# the caller passed. Returns the new object.
sub new {
    my ($proto, $fields) = @_;

    my %state = (%Defaults, %LJS_Defaults);
    if ($fields) {
        %state = (%state, %$fields);
    }
    $state{wordcount} = 0;

    my $class = ref($proto) || $proto;
    return bless \%state, $class;
}
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Built-in arguments for keywords().
my %Defaults_keywords = (
    maxwords  => 5,
    minwords  => 0,
    threshold => 5,
);

# Environment overrides: argument `foo` takes its value from
# $ENV{LJS_KEYWORDS_FOO} whenever that variable is defined.
our %LJS_Defaults_keywords = ();
for my $name (keys %Defaults_keywords) {
    my $env_name = 'LJS_KEYWORDS_' . uc($name);
    next unless defined $ENV{$env_name};
    $LJS_Defaults_keywords{$name} = $ENV{$env_name};
}
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Return the list of keywords selected from the analysed statistics.
#
#   $_args - optional hash reference with `maxwords`, `minwords` and
#            `threshold`; missing entries fall back to
#            %LJS_Defaults_keywords, then %Defaults_keywords.
#
# Words are ranked by descending weight, ties broken by string order.
# Selection stops at the first word whose weight is below `threshold` once
# at least `minwords` words were taken, or as soon as exactly `maxwords`
# words have been collected.
sub keywords {
    my ($self, $_args) = @_;

    my %args = (%Defaults_keywords, %LJS_Defaults_keywords);
    %args = (%args, %$_args) if $_args;

    my $stats  = $self->{stats};
    my @ranked = sort {
        $stats->{$b}->{weight} <=> $stats->{$a}->{weight} || $a cmp $b
    } keys(%$stats);

    my @keywords;
    for my $candidate (@ranked) {
        if (@keywords >= $args{minwords}
            && $stats->{$candidate}->{weight} < $args{threshold}) {
            last;
        }
        push @keywords, $candidate;
        last if @keywords == $args{maxwords};
    }

    return @keywords;
}
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Read the whole of $file and hand its contents to analyze().
# Croaks with "failed to open: ..." when the file cannot be opened.
# Returns whatever analyze() returns.
sub analyze_file {
    my ($self, $file) = @_;

    my $text;
    {
        open(my $in, '<', $file) || croak("failed to open: $file: $!");
        local $/ = undef;    # slurp mode: read everything in one go
        $text = <$in>;
        close($in);
    }

    return $self->analyze($text);
}
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Analyse $text and fill $self->{stats} with per-word statistics.
#
# Steps:
#   1. convert the text to 'euc' (when `charset` differs), run _prefilter,
#      collapse whitespace around newlines and run _normalize_japanese;
#   2. write the text (converted to `mecab_charset`) to a temporary file;
#   3. obtain a reader from `mecab_factory` and consume its output line by
#      line, accumulating consecutive words into a "long word";
#   4. compute weights with _calc_weight.
#
# Croaks with "already analyzed" when called twice on the same object, and
# when the temporary file cannot be written. The temporary file is removed
# on every exit path, including when the factory or the reader throws; the
# original exception is then re-thrown. Returns 1 on success.
sub analyze {
    my ($self, $text) = @_;

    croak("already analyzed") if $self->{stats};
    $self->{stats} = {};

    # adjust text
    Jcode::convert(\$text, 'euc', $self->charset) if $self->charset ne 'euc';
    $text = $self->_prefilter($text);
    $text =~ s/\s*\n\s*/\n/sg;
    $text .= "\n";
    $text = _normalize_japanese($text);
    Jcode::convert(\$text, $self->mecab_charset, 'euc')
        if $self->mecab_charset ne 'euc';

    # write text to temporary file; buffered write errors may only show up
    # at close, so both calls are checked
    my ($fh, $tempfile) = tmpnam();
    my $write_error;
    print {$fh} $text or $write_error = "$!";
    close($fh)        or $write_error ||= "$!";
    if (defined $write_error) {
        unlink($tempfile);
        croak("failed to write temporary file: $tempfile: $write_error");
    }

    # accumulator for a run of words that are joined into one long word
    my $longword = {
        text  => '',
        cost  => 0,
        count => 0,
    };
    # flush the accumulator: register the long word (its summed cost is
    # damped by the number of parts) and reset the accumulator
    my $add_longword = sub {
        if ($longword->{text}) {
            $self->_add_word(
                $longword->{text},
                $longword->{cost} / (log($longword->{count}) * 0.7 + 1));
        }
        $longword->{text} = '';
        $longword->{cost} = 0;
        $longword->{count} = 0;
    };

    # Everything that can throw while the temporary file exists runs inside
    # eval so the file is always removed afterwards.
    my $ok = eval {
        # open mecab
        my $mecab = $self->mecab_factory->($self, $tempfile);

        # read from mecab; lines with tabs are split into four fields:
        # word, $pn, $pw (the cost that is used) and $H (matched below
        # against part-of-speech byte patterns -- presumably EUC-JP labels,
        # TODO confirm against the mecab output format in use)
        while (my $line = $mecab->getline) {
            chomp $line;
            Jcode::convert(\$line, 'euc', $self->mecab_charset)
                if $self->mecab_charset ne 'euc';
            if ($line =~ /\t/o) {
                my ($word, $pn, $pw, $H) = split(/\t/, $line, 4);
                $word = $self->_postfilter($word);
                $word = $self->_normalize_word($word);
                my $ng = $self->_ng_word($word);
                if ($ng) {
                    $add_longword->();
                    next;
                }
                if ($H =~ /^̾»ì/) {
                    if ($H =~ /(Èó¼«Î©|Âå̾»ì)/) {
                        $add_longword->();
                        next;
                    } elsif (! $longword->{text} && $H =~ /ÀÜÈø/) {
                        # ng
                        next;
                    }
                    if (! $self->concat_nouns && $H !~ /ÀÜÈø/) {
                        $add_longword->();
                    }
                } elsif ($H eq 'UnkType') {
                    # handle unknown (mostly English) words
                    if ($self->jaascii_as_word) {
                        if ($word =~ /^\w/ && $longword->{text} =~ /\w$/) {
                            $add_longword->();
                        }
                    } else {
                        $add_longword->();
                        $self->_add_word($word, $pw);
                        next;
                    }
                } else {
                    $add_longword->();
                    next;
                }
                $longword->{text} .= $word;
                $longword->{cost} += $pw; # do not use $pn
                $longword->{count}++;
            } else {
                # a line without tabs ends the current long word
                $add_longword->();
            }
        }
        $add_longword->();
        1;
    };
    my $error = $@;
    unlink($tempfile);
    die($error || "analysis failed\n") unless $ok;

    # calculate tf-idf
    $self->_calc_weight;

    return 1;
}
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# Record one occurrence of $word with the given $cost.
#
# Nothing is recorded when $cost is not positive or when _ng_word rejects
# the word. Otherwise `wordcount` is incremented, the word is converted from
# 'euc' to the object's `charset` when they differ, and its entry in
# $self->{stats} is created (count 1, with this cost) or has its count
# bumped (the cost of the first occurrence is kept).
sub _add_word {
    my ($self, $word, $cost) = @_;

    return if $cost <= 0 || $self->_ng_word($word);

    $self->{wordcount} += 1;
    if ($self->charset ne 'euc') {
        Jcode::convert(\$word, $self->charset, 'euc');
    }

    my $entry = $self->{stats}->{$word};
    if (!$entry) {
        $self->{stats}->{$word} = { count => 1, cost => $cost };
    }
    else {
        $entry->{count}++;
    }
}
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Compute the `weight` of every entry in $self->{stats}:
#
#   weight = (count - 0.5) * cost / wordcount / 6
#
# where a false cost is replaced by default_cost * DEFAULT_COST_FACTOR, and
# the result is additionally scaled by `singlechar_factor` for words that
# _is_singlechar reports as a single character.
sub _calc_weight {
    my ($self) = @_;

    my $stats = $self->{stats};
    for my $word (keys %$stats) {
        my $entry = $stats->{$word};
        my $cost  = $entry->{cost}
            || $self->default_cost * DEFAULT_COST_FACTOR();

        my $weight = ($entry->{count} - 0.5) * $cost / $self->{wordcount} / 6;
        $entry->{weight} = $weight;
        $entry->{weight} *= $self->singlechar_factor
            if $self->_is_singlechar($word);
    }
}
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Normalise one word (treated as 'euc' text): run it through Jcode's h2z,
# map the listed multi-byte digits, letters and parentheses to their ASCII
# counterparts via Jcode's tr, and return the lower-cased result.
sub _normalize_word {
    my ($self, $word) = @_;

    my $converted = Jcode->new($word, 'euc')->h2z;
    $converted->tr('£°-£¹£Á-£Ú£á-£ú¡Ê¡Ë', '0-9A-Za-z()');

    return lc($converted);
}
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Decide whether $word must be ignored. Returns 1 when
#   - `omit_number` is on and the word is empty or digits only, or
#   - the word is a key of the `ng` table, or
#   - the word has no word character and no byte in \x80-\xff;
# returns undef otherwise.
sub _ng_word {
    my ($self, $word) = @_;

    my $rejected =
           ($self->omit_number && $word =~ /^\d*$/)
        || exists $self->{ng}->{$word}
        || $word !~ /[\w\x80-\xff]/;

    return 1 if $rejected;
    return undef;
}
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Replace ASCII words in $text through _encode_ascii_word before the text is
# handed to the tokenizer. Does nothing unless `alnum_as_word` is set; when
# `url_as_word` is also set, http/https URLs are matched as a whole as well.
# Returns the (possibly rewritten) text.
sub _prefilter {
    my ($self, $text) = @_;

    return $text unless $self->alnum_as_word;

    my $pattern = $self->url_as_word
        ? qr!(https?://[A-Za-z0-9.:_/?#~\$\-=&%]+|[A-Za-z0-9_][A-Za-z0-9_.']*[A-Za-z0-9_])!
        : qr!([A-Za-z0-9_][A-Za-z0-9_.']*[A-Za-z0-9_])!;

    $text =~ s/$pattern/_encode_ascii_word($1)/eg;
    return $text;
}
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# Undo _prefilter for one token: a word made only of ASCII letters is looked
# up through _decode_ascii_word when `alnum_as_word` or `url_as_word` is
# set. Any other word is returned unchanged.
sub _postfilter {
    my ($self, $word) = @_;

    my $letters_only = $word =~ /^[A-Za-z]+$/;
    if ($letters_only && ($self->alnum_as_word || $self->url_as_word)) {
        return _decode_ascii_word($word);
    }

    return $word;
}
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# True when $word, decoded from the object's `charset`, is exactly one
# character long. The charset name 'euc' is passed to Encode as 'euc-jp'.
sub _is_singlechar {
    my ($self, $word) = @_;

    my $charset  = $self->charset;
    my $encoding = $charset eq 'euc' ? 'euc-jp' : $charset;
    my $chars    = decode($encoding, $word);

    return length($chars) == 1;
}
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Map an ASCII word (or URL) to a letters-only placeholder token.
#
# Words of 1-25 ASCII letters or 1-25 digits are returned unchanged, unless
# the word itself has the shape of a placeholder (qz + nine lowercase
# letters + q). Any other word gets a random placeholder of that shape,
# which is remembered in %LJS_encoded_words (word => token) and
# %LJS_ascii_words (token => word) so the same word always yields the same
# token and _decode_ascii_word can reverse it.
#
# A candidate token that is already assigned to another word is discarded
# and a new one is drawn, so two words never share a placeholder.
sub _encode_ascii_word {
    my ($word) = @_;

    return $word
        if $word !~ /^qz[a-z]{9}q$/
        && $word =~ /^([A-Za-z]{1,25}|[0-9]{1,25})$/;
    return $LJS_encoded_words{$word} if ($LJS_encoded_words{$word});

    my @letters = ('a' .. 'z');
    my $token;
    do {
        $token = 'qz';
        $token .= $letters[int rand 26] for 1 .. 9;
        $token .= 'q';
    } while (exists $LJS_ascii_words{$token});

    $LJS_encoded_words{$word} = $token;
    $LJS_ascii_words{$token}  = $word;
    return $token;
}
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# Reverse _encode_ascii_word: return the original word recorded for the
# placeholder $word in %LJS_ascii_words, or $word itself when there is no
# (true) entry for it.
sub _decode_ascii_word {
    my ($word) = @_;

    my $original = $LJS_ascii_words{$word};
    return $original ? $original : $word;
}
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# Normalise punctuation in a byte string: scanning left to right over
# non-overlapping pairs of bytes in \x80-\xff, the two listed full-stop
# pairs become the first one followed by a newline, and the listed comma
# pair becomes its alternative form. Every other pair, and all remaining
# bytes, are copied through unchanged. Returns the normalised string.
sub _normalize_japanese {
    my ($in) = @_;

    my %replacement_for = (
        '¡£' => "¡£\n",
        '¡¥' => "¡£\n",
        '¡¤' => "¡¢",
    );

    my $out = "$in";
    $out =~ s{([\x80-\xff]{2})}
             {exists $replacement_for{$1} ? $replacement_for{$1} : $1}ge;
    return $out;
}
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Functional shortcut: analyse $text with a new summarizer and return its
# keywords. The same $args hash reference is given both to new() and to
# keywords().
sub keyword_summary {
    my ($text, $args) = @_;

    my $summarizer = Lingua::JA::Summarize->new($args);
    $summarizer->analyze($text);

    return $summarizer->keywords($args);
}
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Functional shortcut: analyse the contents of $file with a new summarizer
# and return its keywords. The same $args hash reference is given both to
# new() and to keywords().
sub file_keyword_summary {
    my ($file, $args) = @_;

    my $summarizer = Lingua::JA::Summarize->new($args);
    $summarizer->analyze_file($file);

    return $summarizer->keywords($args);
}
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
1; |
|
341
|
|
|
|
|
|
|
__END__ |