line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package My::Tagger; |
2
|
|
|
|
|
|
|
@My::Tagger::ISA=qw(Lingua::EN::Tagger); |
3
|
|
|
|
|
|
|
my %known_stems; |
4
|
|
|
|
|
|
|
sub stem { |
5
|
103
|
|
|
103
|
|
159184
|
my ( $self, $word ) = @_; |
6
|
103
|
50
|
|
|
|
365
|
return $word unless $self->{'stem'}; |
7
|
0
|
0
|
|
|
|
0
|
return $known_stems{ $word } if exists $known_stems{$word}; |
8
|
0
|
|
|
|
|
0
|
my $stemref = Lingua::Stem::En::stem( -words => [ $word ] ); |
9
|
|
|
|
|
|
|
|
10
|
0
|
0
|
|
|
|
0
|
$known_stems{ $word } = $stemref->[0] if exists $stemref->[0]; |
11
|
|
|
|
|
|
|
} |
12
|
|
|
|
|
|
|
|
13
|
172
|
|
|
172
|
|
246
|
sub stems { reverse %known_stems; } |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# To test: |
16
|
|
|
|
|
|
|
package Lingua::EN::Keywords; |
17
|
1
|
|
|
1
|
|
1992
|
use Lingua::EN::Tagger; |
|
1
|
|
|
|
|
118464
|
|
|
1
|
|
|
|
|
56
|
|
18
|
|
|
|
|
|
|
require 5.005_62; |
19
|
1
|
|
|
1
|
|
13
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
20
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
295
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $t = My::Tagger->new(longest_noun_phrase => 5,weight_noun_phrases=>0); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
require Exporter; |
25
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
26
|
|
|
|
|
|
|
our @EXPORT = qw( keywords); |
27
|
|
|
|
|
|
|
our $VERSION = '2.0'; |
28
|
|
|
|
|
|
|
sub keywords { |
29
|
1
|
|
|
1
|
0
|
84
|
my %wl = $t->get_words(shift); |
30
|
1
|
|
|
|
|
386
|
my %newwl; |
31
|
1
|
|
|
|
|
30
|
$newwl{unstem($_)} += $wl{$_} for keys %wl; |
32
|
1
|
|
|
|
|
61
|
return (sort { $newwl{$b} <=> $newwl{$a} } keys %newwl)[0..5]; |
|
698
|
|
|
|
|
924
|
|
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
sub unstem { |
35
|
172
|
|
|
172
|
0
|
279
|
my %cache = $t->stems; |
36
|
172
|
|
|
|
|
181
|
my $word = shift; |
37
|
172
|
|
33
|
|
|
911
|
return $cache{$word} || $word; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
#undef $/; |
40
|
|
|
|
|
|
|
#my $in = ; |
41
|
|
|
|
|
|
|
#print ((join " ", ((),keywords($in))),"\n"); |
42
|
|
|
|
|
|
|
1; |
43
|
|
|
|
|
|
|
__END__ |