line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Plucene::Analysis::StopFilter; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Plucene::Analysis::StopFilter - the stop filter |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# isa Plucene::Analysis::TokenFilter |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $next = $stop_filter->next; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 DESCRIPTION |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
This removes stop words from a token stream. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Instances of the StopFilter class are token filters that remove from the |
18
|
|
|
|
|
|
|
indexed text words of your choice. Typically this is used to filter out common |
19
|
|
|
|
|
|
|
words ('the', 'a', 'if', etc.) that increase the overhead but add no value during |
20
|
|
|
|
|
|
|
searches. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 METHODS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
86
|
|
27
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
28
|
|
|
|
|
|
|
|
29
|
1
|
|
|
1
|
|
6
|
use base 'Plucene::Analysis::TokenFilter'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
582
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 next |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my $next = $stop_filter->next; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This returns the next input token whose term is not a stop word. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=cut |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub next { |
40
|
10
|
|
|
10
|
1
|
108
|
my $self = shift; |
41
|
10
|
|
100
|
|
|
35
|
$self->{stophash} ||= { map { $_ => 1 } @{ $self->{stoplist} } }; |
|
68
|
|
|
|
|
122
|
|
|
2
|
|
|
|
|
7
|
|
42
|
10
|
|
|
|
|
36
|
while (my $t = $self->input->next) { |
43
|
11
|
100
|
|
|
|
62
|
next if exists $self->{stophash}->{ $t->text() }; |
44
|
8
|
|
|
|
|
50
|
return $t; |
45
|
|
|
|
|
|
|
} |
46
|
2
|
|
|
|
|
25
|
return; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
1; |