|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Pod::Wordlist;  | 
| 
2
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
57488
 | 
 use 5.008;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
3
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
28
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
    | 
| 
4
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
29
 | 
 use warnings;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
269
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '1.24'; # TRIAL  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
4080
 | 
 use Lingua::EN::Inflect 'PL';  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
124430
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
679
 | 
    | 
| 
9
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
63
 | 
 use File::Spec ();  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use constant {  | 
| 
11
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
628
 | 
     MAXWORDLENGTH => 50,  | 
| 
12
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
32
 | 
 };  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use Class::Tiny {  | 
| 
15
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     wordlist  => \&_copy_wordlist,  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     _is_debug => 0,  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     no_wide_chars => 0,  | 
| 
18
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
2985
 | 
 };  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9460
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %Wordlist; ## no critic ( Variables::ProhibitPackageVars )  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _copy_wordlist {  | 
| 
23
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
79
 | 
     my %copy;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # %Wordlist can be accessed externally, and users will often add terms in  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # encoded form  | 
| 
27
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4867
 | 
     for my $word ( keys %Wordlist ) {  | 
| 
28
 | 
19448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19364
 | 
         my $decoded_word = $word;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # if it was already decoded, this should do nothing  | 
| 
30
 | 
19448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27243
 | 
         utf8::decode($decoded_word);  | 
| 
31
 | 
19448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30258
 | 
         $copy{$decoded_word} = 1;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
789
 | 
     return \%copy;  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
38
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
3946
 | 
     my $file;  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # try to find wordlist in non-installed dist  | 
| 
41
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
     my ($d, $p) = File::Spec->splitpath(__FILE__);  | 
| 
42
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
     $p = File::Spec->catdir($p, (File::Spec->updir) x 2, 'share');  | 
| 
43
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
     my $full_path = File::Spec->catpath($d, $p, 'wordlist');  | 
| 
44
 | 
6
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
156
 | 
     if ($full_path && -e $full_path) {  | 
| 
45
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $file = $full_path;  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     if ( not defined $file ) {  | 
| 
49
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2753
 | 
         require File::ShareDir;  | 
| 
50
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135362
 | 
         $file = File::ShareDir::dist_file('Pod-Spell', 'wordlist');  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
6
 | 
  
 50
  
 | 
 
 | 
  
6
  
 | 
 
 | 
1229
 | 
     open my $fh, '<:encoding(UTF-8)', $file  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or die "Cannot read $file: $!"; ## no critic (ErrorHandling::RequireCarping)  | 
| 
55
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62931
 | 
     while ( defined( my $line = readline $fh ) ) {  | 
| 
56
 | 
7320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1498139
 | 
         chomp $line;  | 
| 
57
 | 
7320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21276
 | 
         $Wordlist{$line} = 1;  | 
| 
58
 | 
7320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12083
 | 
         $Wordlist{PL($line)} = 1;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
60
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6464
 | 
     close $fh;  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub learn_stopwords {  | 
| 
64
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
1
  
 | 
2304
 | 
     my ( $self, $text ) = @_;  | 
| 
65
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
245
 | 
     my $stopwords = $self->wordlist;  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
     while ( $text =~ m<(\S+)>g ) {  | 
| 
68
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
487
 | 
         my $word = $1;  | 
| 
69
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
         utf8::decode($word);  | 
| 
70
 | 
25
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
62
 | 
         if ( $word =~ m/^!(.+)/s ) {  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # "!word" deletes from the stopword list  | 
| 
72
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             my $negation = $1;  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # different $1 from above  | 
| 
74
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
             delete $stopwords->{$negation};  | 
| 
75
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             delete $stopwords->{PL($negation)};  | 
| 
76
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
419
 | 
             print "Unlearning stopword <$negation>\n" if $self->_is_debug;  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
79
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
             $word =~ s{'s$}{}; # we strip 's when checking so strip here, too  | 
| 
80
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
             $stopwords->{$word} = 1;  | 
| 
81
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
             $stopwords->{PL($word)} = 1;  | 
| 
82
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5795
 | 
             print "Learning stopword   <$word>\n" if $self->_is_debug;  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
85
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
234
 | 
     return;  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub is_stopword {  | 
| 
89
 | 
56
 | 
 
 | 
 
 | 
  
56
  
 | 
  
1
  
 | 
77
 | 
     my ($self, $word) = @_;  | 
| 
90
 | 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
863
 | 
     my $stopwords = $self->wordlist;  | 
| 
91
 | 
56
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
363
 | 
     if ( exists $stopwords->{$word} or exists $stopwords->{ lc $word } ) {  | 
| 
92
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
264
 | 
         print "  Rejecting   <$word>\n" if $self->_is_debug;  | 
| 
93
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
255
 | 
         return 1;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
95
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
143
 | 
     return;  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub strip_stopwords {  | 
| 
99
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
  
1
  
 | 
99
 | 
     my ($self, $text) = @_;  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Count the things in $text  | 
| 
102
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
383
 | 
     print "Content: <", $text, ">\n" if $self->_is_debug;  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
537
 | 
     my @words = grep { length($_) < MAXWORDLENGTH } split " ", $text;  | 
| 
 
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
154
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     for ( @words ) {  | 
| 
107
 | 
71
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1463
 | 
         print "Parsing word: <$_>\n" if $self->_is_debug;  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # some spellcheckers can't cope with anything but Latin1  | 
| 
109
 | 
71
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1776
 | 
         $_ = '' if $self->no_wide_chars && /[^\x00-\xFF]/;  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # strip leading punctuation  | 
| 
112
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
414
 | 
         s/^[\(\[\{\'\"\:\;\,\?\!\.]+//;  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # keep everything up to trailing punctuation, not counting  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # periods (for abbreviations like "Ph.D."), single-quotes  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # (for contractions like "don't") or colons (for package  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # names like "Foo::Bar")  | 
| 
118
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
         s/^([^\)\]\}\"\;\,\?\!]+).*$/$1/;  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # strip trailing single-quote, periods or colons; after this  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # we have a word that could have internal periods or quotes  | 
| 
122
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
146
 | 
         s/[\.\'\:]+$//;  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # strip possessive  | 
| 
125
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
         s/'s$//i;  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # zero out variable names or things with internal symbols,  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # since those are probably code expressions outside a C<>  | 
| 
129
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
         my $is_sigil   = /^[\&\%\$\@\:\<\*\\\_]/;  | 
| 
130
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
102
 | 
         my $is_strange = /[\%\^\&\#\$\@\_\<\>\(\)\[\]\{\}\\\*\:\+\/\=\|\`\~]/;  | 
| 
131
 | 
71
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
207
 | 
         $_ = '' if $is_sigil || $is_strange;  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # stop if there are no "word" characters left; if it's just  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # punctuation that we didn't happen to strip or it's weird glyphs,  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # the spellchecker won't do any good anyway  | 
| 
136
 | 
71
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
148
 | 
         next unless /\w/;  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
51
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
684
 | 
         print "  Checking as <$_>\n" if $self->_is_debug;  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # replace it with any stopword or stopword parts stripped  | 
| 
141
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
580
 | 
         $_ = $self->_strip_a_word($_);  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
51
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
591
 | 
         print "  Keeping as  <$_>\n" if $_ && $self->_is_debug;  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
18
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
97
 | 
     return join(" ", grep { defined && length } @words );  | 
| 
 
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
214
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _strip_a_word {  | 
| 
150
 | 
51
 | 
 
 | 
 
 | 
  
51
  
 | 
 
 | 
89
 | 
     my ($self, $word) = @_;  | 
| 
151
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
     my $remainder;  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # try word as-is, including possible hyphenation vs stoplist  | 
| 
154
 | 
51
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
98
 | 
     if ($self->is_stopword($word) ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
         $remainder = '';  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # internal period could be abbreviations, so check with  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # trailing period restored and drop or keep on that basis  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( index($word, '.') >= 0 ) {  | 
| 
160
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         my $abbr = "$word.";  | 
| 
161
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         $remainder = $self->is_stopword($abbr) ? '' : $abbr;  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # check individual parts of hyphenated word, keep whatever isn't a  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # stopword as individual words  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( index($word, '-') >= 0 ) {  | 
| 
166
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         my @keep;  | 
| 
167
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         for my $part ( split /-/, $word ) {  | 
| 
168
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             push @keep, $part if ! $self->is_stopword( $part );  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
170
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         $remainder = join(" ", @keep) if @keep;  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # otherwise, we just keep it  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
174
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
         $remainder = $word;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
176
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
     return $remainder;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |