| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Search::Tools::QueryParser; |
|
2
|
26
|
|
|
26
|
|
73591
|
use Moo; |
|
|
26
|
|
|
|
|
87342
|
|
|
|
26
|
|
|
|
|
136
|
|
|
3
|
|
|
|
|
|
|
extends 'Search::Tools::Object'; |
|
4
|
26
|
|
|
26
|
|
16215
|
use Carp; |
|
|
26
|
|
|
|
|
53
|
|
|
|
26
|
|
|
|
|
1267
|
|
|
5
|
26
|
|
|
26
|
|
2043
|
use Data::Dump qw( dump ); |
|
|
26
|
|
|
|
|
22277
|
|
|
|
26
|
|
|
|
|
1043
|
|
|
6
|
26
|
|
|
26
|
|
11825
|
use Search::Query::Parser; |
|
|
26
|
|
|
|
|
2986430
|
|
|
|
26
|
|
|
|
|
780
|
|
|
7
|
26
|
|
|
26
|
|
4282
|
use Encode; |
|
|
26
|
|
|
|
|
71077
|
|
|
|
26
|
|
|
|
|
1715
|
|
|
8
|
26
|
|
|
26
|
|
143
|
use Data::Dump; |
|
|
26
|
|
|
|
|
43
|
|
|
|
26
|
|
|
|
|
993
|
|
|
9
|
26
|
|
|
26
|
|
9663
|
use Search::Tools::Query; |
|
|
26
|
|
|
|
|
68
|
|
|
|
26
|
|
|
|
|
753
|
|
|
10
|
26
|
|
|
26
|
|
153
|
use Search::Tools::UTF8; |
|
|
26
|
|
|
|
|
42
|
|
|
|
26
|
|
|
|
|
2135
|
|
|
11
|
26
|
|
|
26
|
|
136
|
use Search::Tools::XML; |
|
|
26
|
|
|
|
|
45
|
|
|
|
26
|
|
|
|
|
457
|
|
|
12
|
26
|
|
|
26
|
|
113
|
use Search::Tools::RegEx; |
|
|
26
|
|
|
|
|
51
|
|
|
|
26
|
|
|
|
|
520
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
26
|
|
|
26
|
|
109
|
use namespace::autoclean; |
|
|
26
|
|
|
|
|
41
|
|
|
|
26
|
|
|
|
|
100
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.006';

# Shared XML helper object. It supplies the tag and whitespace patterns used
# in %Defaults below, and the char2ent_map lookup consulted by _build_regex().
my $XML = Search::Tools::XML->new();
my $C2E = $XML->char2ent_map;

# POSIX locale functions are imported inside a small block because we don't
# want them to leak into the rest of the package. We just want to call
# setlocale() once to derive the default lang/charset.
my ( $locale, $lang, $charset );
{
    use POSIX qw(locale_h);
    $locale = setlocale(LC_CTYPE);

    # setlocale() returns undef on failure; fall back to the portable "C"
    # locale so that nothing below (or in BUILD) operates on undef.
    $locale = q/C/ unless defined $locale and length $locale;

    # A locale looks like "lang.charset"; either part may be missing.
    ( $lang, $charset ) = split( m/\./, $locale );
    $charset ||= q/UTF-8/;
    $lang = q/en_US/ if !defined $lang or $lang =~ m/^(posix|c)$/i;
}

# Default value for every read/write attribute of the parser.
my %Defaults = (
    and_word                => q/and|near\d*/,
    charset                 => $charset,
    default_field           => "",
    ignore_case             => 1,
    ignore_fields           => {},
    ignore_first_char       => quotemeta(q/'-/),
    ignore_last_char        => quotemeta(q/'-/),
    lang                    => $lang,
    locale                  => $locale,
    not_word                => q/not/,
    or_word                 => q/or/,
    phrase_delim            => q/"/,
    query_class             => 'Search::Tools::Query',
    query_dialect           => "Search::Query::Dialect::Native",
    stemmer                 => undef,
    stopwords               => [],
    tag_re                  => $XML->tag_re,
    term_re                 => qr/\w+(?:[\'\-]\w+)*/,
    term_min_length         => 1,
    treat_uris_like_phrases => 1,
    whitespace              => $XML->html_whitespace,
    wildcard                => q/*/,
    word_characters         => q/\w/ . quotemeta(q/'-/),
);

# Declare one rw attribute per default. Plain hash/array defaults are
# shallow-copied per object so that changing one object's ignore_fields or
# stopwords can never alter another object or the %Defaults table itself.
for my $attr ( keys %Defaults ) {
    has(
        $attr => (
            is      => 'rw',
            default => sub {
                my $value = $Defaults{$attr};
                my $type  = ref $value;
                return {%$value} if $type eq 'HASH';
                return [@$value] if $type eq 'ARRAY';
                return $value;
            },
        )
    );
}

# Read-only attributes. start_bound/end_bound may be supplied by the caller;
# all four are filled in by _setup_regex_builder() during BUILD.
has 'start_bound'        => ( is => 'ro' );
has 'end_bound'          => ( is => 'ro' );
has 'plain_phrase_bound' => ( is => 'ro' );
has 'html_phrase_bound'  => ( is => 'ro' );
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Return a new hash reference holding a shallow copy of the package-wide
# attribute defaults. Nested references (e.g. ignore_fields, stopwords)
# are shared with the defaults table, not cloned.
sub get_defaults {
    my %copy = %Defaults;
    return \%copy;
}
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Moo post-construction hook.
#
# Reconciles lang/charset with an explicitly passed locale, normalizes
# ignore_fields to a hash ref, and precomputes the regex fragments used by
# _build_regex(). Returns the object.
sub BUILD {
    my $self = shift;

    # TODO handle case where both term_re and word_characters are defined

    # charset/locale/lang are a bit interdependent
    # so make sure charset/lang are set if locale is explicitly passed.
    # Undefined values are compared as empty strings so that a missing
    # locale never triggers an "uninitialized value" warning.
    my $locale         = defined $self->{locale}    ? $self->{locale}    : '';
    my $default_locale = defined $Defaults{locale} ? $Defaults{locale} : '';
    if ( $locale ne $default_locale ) {
        my ( $lang, $charset ) = split( m/\./, $locale );

        # a locale without a language part (or the POSIX/C locale)
        # falls back to en_US, matching the module-level default logic.
        $lang = 'en_US' if !defined $lang or $lang =~ m/^(posix|c)$/i;
        $self->{lang}    = $lang;
        $self->{charset} = $charset || $Defaults{charset};
    }

    # make sure ignore_fields is a hash ref
    if ( ref( $self->{ignore_fields} ) eq 'ARRAY' ) {
        $self->{ignore_fields}
            = { map { $_ => $_ } @{ $self->{ignore_fields} } };
    }

    $self->_setup_regex_builder;

    return $self;
}
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Parse a query string and return a new object of the configured
# query_class.
#
# The query must be a defined, non-reference scalar. Each extracted term gets
# a Search::Tools::RegEx object; terms containing a space are treated as
# phrases and additionally carry one RegEx object per space-separated part.
sub parse {
    my ( $self, $query_str ) = @_;
    confess "query required" unless defined $query_str;
    croak "query must be a scalar string" if ref $query_str;

    my $extracted = $self->_extract_terms($query_str);

    my %regex;
    for my $term ( @{ $extracted->{terms} } ) {
        my ( $plain, $html, $escaped ) = $self->_build_regex($term);
        my $is_phrase = $term =~ m/\ /;

        # if the term is a phrase,
        # build regex for each term in the phrase
        my @phrase_terms;
        if ($is_phrase) {
            @phrase_terms = map {
                my $part = $_;
                my ( $part_plain, $part_html, $part_escaped )
                    = $self->_build_regex($part);
                Search::Tools::RegEx->new(
                    plain     => $part_plain,
                    html      => $part_html,
                    term      => $part,
                    term_re   => qr/$part_escaped/i,
                    is_phrase => 0,
                );
            } split( /\ /, $term );
        }

        $regex{$term} = Search::Tools::RegEx->new(
            plain        => $plain,
            html         => $html,
            term         => $term,
            term_re      => qr/$escaped/i,
            is_phrase    => $is_phrase,
            phrase_terms => \@phrase_terms,
        );
    }

    return $self->{query_class}->new(
        dialect => $extracted->{dialect},
        terms   => $extracted->{terms},
        fields  => $extracted->{fields},
        str     => to_utf8( $query_str, $self->charset ),
        regex   => \%regex,
        qp      => $self,
    );
}
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Tokenize a raw query string into an ordered list of unique terms.
#
# Takes the query string. Returns a hash ref with keys:
#   terms   - array ref of terms (phrases are space-joined), in query order
#   fields  - array ref of field names seen in the parse tree
#   dialect - the object returned by Search::Query::Parser->parse
#   query   - the query string exactly as passed in
#
# Croaks with the parser's error if the query cannot be parsed.
sub _extract_terms {
    my $self  = shift;
    my $query = shift;
    confess "need query to extract terms" unless defined $query;

    my $stopwords     = $self->stopwords;
    my $and_word      = $self->and_word;
    my $or_word       = $self->or_word;
    my $not_word      = $self->not_word;
    my $wildcard      = $self->wildcard;
    my $phrase        = $self->phrase_delim;
    my $igf           = $self->ignore_first_char;
    my $igl           = $self->ignore_last_char;
    my $wordchar      = $self->word_characters;
    my $default_field = $self->default_field;
    my $esc_wildcard  = quotemeta($wildcard);
    my $word_re    = qr/(($esc_wildcard)?[$wordchar]+($esc_wildcard)?)/;
    my $min_length = $self->term_min_length;
    my $raw_query  = $query;

    # stopwords may be an array ref or a whitespace-separated string;
    # an undefined value simply means "no stopwords".
    $stopwords = [] unless defined $stopwords;
    $stopwords = [ split( /\s+/, $stopwords ) ] unless ref $stopwords;
    my %stophash = map { to_utf8( lc($_), $self->charset ) => 1 } @$stopwords;

    my ( %words, %uniq );
    my $parser = Search::Query::Parser->new(
        and_regex     => qr{$and_word}i,
        or_regex      => qr{$or_word}i,
        not_regex     => qr{$not_word}i,
        default_field => $default_field,
        query_class   => $self->query_dialect,
    );

    # the string handed to the parser is case-folded (if requested)
    # and upgraded to UTF-8; the caller's original is returned untouched.
    my $baked_query = $raw_query;
    $baked_query = lc($baked_query) if $self->ignore_case;
    $baked_query = to_utf8( $baked_query, $self->charset );
    my $dialect = $parser->parse($baked_query) or croak $parser->error;
    $self->debug && carp "parsetree: " . Data::Dump::dump( $dialect->tree );

    # %uniq maps each raw value in the tree to its ordinal position
    my $fields_searched
        = $self->_get_value_from_tree( \%uniq, $dialect->tree, undef );

    $self->debug && carp "parsed: " . Data::Dump::dump( \%uniq );

    my $count = scalar( keys %uniq );

    # parse uniq into word tokens
    # including removing stop words

    $self->debug && carp "word_re: $word_re";

U: for my $u ( sort { $uniq{$a} <=> $uniq{$b} } keys %uniq ) {

        my $n = $uniq{$u};

        # only phrases have space
        # but due to our word_re, a single non-spaced string
        # might actually be multiple word tokens
        my $isphrase = $u =~ m/\s/ || 0;

        if ( $self->treat_uris_like_phrases ) {

            # special case: treat email addresses, uris, as phrase
            $isphrase ||= $u =~ m/[$wordchar][\@\.\\\/][$wordchar]/ || 0;
        }

        $self->debug && carp "$u -> isphrase = $isphrase";

        my @w = ();

    TOK: for my $w ( split( m/\s+/, to_utf8( $u, $self->charset ) ) ) {

            next TOK unless $w =~ m/\S/;

            $w =~ s/\Q$phrase\E//g;

            while ( $w =~ m/$word_re/g ) {
                my $tok = _untaint($1);

                # strip ignorable chars
                $tok =~ s/^[$igf]+// if length($igf);
                $tok =~ s/[$igl]+$// if length($igl);

                unless ($tok) {
                    $self->debug && carp "no token for '$w' $word_re";
                    next TOK;
                }

                $self->debug && carp "found token: $tok";

                # stopwords are dropped, except inside a phrase
                if ( exists $stophash{ lc($tok) } ) {
                    $self->debug && carp "$tok = stopword";
                    next TOK unless $isphrase;
                }

                # bare boolean operators are not search terms
                unless ($isphrase) {
                    next TOK if $tok =~ m/^($and_word|$or_word|$not_word)$/i;
                }

                # if tainting was on, odd things can happen.
                # so check one more time
                $tok = to_utf8( $tok, $self->charset );

                # final sanity check
                if ( !Encode::is_utf8($tok) ) {
                    carp "$tok is NOT utf8";
                    next TOK;
                }

                push( @w, $tok );
            }
        }

        next U unless @w;

        # a phrase is stored as one space-joined key,
        # everything else as one key per token
        if ($isphrase) {
            $words{ join( ' ', @w ) } = $n + $count++;
        }
        else {
            for my $tok (@w) {
                $words{$tok} = $n + $count++;
            }
        }
    }

    $self->debug && carp "tokenized: " . Data::Dump::dump( \%words );

    # make sure we don't have 'foo' and 'foo*'
    for my $word ( keys %words ) {
        if ( $word =~ m/$esc_wildcard/ ) {
            ( my $copy = $word ) =~ s,$esc_wildcard,,g;

            # delete the more exact of the two
            # since the * will match both
            delete( $words{$copy} );
        }

        if ( length $word < $min_length ) {
            $self->debug and carp "token too short: '$word'";
            delete $words{$word};
        }
    }

    $self->debug && carp "wildcards removed: " . Data::Dump::dump( \%words );

    # if any words need to be stemmed
    my $stemmer = $self->stemmer;
    if ($stemmer) {

        # split each term into words, stem each word, and re-key the term.
        # The loop variable is named (not $_) so that a stemmer callback
        # which touches $_ cannot corrupt the key being processed.
    K: for my $term ( keys %words ) {
            my @w = split /\s+/, $term;
        W: for my $w (@w) {
                my $f = $stemmer->( $self, $w );
                next W if !defined $f or !length $f;
                $f = to_utf8($f);

                # add wildcard to indicate chars were lost
                $w = $f . $wildcard;
            }
            my $new = join ' ', @w;
            if ( $new ne $term ) {
                $words{$new} = $words{$term};
                delete $words{$term};
            }
        }
    }

    $self->debug && carp "stemming done: " . Data::Dump::dump( \%words );

    # sort keeps query in same order as we entered
    return {
        terms   => [ sort { $words{$a} <=> $words{$b} } keys %words ],
        fields  => [ keys %$fields_searched ],
        dialect => $dialect,
        query   => $raw_query,
    };
}
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# stolen nearly verbatim from Taint::Runtime |
|
344
|
|
|
|
|
|
|
# apparently regex can be tainted when running under 'use locale'. |
|
345
|
|
|
|
|
|
|
# as of version 0.24 this should not be needed but until I can find a way |
|
346
|
|
|
|
|
|
|
# to easily test the Taint feature, we just do this. It's low overhead. |
|
347
|
|
|
|
|
|
|
# Untaint a value by passing it through a regex capture.
#
# Accepts either a plain scalar or a reference to a scalar.
#   plain scalar -> returns the untainted copy (undef stays undef)
#   scalar ref   -> untaints the referenced value in place and returns 1
#
# The capture uses /s and explicit anchors so that values containing
# newlines are preserved in full rather than cut at the first newline.
sub _untaint {
    my $str = shift;
    my $ref = ref($str) ? $str : \$str;
    if ( defined $$ref ) {
        $$ref =~ m/\A(.*)\z/s
            or confess("Couldn't find data to untaint");
        $$ref = $1;
    }
    return ref($str) ? 1 : $str;
}
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Walk a query parse tree and collect the values of all non-negated clauses.
#
# Arguments:
#   $uniq      - hash ref filled in place: value => ordinal position
#   $parseTree - hash ref; only the '+' and '' keys (arrays of leaves) are read
#   $c         - starting ordinal: undef, a number, or (for recursive calls)
#                a reference to the shared running counter
#
# Returns a hash ref of field name => number of leaves seen for that field.
# Croaks if a leaf has an undefined value.
sub _get_value_from_tree {
    my $self      = shift;
    my $uniq      = shift;
    my $parseTree = shift;
    my $c         = shift;
    my %fields    = ();

    # Keep ONE running counter for the whole walk. Passing a plain number
    # into the recursion would let a nested subtree hand out ordinals that
    # later siblings then reuse, making the final term order ambiguous.
    my $counter = $c;
    if ( ref($counter) ne 'SCALAR' ) {
        my $start = defined $c ? $c : 0;
        $counter = \$start;
    }

    # we only want the values from non minus queries
    for my $node ( '+', '' ) {
        next unless exists $parseTree->{$node};

        for my $leaf ( @{ $parseTree->{$node} } ) {
            my $v = $leaf->{value};
            if ( !defined $v ) {
                croak "undefined value in query tree: "
                    . Data::Dump::dump($leaf);
            }

            my $field = $leaf->{field};
            if ( defined $field ) {
                next if exists $self->ignore_fields->{$field};
                $fields{$field}++;
            }

            if ( ref $v eq 'HASH' ) {

                # nested sub-query: recurse, sharing the counter,
                # and add its field counts to ours.
                my $f = $self->_get_value_from_tree( $uniq, $v, $counter );
                $fields{$_} += $f->{$_} for ( keys %$f );
            }
            elsif ( ref $v eq 'ARRAY' ) {

                # NOTE: $value aliases the tree element, so the whitespace
                # collapse also rewrites the value stored in the tree.
                for my $value (@$v) {
                    $value =~ s/\s+/ /g;
                    $uniq->{$value} = ++$$counter;
                }
            }
            else {

                # if the $leaf is a proximity query,
                # ignore the "phrase-ness" of it and split
                # on whitespace. This is a compromise,
                # mitigated by the tendency of HeatMap
                # to reward proximity anyway.
                if ( $leaf->{proximity} and $leaf->{proximity} > 1 ) {
                    my @tokens = split( m/\ +/, $v );
                    $uniq->{$_} = ++$$counter for @tokens;
                    next;
                }

                # collapse any whitespace
                $v =~ s,\s+,\ ,g;

                $uniq->{$v} = ++$$counter;
            }
        }
    }
    return \%fields;
}
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# Precompute the regex fragments that _build_regex() stitches together.
#
# Stores on the object:
#   html_safe_wordchars - word_characters with <, > and & removed
#   start_bound         - alternation of things that may precede a word
#   end_bound           - alternation of things that may follow a word
#   plain_phrase_bound  - what may separate phrase words in plain text
#   html_phrase_bound   - what may separate phrase words in HTML
# start_bound/end_bound are only set when not already supplied.
sub _setup_regex_builder {
    my $self = shift;

    # TODO optional for term_re

    # a search for a '<' or '>' should still highlight,
    # since they can be indexed as literal characters,
    # but this causes a great deal of hassle
    # so we just ignore them.
    ( my $safe_wc = $self->word_characters ) =~ s,[<>&],,g;
    $self->{html_safe_wordchars} = $safe_wc;    # remember for build

    my $igf = $self->ignore_first_char;
    my $igl = $self->ignore_last_char;
    my $ws  = $self->whitespace;

    # what's the boundary between a word and a not-word?
    # by default:
    #  the beginning of a string
    #  the end of a string
    #  whatever we've defined as whitespace
    #  any character that is not a word character
    #  any character we explicitly ignore at start or end of word
    #
    # the \A and \Z (beginning and end) should help if the word butts up
    # against the beginning or end of a tagset.
    my $not_wordchar = '[^' . $safe_wc . ']';

    my @before = (
        '\A',
        '[>]',

        # a character entity counts as a single boundary char, because
        # a ; might be a legitimate wordchar. If the entity resolves to
        # a legit wordchar this might give unexpected results.
        '(?:&[\w\#]+;)',
        $ws,
        $not_wordchar,
    );
    if ( length $igf ) {
        push @before, qr/[$igf]+/i;
    }

    my @after = ( '\Z', '[<&]', $ws, $not_wordchar );
    if ( length $igl ) {
        push @after, qr/[$igl]+/i;
    }

    # caller-supplied bounds win over the computed ones
    $self->{start_bound} ||= join( '|', @before );
    $self->{end_bound}   ||= join( '|', @after );

    # the whitespace in a query phrase might be:
    #  any ignore_last_char, followed by
    #  one or more nonwordchar or whitespace, followed by
    #  any ignore_first_char
    # define for both text and html
    # NOTE the first/last swap for plain vs html
    # is intentional because of how regex are built.
    my $plain_lead = length($igl) ? qr/[$igl]*/i : '';
    my $plain_gap  = qr/(?:[\s\x20]|[^$safe_wc])+/is;
    my $plain_tail = length($igf) ? qr/[$igf]?/i : '';
    $self->{plain_phrase_bound}
        = join( '', $plain_lead, $plain_gap, $plain_tail );

    my $html_lead = length($igf) ? qr/[$igf]*/i : '';
    my $html_gap  = qr/(?:$ws|[^$safe_wc])+/is;
    my $html_tail = length($igl) ? qr/[$igl]?/i : '';
    $self->{html_phrase_bound}
        = join( '', $html_lead, $html_gap, $html_tail );
}
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
# Build the matching patterns for one term (a word or space-joined phrase).
#
# Takes the term string; croaks if it is undefined or empty.
# Returns a three-element list:
#   $plain   - compiled regex for matching the term in plain text
#   $html    - compiled regex for matching the term in HTML, tolerating
#              inline tags between characters and entity spellings of them
#   $escaped - the quotemeta'd term with wildcard and whitespace expanded
#              (a string, not compiled)
# Relies on the fragments prepared by _setup_regex_builder().
sub _build_regex {
    my $self = shift;
    my $q    = shift;

    # test definedness/length rather than truth so the term "0" is accepted
    croak "need query to build()" unless defined $q and length $q;

    my $wc        = $self->{html_safe_wordchars};
    my $st_bound  = $self->{start_bound};
    my $end_bound = $self->{end_bound};
    my $ppb       = $self->{plain_phrase_bound};
    my $hpb       = $self->{html_phrase_bound};
    my $wildcard  = $self->wildcard;
    my $wild_esc  = quotemeta($wildcard);
    my $tag_re    = $self->tag_re;

    # define simple pattern for plain text
    # and complex pattern for HTML markup
    my ( $plain, $html );
    my $escaped = quotemeta($q);
    $escaped =~ s/\\[$wild_esc]/[$wc]*/g;    # wildcard
    $escaped =~ s/\\[\s]/$ppb/g;             # whitespace

    $plain = qr/
        (
        \A|$ppb
        )
        (
        ${escaped}
        )
        (
        \Z|$ppb
        )
    /xis;

    my (@char) = split( m//, $q );

    my $counter = -1;

    # each element of @char is rewritten in place (via the loop alias)
    # into a pattern fragment for that character.
CHAR: foreach my $c (@char) {
        $counter++;

        my $ent = $C2E->{$c} || undef;
        my $num = ord($c);

        # if this is a special regexp char, protect it
        $c = quotemeta($c);

        # if it's a *, replace it with the Wild class
        $c = "[$wc]*" if $c eq $wild_esc;

        # a space inside a phrase may be any phrase boundary plus tags
        if ( $c eq '\ ' ) {
            $c = $hpb . $tag_re . '*';
            next CHAR;
        }

        # alternative spellings of the character in markup: the entity
        # from the lookup map (if any) and the numeric character
        # reference "&#NNN;".
        my $aka;
        if ($ent) {
            $aka = $ent eq "&#$num;" ? $ent : "$ent|&#$num;";
        }
        else {
            $aka = "&#$num;";
        }

        # make $c into a regexp
        $c = qr/$c|$aka/i unless $c eq "[$wc]*";

        # any char might be followed by zero or more tags,
        # unless it's the last char
        $c .= $tag_re . '*' unless $counter == $#char;
    }

    # re-join the chars into a single string
    my $safe = join( "\n", @char );   # use \n to make it legible in debugging

    # for debugging legibility we include newlines,
    # so make sure we use the /x flag in matches
    $html = qr/
        (
        ${st_bound}
        )
        (
        ${safe}
        )
        (
        ${end_bound}
        )
    /xis;

    return ( $plain, $html, $escaped );
}
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# Derive per-object tokenizing helpers from the current attributes.
# (This is based on SWISH::PhraseHighlight::set_match_regexp().)
#
# Stores on the object:
#   _wc_regexp   - compiled regex matching a run of non-word characters
#   _ignoreFirst - "[...]*" class string for ignore_first_char, or ''
#   _ignoreLast  - "[...]*" class string for ignore_last_char, or ''
sub _build_term_re {
    my $self = shift;

    my $wc = $self->word_characters;

    # regexp for splitting into swish-words. No /o flag here: /o would
    # freeze the first object's word_characters for the whole process.
    $self->{_wc_regexp} = qr/[^$wc]+/i;

    my $igf = $self->ignore_first_char;
    my $igl = $self->ignore_last_char;

    # test length, not truth, so that a literal "0" is a valid ignore char
    for ( $igf, $igl ) {
        $_ = ( defined $_ and length $_ ) ? "[$_]*" : '';
    }

    $self->{_ignoreFirst} = $igf;
    $self->{_ignoreLast}  = $igl;
}
|
612
|
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
1; |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
__END__ |