line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Search::Query::Parser; |
2
|
8
|
|
|
8
|
|
9862
|
use Moo; |
|
8
|
|
|
|
|
134618
|
|
|
8
|
|
|
|
|
55
|
|
3
|
8
|
|
|
8
|
|
13003
|
use Carp; |
|
8
|
|
|
|
|
21
|
|
|
8
|
|
|
|
|
742
|
|
4
|
8
|
|
|
8
|
|
1936
|
use Data::Dump qw( dump ); |
|
8
|
|
|
|
|
22721
|
|
|
8
|
|
|
|
|
572
|
|
5
|
8
|
|
|
8
|
|
1548
|
use Search::Query; |
|
8
|
|
|
|
|
17
|
|
|
8
|
|
|
|
|
254
|
|
6
|
8
|
|
|
8
|
|
5205
|
use Search::Query::Dialect::Native; |
|
8
|
|
|
|
|
23
|
|
|
8
|
|
|
|
|
342
|
|
7
|
8
|
|
|
8
|
|
4617
|
use Search::Query::Clause; |
|
8
|
|
|
|
|
23
|
|
|
8
|
|
|
|
|
283
|
|
8
|
8
|
|
|
8
|
|
3744
|
use Search::Query::Field; |
|
8
|
|
|
|
|
21
|
|
|
8
|
|
|
|
|
340
|
|
9
|
8
|
|
|
8
|
|
59
|
use Scalar::Util qw( blessed weaken ); |
|
8
|
|
|
|
|
9
|
|
|
8
|
|
|
|
|
500
|
|
10
|
8
|
|
|
8
|
|
45
|
use namespace::autoclean; |
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
51
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.305'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
has 'and_regex' => ( is => 'rw', default => sub {qr/\&|AND|ET|UND|E/i} ); |
15
|
|
|
|
|
|
|
has 'clause_class' => |
16
|
|
|
|
|
|
|
( is => 'rw', default => sub {'Search::Query::Clause'} ); |
17
|
|
|
|
|
|
|
has 'croak_on_error' => ( is => 'rw', default => sub {0} ); |
18
|
|
|
|
|
|
|
has 'default_boolop' => ( is => 'rw', default => sub {'+'} ); |
19
|
|
|
|
|
|
|
has 'default_field' => ( is => 'rw' ); |
20
|
|
|
|
|
|
|
has 'default_op' => ( is => 'rw', default => sub {':'} ); |
21
|
|
|
|
|
|
|
has 'field_class' => ( is => 'rw', default => sub {'Search::Query::Field'} ); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# match prefix.field: or field |
24
|
|
|
|
|
|
|
has 'field_regex' => ( is => 'rw', default => sub {qr/[\.\w]+/}, ); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has 'fixup' => ( is => 'rw', default => sub {0} ); |
27
|
|
|
|
|
|
|
has 'near_regex' => ( is => 'rw', default => sub {qr/NEAR\d+/i}, ); |
28
|
|
|
|
|
|
|
has 'not_regex' => ( is => 'rw', default => sub {qr/NOT|PAS|NICHT|NON/i}, ); |
29
|
|
|
|
|
|
|
has 'null_term' => ( is => 'rw', ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# ops that admit an empty left operand |
32
|
|
|
|
|
|
|
has 'op_nofield_regex' => ( is => 'rw', default => sub {qr/=~|!~|[~:#]/}, ); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# longest ops first ! |
35
|
|
|
|
|
|
|
has 'op_regex' => |
36
|
|
|
|
|
|
|
( is => 'rw', default => sub {qr/~\d+|==|<=|>=|!=|!:|=~|!~|[:=<>~#]/}, ); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has 'or_regex' => ( is => 'rw', default => sub {qr/\||OR|OU|ODER|O/i}, ); |
39
|
|
|
|
|
|
|
has 'phrase_delim' => ( is => 'rw', default => sub {q/"/}, ); |
40
|
|
|
|
|
|
|
has 'query_class' => |
41
|
|
|
|
|
|
|
( is => 'rw', default => sub {'Search::Query::Dialect::Native'} ); |
42
|
|
|
|
|
|
|
has 'query_class_opts' => ( is => 'rw', default => sub { {} } ); |
43
|
|
|
|
|
|
|
has 'range_regex' => ( is => 'rw', default => sub {qr/\.\./}, ); |
44
|
|
|
|
|
|
|
has 'sloppy' => ( is => 'rw', default => sub {0} ); |
45
|
|
|
|
|
|
|
has 'sloppy_term_regex' => ( is => 'rw', default => sub {qr/[\.\w]+/}, ); |
46
|
|
|
|
|
|
|
has 'term_expander' => ( is => 'rw' ); |
47
|
|
|
|
|
|
|
has 'term_regex' => ( is => 'rw', default => sub {qr/[^\s()]+/}, ); |
48
|
|
|
|
|
|
|
has 'error' => ( is => 'ro' ); |
49
|
|
|
|
|
|
|
has 'fields' => ( is => 'ro' ); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my %SQPCOMPAT = ( |
52
|
|
|
|
|
|
|
rxAnd => 'and_regex', |
53
|
|
|
|
|
|
|
rxOr => 'or_regex', |
54
|
|
|
|
|
|
|
rxNot => 'not_regex', |
55
|
|
|
|
|
|
|
defField => 'default_field', |
56
|
|
|
|
|
|
|
rxTerm => 'term_regex', |
57
|
|
|
|
|
|
|
rxField => 'field_regex', |
58
|
|
|
|
|
|
|
rxOp => 'op_regex', |
59
|
|
|
|
|
|
|
rxOpNoField => 'op_nofield_regex', |
60
|
|
|
|
|
|
|
dialect => 'query_class', # our own compat |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 NAME |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Search::Query::Parser - convert query strings into query objects |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 SYNOPSIS |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
use Search::Query; |
70
|
|
|
|
|
|
|
my $parser = Search::Query->parser( |
71
|
|
|
|
|
|
|
term_regex => qr/[^\s()]+/, |
72
|
|
|
|
|
|
|
field_regex => qr/\w+/, |
73
|
|
|
|
|
|
|
op_regex => qr/==|<=|>=|!=|=~|!~|[:=<>~#]/, |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# ops that admit an empty left operand |
76
|
|
|
|
|
|
|
op_nofield_regex => qr/=~|!~|[~:#]/, |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# case insensitive |
79
|
|
|
|
|
|
|
and_regex => qr/\&|AND|ET|UND|E/i, |
80
|
|
|
|
|
|
|
or_regex => qr/\||OR|OU|ODER|O/i, |
81
|
|
|
|
|
|
|
not_regex => qr/NOT|PAS|NICHT|NON/i, |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
default_field => 'myfield', # or ['myfield', 'myfield2'] |
84
|
|
|
|
|
|
|
phrase_delim => q/"/, |
85
|
|
|
|
|
|
|
default_boolop => '+', |
86
|
|
|
|
|
|
|
query_class => 'Search::Query::Dialect::Native', |
87
|
|
|
|
|
|
|
field_class => 'Search::Query::Field', |
88
|
|
|
|
|
|
|
query_class_opts => { |
89
|
|
|
|
|
|
|
default_field => 'foo', # or ['foo', 'bar'] |
90
|
|
|
|
|
|
|
}, |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# a generous mode, overlooking boolean-parser syntax errors |
93
|
|
|
|
|
|
|
sloppy => 0, |
94
|
|
|
|
|
|
|
sloppy_term_regex => qr/[\.\w]+/, |
95
|
|
|
|
|
|
|
fixup => 0, |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# if set, this special term indicates a NULL query |
98
|
|
|
|
|
|
|
null_term => 'NULL', |
99
|
|
|
|
|
|
|
); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
my $query = $parser->parse('+hello -world now'); |
102
|
|
|
|
|
|
|
print $query; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 DESCRIPTION |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Search::Query::Parser is a fork of Search::QueryParser |
107
|
|
|
|
|
|
|
that supports multiple query dialects. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The Parser class transforms a query string into a Dialect object structure |
110
|
|
|
|
|
|
|
to be handled by external search engines. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
The query string can contain simple terms, "exact phrases", field |
113
|
|
|
|
|
|
|
names and comparison operators, '+/-' prefixes, parentheses, and |
114
|
|
|
|
|
|
|
boolean connectors. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
The parser can be customized using regular expressions for specific |
117
|
|
|
|
|
|
|
notions of "term", "field name" or "operator" -- see the L |
118
|
|
|
|
|
|
|
method. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The Dialect object resulting from a parsed query is a tree of terms |
121
|
|
|
|
|
|
|
and operators. Each Dialect can be re-serialized as a string |
122
|
|
|
|
|
|
|
using the stringify() method, or simply by printing the Dialect object, |
123
|
|
|
|
|
|
|
since the string-related Perl operations are overloaded using stringify(). |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 QUERY STRING |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
The query string is decomposed into Clause objects, where |
128
|
|
|
|
|
|
|
each Clause has an optional sign prefix, |
129
|
|
|
|
|
|
|
an optional field name and comparison operator, |
130
|
|
|
|
|
|
|
and a mandatory value. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head2 Sign prefix |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Prefix '+' means that the item is mandatory. |
135
|
|
|
|
|
|
|
Prefix '-' means that the item must be excluded. |
136
|
|
|
|
|
|
|
No prefix means that the item will be searched |
137
|
|
|
|
|
|
|
for, but is not mandatory. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
See also section L below, which is another |
140
|
|
|
|
|
|
|
way to combine items into a query. |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 Field name and comparison operator |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Internally, each query item has a field name and comparison |
145
|
|
|
|
|
|
|
operator; if not written explicitly in the query, these |
146
|
|
|
|
|
|
|
take default values C<''> (empty field name) and |
147
|
|
|
|
|
|
|
C<':'> (colon operator). |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Operators have a left operand (the field name) and |
150
|
|
|
|
|
|
|
a right operand (the value to be compared with); |
151
|
|
|
|
|
|
|
for example, C means "search documents containing |
152
|
|
|
|
|
|
|
term 'bar' in field 'foo'", whereas C means |
153
|
|
|
|
|
|
|
"search documents where field 'foo' has exact value 'bar'". |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Here is the list of admitted operators with their intended meaning: |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=over |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item C<:> |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
treat value as a term to be searched within field. |
162
|
|
|
|
|
|
|
This is the default operator. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item C<~> or C<=~> |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
treat value as a regex; match field against the regex. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Note that C<~> |
169
|
|
|
|
|
|
|
after a phrase indicates a proximity assertion: |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
"foo bar"~5 |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
means "match 'foo' and 'bar' within 5 positions of each other." |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=item C |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
negation of above |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=item C<==> or C<=>, C=>, C=>, C, C>, C> |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
classical relational operators |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item C<#> |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Inclusion in the set of comma-separated integers supplied |
186
|
|
|
|
|
|
|
on the right-hand side. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=back |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Operators C<:>, C<~>, C<=~>, C and C<#> admit an empty |
191
|
|
|
|
|
|
|
left operand (so the field name will be C<''>). |
192
|
|
|
|
|
|
|
Search engines will usually interpret this as |
193
|
|
|
|
|
|
|
"any field" or "the whole data record". But see the B |
194
|
|
|
|
|
|
|
feature. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head2 Value |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
A value (right operand to a comparison operator) can be |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=over |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item * |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
A term (as recognized by regex C, see L method below). |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item * |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
A quoted phrase, i.e. a collection of terms within |
209
|
|
|
|
|
|
|
single or double quotes. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Quotes can be used not only for "exact phrases", but also |
212
|
|
|
|
|
|
|
to prevent misinterpretation of some values : for example |
213
|
|
|
|
|
|
|
C<-2> would mean "value '2' with prefix '-'", |
214
|
|
|
|
|
|
|
in other words "exclude term '2'", so if you want to search for |
215
|
|
|
|
|
|
|
value -2, you should write C<"-2"> instead. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Note that C<~> |
218
|
|
|
|
|
|
|
after a phrase indicates a proximity assertion: |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
"foo bar"~5 |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
means "match 'foo' and 'bar' within 5 positions of each other." |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item * |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
A subquery within parentheses. |
227
|
|
|
|
|
|
|
Field names and operators distribute over parentheses, so for |
228
|
|
|
|
|
|
|
example C is equivalent to |
229
|
|
|
|
|
|
|
C. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Nested field names such as C are not allowed. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Sign prefixes do not distribute : C<+(foo bar) +bie> is not |
234
|
|
|
|
|
|
|
equivalent to C<+foo +bar +bie>. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=back |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 Boolean connectors |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Queries can contain boolean connectors 'AND', 'OR', 'NOT' |
241
|
|
|
|
|
|
|
(or their equivalent in some other languages -- see the *_regex |
242
|
|
|
|
|
|
|
features in new()). |
243
|
|
|
|
|
|
|
This is mere syntactic sugar for the '+' and '-' prefixes : |
244
|
|
|
|
|
|
|
C is equivalent to C<+a +b>; |
245
|
|
|
|
|
|
|
C is equivalent to C<(a b)>; |
246
|
|
|
|
|
|
|
C is equivalent to C<-a>. |
247
|
|
|
|
|
|
|
C<+a OR b> does not make sense, |
248
|
|
|
|
|
|
|
but it is translated into C<(a b)>, under the assumption |
249
|
|
|
|
|
|
|
that the user understands "OR" better than a |
250
|
|
|
|
|
|
|
'+' prefix. |
251
|
|
|
|
|
|
|
C<-a OR b> does not make sense either, |
252
|
|
|
|
|
|
|
but has no meaningful approximation, so it is rejected. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Combinations of AND/OR clauses must be surrounded by |
255
|
|
|
|
|
|
|
parentheses, i.e. C<(a AND b) OR c> or C are |
256
|
|
|
|
|
|
|
allowed, but C is not. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
The C connector is treated like the proximity phrase assertion. |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
foo NEAR5 bar |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
is treated as if it were: |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
"foo bar"~5 |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
See the B option. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 METHODS |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 new |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
The following attributes may be initialized in new(). |
273
|
|
|
|
|
|
|
These are also available as get/set methods on the returned |
274
|
|
|
|
|
|
|
Parser object. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=over |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item default_boolop |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item term_regex |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item field_regex |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item op_regex |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item op_nofield_regex |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=item and_regex |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=item or_regex |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=item not_regex |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=item near_regex |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=item range_regex |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item default_field |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
Applied to all terms where no field is defined. |
301
|
|
|
|
|
|
|
The default value is undef (no default). |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item default_op |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The operator used when default_field is applied. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item fields |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item phrase_delim |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item query_class |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
C is an alias for C. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item field_class |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=item clause_class |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=item query_class_opts |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
Will be passed to I new() method each time a query is parse()'d. |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=item dialect_opts |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Alias for query_class_opts. |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item croak_on_error |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Default value is false (0). Set to true to automatically throw an exception |
330
|
|
|
|
|
|
|
via Carp::croak() if parse() would return undef. |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item term_expander |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
A function reference for transforming query terms after they have been parsed. |
335
|
|
|
|
|
|
|
Examples might include adding alternate spellings, synonyms, or |
336
|
|
|
|
|
|
|
expanding wildcards based on lexicon listings. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Example: |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
my $parser = Search::Query->parser( |
341
|
|
|
|
|
|
|
term_expander => sub { |
342
|
|
|
|
|
|
|
my ($term, $field) = @_; |
343
|
|
|
|
|
|
|
return ($term) if ref $term; # skip ranges |
344
|
|
|
|
|
|
|
return ( qw( one two three ), $term ); |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
); |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
my $query = $parser->parse("foo=bar") |
349
|
|
|
|
|
|
|
print "$query\n"; # +foo=(one OR two OR three OR bar) |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
The term_expander reference should expect two arguments: the term value |
352
|
|
|
|
|
|
|
and, if available, the term field name. It should return an array of values. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
The term_expander reference is called internally during the parse() method, |
355
|
|
|
|
|
|
|
B any field alias expansion or validation is performed. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item sloppy( 0|1 ) |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
If the string passed to parse() has any incorrect or unsupported syntax |
360
|
|
|
|
|
|
|
in it, the default behavior is for parsing to stop immediately, error() |
361
|
|
|
|
|
|
|
to be set, and for parse() to return undef. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
In certain cases (as on a web form) this is undesirable. Set sloppy |
364
|
|
|
|
|
|
|
mode to true to fallback to non-boolean evaluation of the string, |
365
|
|
|
|
|
|
|
which in most cases should still return a Dialect object. |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Example: |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
$parser->parse('foo -- OR bar'); # if sloppy==0, returns undef |
370
|
|
|
|
|
|
|
$parser->parse('foo -- OR bar'); # if sloppy==1, equivalent to 'foo bar' |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item sloppy_term_regex |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
The regex definition used to match a term when sloppy==1. |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item fixup( 0|1 ) |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
Attempt to fix syntax errors like the lack of a closing parenthesis |
379
|
|
|
|
|
|
|
or a missing double-quote. Different than sloppy() which will not |
380
|
|
|
|
|
|
|
attempt to fix broken syntax, but should probably be used together |
381
|
|
|
|
|
|
|
if you really do not care about strict syntax checking. |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item null_term |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
If set to I, the B feature will treat field value |
386
|
|
|
|
|
|
|
of I as if it was undefined. Example: |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
$parser->parse('foo='); # throws fatal error |
389
|
|
|
|
|
|
|
$parser->null_term('NULL'); |
390
|
|
|
|
|
|
|
$parser->parse('foo=NULL'); # field foo has NULL value |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
This feature is most useful with the SQL dialect, where you might want to |
393
|
|
|
|
|
|
|
find NULL values. Use it like: |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
my $parser = Search::Query->parser( |
396
|
|
|
|
|
|
|
dialect => 'SQL', |
397
|
|
|
|
|
|
|
null_term => 'NULL' |
398
|
|
|
|
|
|
|
); |
399
|
|
|
|
|
|
|
my $query = $parser->parse('foo!=NULL'); |
400
|
|
|
|
|
|
|
print $query; # prints "foo is not NULL" |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=back |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head2 BUILDARGS |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Internal method for mangling constructor params. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
sub BUILDARGS { |
412
|
27
|
|
|
27
|
1
|
46980
|
my ( $class, %args ) = @_; |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Search::QueryParser compatability |
415
|
27
|
50
|
|
|
|
112
|
if ( exists $args{dialect_opts} ) { |
416
|
0
|
|
|
|
|
0
|
$args{query_class_opts} = delete $args{dialect_opts}; |
417
|
|
|
|
|
|
|
} |
418
|
27
|
|
|
|
|
104
|
for my $key ( keys %args ) { |
419
|
83
|
100
|
|
|
|
204
|
if ( exists $SQPCOMPAT{$key} ) { |
420
|
17
|
|
|
|
|
69
|
$args{ $SQPCOMPAT{$key} } = delete $args{$key}; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
} |
423
|
27
|
|
|
|
|
1446
|
return \%args; |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=head2 BUILD |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Called internally to initialize the object. |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=cut |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
sub BUILD { |
433
|
27
|
|
|
27
|
1
|
167
|
my $self = shift; |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# query class can be shortcut |
436
|
27
|
|
|
|
|
172
|
$self->{query_class} |
437
|
|
|
|
|
|
|
= Search::Query->get_query_class( $self->{query_class} ); |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# use field class if query class defines one |
440
|
|
|
|
|
|
|
# and we weren't passed one explicitly |
441
|
27
|
100
|
|
|
|
205
|
if ( $self->{query_class}->field_class ne $self->{field_class} ) { |
442
|
17
|
|
|
|
|
67
|
$self->{field_class} = $self->{query_class}->field_class; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
27
|
100
|
|
|
|
160
|
$self->set_fields( $self->{fields} ) if $self->{fields}; |
446
|
|
|
|
|
|
|
|
447
|
27
|
|
|
|
|
501
|
return $self; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 error |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Returns the last error message. |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=cut |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 clear_error |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Sets error message to undef. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
sub clear_error { |
463
|
2
|
|
|
2
|
1
|
155
|
$_->{error} = undef; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 get_field( I ) |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Returns Field object for I or undef if there isn't one |
469
|
|
|
|
|
|
|
defined. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub get_field { |
474
|
230
|
|
|
230
|
1
|
256
|
my $self = shift; |
475
|
230
|
50
|
|
|
|
496
|
my $name = shift or croak "name required"; |
476
|
230
|
100
|
|
|
|
641
|
if ( !exists $self->{fields}->{$name} ) { |
477
|
8
|
|
|
|
|
27
|
return undef; |
478
|
|
|
|
|
|
|
} |
479
|
222
|
|
|
|
|
629
|
return $self->{fields}->{$name}; |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=head2 set_fields( I ) |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Set the I structure. Called internally by BUILD() |
485
|
|
|
|
|
|
|
if you pass a C key/value pair to new(). |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
The structure of I may be one of the following: |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
my $fields = { |
490
|
|
|
|
|
|
|
field1 => 1, |
491
|
|
|
|
|
|
|
field2 => { alias_for => 'field1' }, |
492
|
|
|
|
|
|
|
field3 => Search::Query::Field->new( name => 'field3' ), |
493
|
|
|
|
|
|
|
field4 => { alias_for => [qw( field1 field3 )] }, |
494
|
|
|
|
|
|
|
}; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# or |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
my $fields = [ |
499
|
|
|
|
|
|
|
'field1', |
500
|
|
|
|
|
|
|
{ name => 'field2', alias_for => 'field1' }, |
501
|
|
|
|
|
|
|
Search::Query::Field->new( name => 'field3' ), |
502
|
|
|
|
|
|
|
{ name => 'field4', alias_for => [qw( field1 field3 )] }, |
503
|
|
|
|
|
|
|
]; |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=cut |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub set_fields { |
509
|
22
|
|
|
22
|
1
|
44
|
my $self = shift; |
510
|
22
|
|
|
|
|
37
|
my $origfields = shift; |
511
|
22
|
50
|
|
|
|
65
|
if ( !defined $origfields ) { |
512
|
0
|
|
|
|
|
0
|
croak "fields required"; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
22
|
|
|
|
|
37
|
my %fields; |
516
|
22
|
|
|
|
|
45
|
my $field_class = $self->{field_class}; |
517
|
|
|
|
|
|
|
|
518
|
22
|
|
|
|
|
48
|
my $reftype = ref($origfields); |
519
|
22
|
50
|
66
|
|
|
169
|
if ( !$reftype or ( $reftype ne 'ARRAY' and $reftype ne 'HASH' ) ) { |
|
|
|
33
|
|
|
|
|
520
|
0
|
|
|
|
|
0
|
croak "fields must be an ARRAY or HASH ref"; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# convert simple array to hash |
524
|
22
|
100
|
|
|
|
76
|
if ( $reftype eq 'ARRAY' ) { |
|
|
50
|
|
|
|
|
|
525
|
18
|
|
|
|
|
49
|
for my $name (@$origfields) { |
526
|
34
|
50
|
|
|
|
1156
|
if ( blessed($name) ) { |
|
|
100
|
|
|
|
|
|
527
|
0
|
|
|
|
|
0
|
$fields{ $name->name } = $name; |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
elsif ( ref($name) eq 'HASH' ) { |
530
|
1
|
50
|
|
|
|
6
|
if ( !exists $name->{name} ) { |
531
|
0
|
|
|
|
|
0
|
croak "'name' required in hashref: " . dump($name); |
532
|
|
|
|
|
|
|
} |
533
|
1
|
|
|
|
|
30
|
$fields{ $name->{name} } = $field_class->new(%$name); |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
else { |
536
|
33
|
|
|
|
|
674
|
$fields{$name} = $field_class->new( name => $name, ); |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
elsif ( $reftype eq 'HASH' ) { |
541
|
4
|
|
|
|
|
22
|
for my $name ( keys %$origfields ) { |
542
|
12
|
|
|
|
|
23
|
my $val = $origfields->{$name}; |
543
|
12
|
|
|
|
|
14
|
my $obj; |
544
|
12
|
50
|
|
|
|
77
|
if ( blessed($val) ) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
545
|
0
|
|
|
|
|
0
|
$obj = $val; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
elsif ( ref($val) eq 'HASH' ) { |
548
|
12
|
50
|
|
|
|
37
|
if ( !exists $val->{name} ) { |
549
|
12
|
|
|
|
|
27
|
$val->{name} = $name; |
550
|
|
|
|
|
|
|
} |
551
|
12
|
|
|
|
|
353
|
$obj = $field_class->new(%$val); |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
elsif ( !ref $val ) { |
554
|
0
|
|
|
|
|
0
|
$obj = $field_class->new( name => $name ); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
else { |
557
|
0
|
|
|
|
|
0
|
croak |
558
|
|
|
|
|
|
|
"field value for $name must be a field name, hashref or Field object"; |
559
|
|
|
|
|
|
|
} |
560
|
12
|
|
|
|
|
1893
|
$fields{$name} = $obj; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
22
|
|
|
|
|
192
|
$self->{fields} = \%fields; |
565
|
22
|
|
|
|
|
52
|
return $self->{fields}; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head2 set_field( I => I ) |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
Sets field I to Field object I. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=cut |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
sub set_field { |
575
|
8
|
|
|
8
|
1
|
48
|
my $self = shift; |
576
|
8
|
|
|
|
|
17
|
my ( $name, $field ) = @_; |
577
|
8
|
50
|
|
|
|
25
|
confess "name required" unless $name; |
578
|
8
|
50
|
|
|
|
18
|
confess "field object required" unless $field; |
579
|
8
|
50
|
|
|
|
40
|
confess "field not an object: $field" unless blessed($field); |
580
|
8
|
|
|
|
|
33
|
$self->{fields}->{$name} = $field; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head2 parse( I ) |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Returns a Search::Query::Dialect object of type |
586
|
|
|
|
|
|
|
I. |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
If there is a syntax error in I, |
589
|
|
|
|
|
|
|
parse() will return C and set error(). |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=cut |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
sub parse { |
594
|
82
|
|
|
82
|
1
|
18325
|
my $self = shift; |
595
|
82
|
|
|
|
|
123
|
my $q = shift; |
596
|
82
|
50
|
|
|
|
220
|
croak "query required" unless defined $q; |
597
|
82
|
|
33
|
|
|
513
|
my $class = shift || $self->query_class; |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# reset state in case we are called multiple times |
600
|
82
|
|
|
|
|
167
|
$self->{error} = undef; |
601
|
82
|
|
|
|
|
148
|
$self->{_paren_count} = 0; |
602
|
|
|
|
|
|
|
|
603
|
82
|
|
|
|
|
383
|
$q = $class->preprocess($q); |
604
|
82
|
|
|
|
|
261
|
my ($query) = $self->_parse( $q, undef, undef, $class ); |
605
|
82
|
100
|
66
|
|
|
235
|
if ( !defined $query && !$self->sloppy ) { |
606
|
2
|
100
|
|
|
|
159
|
croak $self->error if $self->croak_on_error; |
607
|
1
|
|
|
|
|
4
|
return $query; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# if in sloppy mode and we failed to parse, |
611
|
|
|
|
|
|
|
# extract what looks like terms and re-parse. |
612
|
80
|
100
|
66
|
|
|
238
|
if ( !defined $query && $self->sloppy ) { |
613
|
1
|
|
|
|
|
3
|
return $self->_sloppify( $q, $class ); |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
79
|
100
|
|
|
|
195
|
if ( $self->{term_expander} ) { |
617
|
3
|
|
|
|
|
7
|
$self->_call_term_expander($query); |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
79
|
100
|
|
|
|
227
|
if ( $self->{fields} ) { |
621
|
66
|
|
|
|
|
257
|
$self->_expand($query); |
622
|
66
|
|
|
|
|
1198
|
$self->_validate($query); |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# if in sloppy mode and we failed to parse, |
626
|
|
|
|
|
|
|
# extract what looks like terms and re-parse. |
627
|
77
|
100
|
66
|
|
|
328
|
if ( $self->error && $self->sloppy ) { |
628
|
1
|
|
|
|
|
3
|
return $self->_sloppify( $q, $class ); |
629
|
|
|
|
|
|
|
} |
630
|
|
|
|
|
|
|
|
631
|
76
|
|
|
|
|
152
|
$query->{parser} = $self; |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#warn dump $query; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# if the query isn't re-parse-able once stringified |
636
|
|
|
|
|
|
|
# then it is broken, somehow. |
637
|
76
|
100
|
33
|
|
|
665
|
if ( defined $query |
|
|
|
66
|
|
|
|
|
638
|
|
|
|
|
|
|
and !$self->error |
639
|
|
|
|
|
|
|
and $self->croak_on_error ) |
640
|
|
|
|
|
|
|
{ |
641
|
26
|
|
|
|
|
98
|
my ($reparsed) = $self->_parse( "$query", undef, undef, $class ); |
642
|
26
|
50
|
|
|
|
389
|
if ( !defined $reparsed ) { |
643
|
0
|
|
|
|
|
0
|
croak sprintf( "Error: unable to parse '%s'. Reason: '%s'.", |
644
|
|
|
|
|
|
|
$q, $self->error ); |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
#weaken( $query->{parser} ); # TODO leaks possible? |
649
|
|
|
|
|
|
|
|
650
|
76
|
|
|
|
|
470
|
return $query; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
sub _sloppify { |
654
|
2
|
|
|
2
|
|
3
|
my ( $self, $q, $class ) = @_; |
655
|
2
|
|
|
|
|
4
|
my $term = $self->{sloppy_term_regex}; |
656
|
2
|
|
|
|
|
2
|
my $and = $self->{and_regex}; |
657
|
2
|
|
|
|
|
3
|
my $or = $self->{or_regex}; |
658
|
2
|
|
|
|
|
4
|
my $not = $self->{not_regex}; |
659
|
2
|
|
|
|
|
2
|
my $near = $self->{near_regex}; |
660
|
2
|
|
|
|
|
2
|
my $ops = $self->{op_regex}; |
661
|
2
|
|
|
|
|
101
|
my $bools = qr/($and|$or|$not|$near|$ops)/; |
662
|
2
|
|
|
|
|
7
|
my @terms; |
663
|
|
|
|
|
|
|
|
664
|
2
|
|
|
|
|
34
|
while ( $q =~ m/($term)/ig ) { |
665
|
14
|
|
|
|
|
15
|
my $t = $1; |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
#warn "$t =~ $bools\n"; |
668
|
14
|
100
|
|
|
|
112
|
if ( $t =~ m/^$bools$/ ) { |
669
|
7
|
|
|
|
|
25
|
next; |
670
|
|
|
|
|
|
|
} |
671
|
7
|
|
|
|
|
40
|
push @terms, split( /$ops/, $t ); |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
#dump \@terms; |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
# reset errors since we will re-parse |
677
|
2
|
|
|
|
|
3
|
$self->{error} = undef; |
678
|
2
|
|
|
|
|
6
|
my ($query) = $self->_parse( join( ' ', @terms ), undef, undef, $class ); |
679
|
2
|
50
|
|
|
|
5
|
if ( !defined $query ) { |
680
|
0
|
0
|
|
|
|
0
|
$self->croak_on_error and croak $self->error; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
else { |
683
|
2
|
|
|
|
|
3
|
$query->{parser} = $self; |
684
|
|
|
|
|
|
|
} |
685
|
2
|
|
|
|
|
12
|
return $query; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
sub _call_term_expander { |
689
|
3
|
|
|
3
|
|
3
|
my ( $self, $query ) = @_; |
690
|
3
|
|
|
|
|
4
|
my $expander = $self->{term_expander}; |
691
|
3
|
50
|
|
|
|
7
|
if ( ref($expander) ne 'CODE' ) { |
692
|
0
|
|
|
|
|
0
|
croak "term_expander must be a CODE reference"; |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
3
|
|
|
|
|
4
|
my $query_class = $self->{query_class}; |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
$query->walk( |
698
|
|
|
|
|
|
|
sub { |
699
|
3
|
|
|
3
|
|
4
|
my ( $clause, $tree, $code, $prefix ) = @_; |
700
|
3
|
50
|
|
|
|
10
|
if ( $clause->is_tree ) { |
701
|
0
|
|
|
|
|
0
|
$clause->value->walk($code); |
702
|
0
|
|
|
|
|
0
|
return; |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
3
|
|
|
|
|
14
|
my @newterms = $expander->( $clause->value, $clause->field ); |
706
|
3
|
50
|
33
|
|
|
30
|
if ( ref $newterms[0] and ref $clause->value ) { |
|
|
100
|
|
|
|
|
|
707
|
0
|
|
|
|
|
0
|
$clause->value( $newterms[0] ); |
708
|
|
|
|
|
|
|
} |
709
|
|
|
|
|
|
|
elsif ( @newterms > 1 ) { |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
# turn $clause into a tree |
712
|
1
|
|
|
|
|
4
|
my $class = blessed($clause); |
713
|
1
|
|
|
|
|
2
|
my $op = $clause->op; |
714
|
1
|
|
|
|
|
4
|
my $field = $clause->field; |
715
|
1
|
|
|
|
|
2
|
my $proximity = $clause->proximity; |
716
|
1
|
|
|
|
|
3
|
my $quote = $clause->quote; |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
#warn "before tree: " . dump $tree; |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
#warn "code clause: " . dump $clause; |
721
|
1
|
|
|
|
|
1
|
my @subclauses; |
722
|
1
|
|
|
|
|
1
|
for my $term (@newterms) { |
723
|
4
|
|
|
|
|
125
|
push( |
724
|
|
|
|
|
|
|
@subclauses, |
725
|
|
|
|
|
|
|
$class->new( |
726
|
|
|
|
|
|
|
field => $field, |
727
|
|
|
|
|
|
|
op => $op, |
728
|
|
|
|
|
|
|
value => $term, |
729
|
|
|
|
|
|
|
quote => $quote, |
730
|
|
|
|
|
|
|
proximity => $proximity, |
731
|
|
|
|
|
|
|
) |
732
|
|
|
|
|
|
|
); |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# OR the fields together. TODO optional? |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# we must set "" key here explicitly, because |
738
|
|
|
|
|
|
|
# our bool op keys are not methods. |
739
|
1
|
|
|
|
|
20
|
my $subclause |
740
|
1
|
|
|
|
|
18
|
= $query_class->new( %{ $self->query_class_opts }, |
741
|
|
|
|
|
|
|
parser => $self ); |
742
|
1
|
|
|
|
|
16
|
$subclause->{""} = \@subclauses; |
743
|
|
|
|
|
|
|
|
744
|
1
|
|
|
|
|
4
|
$clause->op('()'); |
745
|
1
|
|
|
|
|
5
|
$clause->value($subclause); |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
else { |
748
|
2
|
|
|
|
|
11
|
$clause->value( $newterms[0] ); |
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
} |
752
|
3
|
|
|
|
|
25
|
); |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
sub _expand { |
757
|
66
|
|
|
66
|
|
108
|
my ( $self, $query ) = @_; |
758
|
|
|
|
|
|
|
|
759
|
66
|
50
|
|
|
|
177
|
return if !exists $self->{fields}; |
760
|
66
|
|
|
|
|
105
|
my $fields = $self->{fields}; |
761
|
66
|
|
|
|
|
125
|
my $query_class = $self->{query_class}; |
762
|
66
|
|
|
|
|
121
|
my $default_field = $self->{default_field}; |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
#dump $fields; |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
$query->walk( |
767
|
|
|
|
|
|
|
sub { |
768
|
165
|
|
|
165
|
|
252
|
my ( $clause, $tree, $code, $prefix ) = @_; |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
#warn "code clause: " . dump $clause; |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
#warn "code tree: " . dump $tree; |
773
|
|
|
|
|
|
|
|
774
|
165
|
100
|
|
|
|
491
|
if ( $clause->is_tree ) { |
775
|
46
|
|
|
|
|
195
|
$clause->value->walk($code); |
776
|
46
|
|
|
|
|
195
|
return; |
777
|
|
|
|
|
|
|
} |
778
|
119
|
100
|
66
|
|
|
639
|
if ( ( !defined $clause->field || !length $clause->field ) |
|
|
|
66
|
|
|
|
|
779
|
|
|
|
|
|
|
&& !defined $default_field ) |
780
|
|
|
|
|
|
|
{ |
781
|
13
|
|
|
|
|
43
|
return; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
# make sure clause has an op |
785
|
106
|
100
|
|
|
|
266
|
if ( !$clause->op ) { |
786
|
16
|
|
|
|
|
67
|
$clause->op( $self->default_op ); |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# even if $clause has a field defined, |
790
|
|
|
|
|
|
|
# it may be aliased to multiple others, |
791
|
|
|
|
|
|
|
# so check field def and default_field to determine. |
792
|
106
|
|
|
|
|
101
|
my @field_names; |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# first, which field name to start with? |
795
|
|
|
|
|
|
|
my @clause_fields; # could be plural |
796
|
106
|
100
|
|
|
|
235
|
if ( !defined $clause->field ) { |
797
|
|
|
|
|
|
|
@clause_fields |
798
|
16
|
100
|
|
|
|
54
|
= ref($default_field) |
799
|
|
|
|
|
|
|
? @$default_field |
800
|
|
|
|
|
|
|
: ($default_field); |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
else { |
803
|
90
|
|
|
|
|
231
|
@clause_fields = ( $clause->field ); |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# second, resolve any aliases |
807
|
106
|
|
|
|
|
162
|
for my $cfield (@clause_fields) { |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
# if we have no definition for $cfield, it's invalid |
810
|
108
|
100
|
|
|
|
246
|
if ( !exists $fields->{$cfield} ) { |
811
|
3
|
|
|
|
|
16
|
return; |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
|
814
|
105
|
|
|
|
|
144
|
my $field_def = $fields->{$cfield}; |
815
|
105
|
100
|
|
|
|
277
|
if ( $field_def->alias_for ) { |
816
|
|
|
|
|
|
|
my @aliases |
817
|
2
|
|
|
|
|
10
|
= ref $field_def->alias_for |
818
|
10
|
100
|
|
|
|
36
|
? @{ $field_def->alias_for } |
819
|
|
|
|
|
|
|
: ( $field_def->alias_for ); |
820
|
10
|
|
|
|
|
34
|
push @field_names, @aliases; |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
else { |
823
|
95
|
|
|
|
|
239
|
push @field_names, $cfield; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
#warn "resolved field_names: " . dump( \@field_names ); |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
# third, apply our canonical names to the $clause |
830
|
103
|
100
|
|
|
|
226
|
if ( @field_names > 1 ) { |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
# turn $clause into a tree |
833
|
4
|
|
|
|
|
15
|
my $class = blessed($clause); |
834
|
4
|
|
|
|
|
11
|
my $op = $clause->op; |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
#warn "before tree: " . dump $tree; |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
#warn "code clause: " . dump $clause; |
839
|
4
|
|
|
|
|
7
|
my @newfields; |
840
|
4
|
|
|
|
|
6
|
for my $name (@field_names) { |
841
|
8
|
|
|
|
|
377
|
push( |
842
|
|
|
|
|
|
|
@newfields, |
843
|
|
|
|
|
|
|
$class->new( |
844
|
|
|
|
|
|
|
field => $name, |
845
|
|
|
|
|
|
|
op => $op, |
846
|
|
|
|
|
|
|
value => $clause->value, |
847
|
|
|
|
|
|
|
quote => $clause->quote, |
848
|
|
|
|
|
|
|
proximity => $clause->proximity, |
849
|
|
|
|
|
|
|
) |
850
|
|
|
|
|
|
|
); |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
# OR the fields together. TODO optional? |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
# we must bless here because |
856
|
|
|
|
|
|
|
# our bool op keys are not methods. |
857
|
4
|
|
|
|
|
110
|
my $newfield |
858
|
4
|
|
|
|
|
113
|
= $query_class->new( %{ $self->query_class_opts }, |
859
|
|
|
|
|
|
|
parser => $self ); |
860
|
4
|
|
|
|
|
87
|
$newfield->{""} = \@newfields; |
861
|
|
|
|
|
|
|
|
862
|
4
|
|
|
|
|
14
|
$clause->op('()'); |
863
|
4
|
|
|
|
|
10
|
$clause->value($newfield); |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
#warn "after tree: " . dump $tree; |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
else { |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
# if no field defined in clause, or it differs, override. |
871
|
99
|
100
|
100
|
|
|
504
|
if ( !defined $clause->field |
872
|
|
|
|
|
|
|
or $field_names[0] ne $clause->field ) |
873
|
|
|
|
|
|
|
{ |
874
|
20
|
|
|
|
|
54
|
$clause->field( $field_names[0] ); |
875
|
|
|
|
|
|
|
} |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
|
878
|
103
|
|
|
|
|
388
|
return $clause; |
879
|
|
|
|
|
|
|
} |
880
|
66
|
|
|
|
|
765
|
); |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
sub _validate { |
884
|
66
|
|
|
66
|
|
95
|
my ( $self, $query ) = @_; |
885
|
|
|
|
|
|
|
|
886
|
66
|
|
|
|
|
118
|
my $fields = $self->{fields}; |
887
|
|
|
|
|
|
|
my $validator = sub { |
888
|
173
|
|
|
173
|
|
264
|
my ( $clause, $tree, $code, $prefix ) = @_; |
889
|
173
|
100
|
|
|
|
376
|
if ( $clause->is_tree ) { |
890
|
50
|
|
|
|
|
180
|
$clause->value->walk($code); |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
else { |
893
|
123
|
100
|
100
|
|
|
706
|
return unless defined $clause->field and length $clause->field; |
894
|
110
|
|
|
|
|
190
|
my $field_name = $clause->field; |
895
|
110
|
|
|
|
|
192
|
my $field_value = $clause->value; |
896
|
110
|
|
|
|
|
151
|
my $field = $fields->{$field_name}; |
897
|
110
|
100
|
|
|
|
208
|
if ( !$field ) { |
898
|
3
|
100
|
|
|
|
13
|
if ( $self->croak_on_error ) { |
899
|
2
|
|
|
|
|
448
|
croak "No such field: $field_name"; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
else { |
902
|
1
|
|
|
|
|
3
|
$self->{error} = "No such field: $field_name"; |
903
|
1
|
|
|
|
|
3
|
return; |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
} |
906
|
107
|
50
|
|
|
|
344
|
if ( !$field->validate($field_value) ) { |
907
|
0
|
0
|
|
|
|
0
|
if ( $self->croak_on_error ) { |
908
|
0
|
|
|
|
|
0
|
my $err = $field->error; |
909
|
0
|
|
|
|
|
0
|
croak |
910
|
|
|
|
|
|
|
"Invalid field value for $field_name: $field_value ($err)"; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
66
|
|
|
|
|
346
|
}; |
915
|
66
|
|
|
|
|
212
|
$query->walk($validator); |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
sub _parse { |
919
|
173
|
|
|
173
|
|
236
|
my $self = shift; |
920
|
173
|
|
|
|
|
215
|
my $str = shift; |
921
|
173
|
|
|
|
|
223
|
my $parent_field = shift; # only for recursive calls |
922
|
173
|
|
|
|
|
193
|
my $parent_op = shift; # only for recursive calls |
923
|
173
|
|
|
|
|
174
|
my $query_class = shift; |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
#warn "_parse: " . dump [ $str, $parent_field, $parent_op, $query_class ]; |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
#dump $self; |
928
|
|
|
|
|
|
|
|
929
|
173
|
|
|
|
|
369
|
my $q = {}; |
930
|
173
|
|
|
|
|
278
|
my $pre_bool = ''; |
931
|
173
|
|
|
|
|
177
|
my $err = undef; |
932
|
173
|
|
|
|
|
186
|
my $s_orig = $str; |
933
|
173
|
|
|
|
|
258
|
my $phrase_delim = $self->{phrase_delim}; |
934
|
173
|
|
|
|
|
238
|
my $field_regex = $self->{field_regex}; |
935
|
173
|
|
|
|
|
234
|
my $and_regex = $self->{and_regex}; |
936
|
173
|
|
|
|
|
201
|
my $or_regex = $self->{or_regex}; |
937
|
173
|
|
|
|
|
208
|
my $not_regex = $self->{not_regex}; |
938
|
173
|
|
|
|
|
206
|
my $op_regex = $self->{op_regex}; |
939
|
173
|
|
|
|
|
205
|
my $op_nofield_regex = $self->{op_nofield_regex}; |
940
|
173
|
|
|
|
|
200
|
my $term_regex = $self->{term_regex}; |
941
|
173
|
|
|
|
|
643
|
my $phrase_regex = qr/[^"()]+/; |
942
|
173
|
|
|
|
|
247
|
my $near_regex = $self->{near_regex}; |
943
|
173
|
|
|
|
|
228
|
my $range_regex = $self->{range_regex}; |
944
|
173
|
|
|
|
|
233
|
my $clause_class = $self->{clause_class}; |
945
|
173
|
|
|
|
|
217
|
my $fixup = $self->{fixup}; |
946
|
173
|
|
|
|
|
195
|
my $null_term = $self->{null_term}; |
947
|
|
|
|
|
|
|
|
948
|
173
|
|
|
|
|
496
|
$str =~ s/^\s+//; # remove leading spaces |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
LOOP: |
951
|
173
|
|
|
|
|
449
|
while ( length $str ) { # while query string is not empty |
952
|
350
|
|
|
|
|
543
|
for ($str) { # temporary alias to $_ for easier regex application |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
#warn "LOOP start: " . dump [ $str, $parent_field, $parent_op ]; |
955
|
|
|
|
|
|
|
|
956
|
350
|
|
|
|
|
515
|
my $sign = $self->{default_boolop}; |
957
|
350
|
|
|
|
|
349
|
my $field = $parent_field; |
958
|
350
|
|
100
|
|
|
1136
|
my $op = $parent_op || ""; |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
#warn "LOOP after start: " . dump [ $sign, $field, $op ]; |
961
|
|
|
|
|
|
|
|
962
|
350
|
100
|
|
|
|
955
|
if (m/^\)/) { |
963
|
64
|
|
|
|
|
111
|
$self->{_paren_count}--; |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
#warn "leaving loop on ) [paren_count==$self->{_paren_count}]"; |
966
|
64
|
100
|
|
|
|
146
|
if ( $self->{_paren_count} < 0 ) { |
967
|
4
|
100
|
|
|
|
7
|
if ( !$fixup ) { |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
#warn "unbalanced parens -- extra right-hand )"; |
970
|
1
|
|
|
|
|
1
|
$err = "unbalanced parentheses -- extra right-hand )"; |
971
|
1
|
|
|
|
|
2
|
last LOOP; |
972
|
|
|
|
|
|
|
} |
973
|
|
|
|
|
|
|
else { |
974
|
3
|
|
|
|
|
7
|
s/^[\)\s]+//; # trim all trailing ) and space |
975
|
3
|
|
|
|
|
6
|
next LOOP; |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
else { |
979
|
60
|
|
|
|
|
149
|
last LOOP; # return from recursive call if meeting a ')' |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
# try to parse sign prefix ('+', '-' or '!|NOT') |
984
|
286
|
100
|
|
|
|
2580
|
if (s/^(\+|-)\s*//) { $sign = $1; } |
|
11
|
100
|
|
|
|
25
|
|
|
|
100
|
|
|
|
|
|
985
|
5
|
|
|
|
|
10
|
elsif (s/^($not_regex)\b\s*//) { $sign = '-'; } |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
# special check because of \b above |
988
|
3
|
|
|
|
|
6
|
elsif (s/^\!\s*([^:=~])/$1/) { $sign = '-'; } |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
# try to parse field name and operator |
991
|
286
|
100
|
66
|
|
|
7895
|
if (s/^"($field_regex)"\s*($op_regex)\s*// # "field name" and op |
|
|
|
100
|
|
|
|
|
992
|
|
|
|
|
|
|
or |
993
|
|
|
|
|
|
|
s/^'?($field_regex)'?\s*($op_regex)\s*// # 'field name' and op |
994
|
|
|
|
|
|
|
or s/^()($op_nofield_regex)\s*// # no field, just op |
995
|
|
|
|
|
|
|
) |
996
|
|
|
|
|
|
|
{ |
997
|
132
|
|
|
|
|
403
|
( $field, $op ) = ( $1, $2 ); |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
#warn "matched field+op = " . dump [ $field, $op ]; |
1000
|
132
|
50
|
|
|
|
272
|
if ($parent_field) { |
1001
|
0
|
|
|
|
|
0
|
$err = "field '$field' inside '$parent_field' (op=$op)"; |
1002
|
0
|
|
|
|
|
0
|
last LOOP; |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# parse a value (single term or quoted list or parens) |
1007
|
286
|
|
|
|
|
429
|
my $clause = undef; |
1008
|
|
|
|
|
|
|
|
1009
|
286
|
100
|
100
|
|
|
4769
|
if ( s/^(")([^"]*?)"~(\d+)\s*// |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
or s/^(")([^"]*?)"\s*// |
1011
|
|
|
|
|
|
|
or s/^(')([^']*?)'\s*// ) |
1012
|
|
|
|
|
|
|
{ # parse a quoted string. |
1013
|
31
|
|
|
|
|
96
|
my ( $quote, $val, $proximity ) = ( $1, $2, $3 ); |
1014
|
31
|
|
66
|
|
|
994
|
$clause = $clause_class->new( |
1015
|
|
|
|
|
|
|
field => $field, |
1016
|
|
|
|
|
|
|
op => ( $op || $parent_op || ( $field ? ":" : "" ) ), |
1017
|
|
|
|
|
|
|
value => $val, |
1018
|
|
|
|
|
|
|
quote => $quote, |
1019
|
|
|
|
|
|
|
proximity => $proximity |
1020
|
|
|
|
|
|
|
); |
1021
|
|
|
|
|
|
|
} |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
# fixup mode allows for a partially quoted string. |
1024
|
|
|
|
|
|
|
elsif ( $fixup and s/^(")([^"]*?)\s*$// ) { |
1025
|
1
|
|
|
|
|
3
|
my ( $quote, $val, $proximity ) = ( $1, $2, $3 ); |
1026
|
1
|
|
33
|
|
|
28
|
$clause = $clause_class->new( |
1027
|
|
|
|
|
|
|
field => $field, |
1028
|
|
|
|
|
|
|
op => ( $op || $parent_op || ( $field ? ":" : "" ) ), |
1029
|
|
|
|
|
|
|
value => $val, |
1030
|
|
|
|
|
|
|
quote => $quote, |
1031
|
|
|
|
|
|
|
proximity => $proximity |
1032
|
|
|
|
|
|
|
); |
1033
|
|
|
|
|
|
|
} |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
# special case for range grouped with () since we do not |
1036
|
|
|
|
|
|
|
# want the op of record to be the (). |
1037
|
|
|
|
|
|
|
elsif ( |
1038
|
|
|
|
|
|
|
s/^\(\s*"?($phrase_regex)"?$range_regex"?($phrase_regex)"?\s*\)\s*// |
1039
|
|
|
|
|
|
|
) |
1040
|
|
|
|
|
|
|
{ |
1041
|
7
|
|
|
|
|
17
|
my ( $t1, $t2 ) = ( $1, $2 ); |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
# trim any spaces since phrase_regex includes it |
1044
|
7
|
|
|
|
|
20
|
$t1 =~ s/^\ +|\ +$//g; |
1045
|
7
|
|
|
|
|
18
|
$t2 =~ s/^\ +|\ +$//g; |
1046
|
|
|
|
|
|
|
|
1047
|
7
|
100
|
|
|
|
31
|
my $this_op = $op =~ m/\!/ ? '!..' : '..'; |
1048
|
7
|
|
|
|
|
11
|
my $has_spaces = 0; |
1049
|
7
|
100
|
66
|
|
|
44
|
if ( index( $t1, ' ' ) != -1 or index( $t2, ' ' ) != -1 ) { |
1050
|
1
|
|
|
|
|
2
|
$has_spaces = 1; |
1051
|
|
|
|
|
|
|
} |
1052
|
7
|
100
|
|
|
|
196
|
$clause = $clause_class->new( |
1053
|
|
|
|
|
|
|
field => $field, |
1054
|
|
|
|
|
|
|
op => $this_op, |
1055
|
|
|
|
|
|
|
value => [ $t1, $t2 ], |
1056
|
|
|
|
|
|
|
quote => ( $has_spaces ? '"' : undef ), |
1057
|
|
|
|
|
|
|
); |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
elsif (s/^\(\s*//) { # parse parentheses |
1060
|
63
|
|
|
|
|
119
|
$self->{_paren_count}++; |
1061
|
63
|
|
|
|
|
331
|
my ( $r, $s2 ) |
1062
|
|
|
|
|
|
|
= $self->_parse( $str, $field, $op, $query_class ); |
1063
|
63
|
50
|
|
|
|
270
|
if ( !$r ) { |
1064
|
0
|
|
|
|
|
0
|
$err = $self->error; |
1065
|
0
|
|
|
|
|
0
|
last LOOP; |
1066
|
|
|
|
|
|
|
} |
1067
|
63
|
|
|
|
|
114
|
$str = $s2; |
1068
|
63
|
100
|
33
|
|
|
482
|
if ( !defined($str) or !( $str =~ s/^\)\s*// ) ) { |
1069
|
4
|
100
|
66
|
|
|
15
|
if ( defined($str) and $fixup ) { |
1070
|
2
|
|
|
|
|
4
|
$str = ') ' . $str; |
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
else { |
1073
|
2
|
|
|
|
|
3
|
$err = "no matching ) "; |
1074
|
2
|
|
|
|
|
8
|
last LOOP; |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
|
1078
|
61
|
|
|
|
|
1678
|
$clause = $clause_class->new( |
1079
|
|
|
|
|
|
|
field => '', |
1080
|
|
|
|
|
|
|
op => '()', |
1081
|
|
|
|
|
|
|
value => bless( $r, $query_class ), # re-bless |
1082
|
|
|
|
|
|
|
); |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
} |
1085
|
|
|
|
|
|
|
elsif (s/^($term_regex)\s*//) { # parse a single term |
1086
|
184
|
|
|
|
|
360
|
my $term = $1; |
1087
|
184
|
50
|
66
|
|
|
1204
|
if ( $term =~ m/^($term_regex)$range_regex($term_regex)$/ ) { |
|
|
100
|
|
|
|
|
|
1088
|
0
|
|
|
|
|
0
|
my $t1 = $1; |
1089
|
0
|
|
|
|
|
0
|
my $t2 = $2; |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
#warn "found range ($op $parent_op): $term => $t1 .. $t2"; |
1092
|
0
|
0
|
|
|
|
0
|
my $this_op = $op =~ m/\!/ ? '!..' : '..'; |
1093
|
0
|
|
|
|
|
0
|
$clause = $clause_class->new( |
1094
|
|
|
|
|
|
|
field => $field, |
1095
|
|
|
|
|
|
|
op => $this_op, |
1096
|
|
|
|
|
|
|
value => [ $t1, $t2 ], |
1097
|
|
|
|
|
|
|
); |
1098
|
|
|
|
|
|
|
} |
1099
|
|
|
|
|
|
|
elsif ( $null_term and $term eq $null_term ) { |
1100
|
5
|
|
33
|
|
|
161
|
$clause = $clause_class->new( |
1101
|
|
|
|
|
|
|
field => $field, |
1102
|
|
|
|
|
|
|
op => ( $op || $parent_op || ( $field ? ":" : "" ) ), |
1103
|
|
|
|
|
|
|
value => undef, # mimic NULL |
1104
|
|
|
|
|
|
|
); |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
} |
1107
|
|
|
|
|
|
|
else { |
1108
|
|
|
|
|
|
|
|
1109
|
179
|
|
66
|
|
|
5239
|
$clause = $clause_class->new( |
1110
|
|
|
|
|
|
|
field => $field, |
1111
|
|
|
|
|
|
|
op => ( $op || $parent_op || ( $field ? ":" : "" ) ), |
1112
|
|
|
|
|
|
|
value => $term, |
1113
|
|
|
|
|
|
|
); |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
284
|
100
|
|
|
|
15335
|
if (s/^($near_regex)\s+//) { |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
# modify the existing clause |
1121
|
|
|
|
|
|
|
# and treat what comes next like a phrase |
1122
|
|
|
|
|
|
|
# matching the syntax "foo bar"~\d+ |
1123
|
2
|
|
|
|
|
5
|
my ($prox_match) = ($1); |
1124
|
2
|
|
|
|
|
4
|
my ($proximity) = $prox_match; |
1125
|
2
|
|
|
|
|
7
|
$proximity =~ s/\D+//; # leave only number |
1126
|
2
|
50
|
|
|
|
69
|
if (s/^($term_regex)\s*//) { |
1127
|
2
|
|
|
|
|
4
|
my $term = $1; |
1128
|
2
|
|
|
|
|
7
|
$clause->{value} .= ' ' . $term; |
1129
|
2
|
|
|
|
|
4
|
$clause->{proximity} = $proximity; |
1130
|
2
|
|
|
|
|
4
|
$clause->{quote} = '"'; |
1131
|
|
|
|
|
|
|
} |
1132
|
|
|
|
|
|
|
else { |
1133
|
0
|
|
|
|
|
0
|
$err = "missing term after $prox_match"; |
1134
|
0
|
|
|
|
|
0
|
last LOOP; |
1135
|
|
|
|
|
|
|
} |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
# deal with boolean connectors |
1140
|
284
|
|
|
|
|
384
|
my $post_bool = ''; |
1141
|
284
|
100
|
|
|
|
2175
|
if (s/^($and_regex)\s+//) { |
|
|
100
|
|
|
|
|
|
1142
|
26
|
|
|
|
|
43
|
$post_bool = 'AND'; |
1143
|
|
|
|
|
|
|
} |
1144
|
|
|
|
|
|
|
elsif (s/^($or_regex)\s+//) { |
1145
|
42
|
|
|
|
|
73
|
$post_bool = 'OR'; |
1146
|
|
|
|
|
|
|
} |
1147
|
|
|
|
|
|
|
|
1148
|
284
|
50
|
100
|
|
|
947
|
if ( $pre_bool |
|
|
|
66
|
|
|
|
|
1149
|
|
|
|
|
|
|
and $post_bool |
1150
|
|
|
|
|
|
|
and $pre_bool ne $post_bool ) |
1151
|
|
|
|
|
|
|
{ |
1152
|
0
|
|
|
|
|
0
|
$err = "cannot mix AND/OR in requests; use parentheses"; |
1153
|
0
|
|
|
|
|
0
|
last LOOP; |
1154
|
|
|
|
|
|
|
} |
1155
|
|
|
|
|
|
|
|
1156
|
284
|
|
100
|
|
|
916
|
my $bool = $pre_bool || $post_bool; |
1157
|
284
|
|
|
|
|
307
|
$pre_bool = $post_bool; # for next loop |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
# insert clause in query structure |
1160
|
284
|
50
|
|
|
|
1097
|
if ($clause) { |
1161
|
284
|
100
|
100
|
|
|
1118
|
$sign = '' if $sign eq '+' and $bool eq 'OR'; |
1162
|
284
|
100
|
100
|
|
|
792
|
$sign = '+' if $sign eq '' and $bool eq 'AND'; |
1163
|
284
|
50
|
66
|
|
|
631
|
if ( $sign eq '-' and $bool eq 'OR' ) { |
1164
|
0
|
|
|
|
|
0
|
$err = 'operands of "OR" cannot have "-" or "NOT" prefix'; |
1165
|
0
|
|
|
|
|
0
|
last LOOP; |
1166
|
|
|
|
|
|
|
} |
1167
|
284
|
|
|
|
|
254
|
push @{ $q->{$sign} }, $clause; |
|
284
|
|
|
|
|
2079
|
|
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
else { |
1170
|
0
|
0
|
|
|
|
0
|
if ($_) { |
1171
|
0
|
|
|
|
|
0
|
$err = "unexpected string in query: '$_'"; |
1172
|
0
|
|
|
|
|
0
|
last LOOP; |
1173
|
|
|
|
|
|
|
} |
1174
|
0
|
0
|
|
|
|
0
|
if ($field) { |
1175
|
0
|
|
|
|
|
0
|
$err = "missing value after $field $op"; |
1176
|
0
|
|
|
|
|
0
|
last LOOP; |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
} |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
# handle error |
1183
|
173
|
100
|
|
|
|
339
|
if ($err) { |
1184
|
3
|
|
|
|
|
9
|
$self->{error} = "[$s_orig] : $err"; |
1185
|
3
|
|
|
|
|
4
|
$q = undef; |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
#dump $q; |
1189
|
|
|
|
|
|
|
|
1190
|
173
|
100
|
|
|
|
453
|
if ( !defined $q ) { |
1191
|
3
|
|
|
|
|
10
|
return ( $q, $str ); |
1192
|
|
|
|
|
|
|
} |
1193
|
170
|
|
|
|
|
4791
|
my $query |
1194
|
170
|
|
|
|
|
184
|
= $query_class->new( %{ $self->query_class_opts }, parser => $self ); |
1195
|
170
|
|
|
|
|
12344
|
$query->{$_} = $q->{$_} for keys %$q; |
1196
|
170
|
|
|
|
|
869
|
return ( $query, $str ); |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
1; |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
__END__ |