line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CQL::Parser; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
46822
|
use strict; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
296
|
|
4
|
7
|
|
|
7
|
|
38
|
use warnings; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
218
|
|
5
|
7
|
|
|
7
|
|
4888
|
use CQL::Lexer; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
207
|
|
6
|
7
|
|
|
7
|
|
6066
|
use CQL::Relation; |
|
7
|
|
|
|
|
26
|
|
|
7
|
|
|
|
|
666
|
|
7
|
7
|
|
|
7
|
|
50
|
use CQL::Token; |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
1503
|
|
8
|
7
|
|
|
7
|
|
5340
|
use CQL::TermNode; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
390
|
|
9
|
7
|
|
|
7
|
|
4629
|
use CQL::AndNode; |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
362
|
|
10
|
7
|
|
|
7
|
|
4015
|
use CQL::OrNode; |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
300
|
|
11
|
7
|
|
|
7
|
|
3972
|
use CQL::NotNode; |
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
288
|
|
12
|
7
|
|
|
7
|
|
13912
|
use CQL::PrefixNode; |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
1061
|
|
13
|
7
|
|
|
7
|
|
14890
|
use CQL::ProxNode; |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
312
|
|
14
|
7
|
|
|
7
|
|
43
|
use Carp qw( croak ); |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
22723
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.13'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my $lexer; |
19
|
|
|
|
|
|
|
my $token; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 NAME |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
CQL::Parser - compiles CQL strings into parse trees of Node subtypes. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use CQL::Parser; |
28
|
|
|
|
|
|
|
my $parser = CQL::Parser->new(); |
29
|
|
|
|
|
|
|
my $root = $parser->parse( $cql ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
CQL::Parser provides a mechanism to parse Common Query Language (CQL) |
34
|
|
|
|
|
|
|
statements. The best description of CQL comes from the CQL homepage |
35
|
|
|
|
|
|
|
at the Library of Congress L |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
CQL is a formal language for representing queries to information |
38
|
|
|
|
|
|
|
retrieval systems such as web indexes, bibliographic catalogs and museum |
39
|
|
|
|
|
|
|
collection information. The CQL design objective is that queries be |
40
|
|
|
|
|
|
|
human readable and human writable, and that the language be intuitive |
41
|
|
|
|
|
|
|
while maintaining the expressiveness of more complex languages. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
A CQL statement can be as simple as a single keyword, or as complicated as a set |
44
|
|
|
|
|
|
|
of compoenents indicating search indexes, relations, relational modifiers, |
45
|
|
|
|
|
|
|
proximity clauses and boolean logic. CQL::Parser will parse CQL statements |
46
|
|
|
|
|
|
|
and return the root node for a tree of nodes which describes the CQL statement. |
47
|
|
|
|
|
|
|
This data structure can then be used by a client application to analyze the |
48
|
|
|
|
|
|
|
statement, and possibly turn it into a query for a local repository. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Each CQL component in the tree inherits from L and can be one |
51
|
|
|
|
|
|
|
of the following: L, L, L, |
52
|
|
|
|
|
|
|
L, L, L. See the |
53
|
|
|
|
|
|
|
documentation for those modules for their respective APIs. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Here are some examples of CQL statements: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over 4 |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * george |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item * dc.creator=george |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * dc.creator="George Clinton" |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item * clinton and funk |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * clinton and parliament and funk |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item * (clinton or bootsy) and funk |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item * dc.creator="clinton" and dc.date="1976" |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 METHODS |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 new() |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
## for convenience the lexer is located at the package level |
82
|
|
|
|
|
|
|
## just need to be sure to reinitialize it in very call to parse() |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new { |
85
|
7
|
|
|
7
|
1
|
6121
|
my ( $class, $debug ) = @_; |
86
|
7
|
50
|
|
|
|
267
|
$CQL::DEBUG = $debug ? 1 : 0; |
87
|
7
|
|
33
|
|
|
101
|
return bless { }, ref($class) || $class; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 parse( $query ) |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Pass in a CQL query and you'll get back the root node for the CQL parse tree. |
93
|
|
|
|
|
|
|
If the CQL is invalid an exception will be thrown. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub parse { |
98
|
64
|
|
|
64
|
1
|
2322
|
my ($self,$query) = @_; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
## initialize lexer |
101
|
64
|
|
|
|
|
345
|
$lexer = CQL::Lexer->new(); |
102
|
|
|
|
|
|
|
|
103
|
64
|
|
|
|
|
889
|
debug( "about to parse query: $query" ); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
## create the lexer and get the first token |
106
|
64
|
|
|
|
|
206
|
$lexer->tokenize( $query ); |
107
|
64
|
|
|
|
|
208
|
$token = $lexer->nextToken(); |
108
|
|
|
|
|
|
|
|
109
|
64
|
|
|
|
|
342
|
my $root = parseQuery( 'srw.ServerChoice', CQL::Relation->new( 'scr' ) ); |
110
|
55
|
50
|
|
|
|
204
|
if ( $token->getType() != CQL_EOF ) { |
111
|
0
|
|
|
|
|
0
|
croak( "junk after end ".$token->getString() ); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
55
|
|
|
|
|
233
|
return $root; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 parseSafe( $query ) |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Pass in a CQL query and you'll get back the root node for the CQL parse tree. |
120
|
|
|
|
|
|
|
If the CQL is invalid, an error code from the SRU Diagnostics List |
121
|
|
|
|
|
|
|
will be returned. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
my @cql_errors = ( |
126
|
|
|
|
|
|
|
{ regex => qr/does not support relational modifiers/, code => 20 }, |
127
|
|
|
|
|
|
|
{ regex => qr/expected boolean got /, code => 37 }, |
128
|
|
|
|
|
|
|
{ regex => qr/expected relation modifier got /, code => 20 }, |
129
|
|
|
|
|
|
|
{ regex => qr/unknown first-class relation modifier: /, code => 20 }, |
130
|
|
|
|
|
|
|
{ regex => qr/missing term/, code => 27 }, |
131
|
|
|
|
|
|
|
{ regex => qr/expected proximity relation got /, code => 40 }, |
132
|
|
|
|
|
|
|
{ regex => qr/expected proximity distance got /, code => 41 }, |
133
|
|
|
|
|
|
|
{ regex => qr/expected proximity unit got/, code => 42 }, |
134
|
|
|
|
|
|
|
{ regex => qr/expected proximity ordering got /, code => 43 }, |
135
|
|
|
|
|
|
|
{ regex => qr/unknown first class relation: /, code => 19 }, |
136
|
|
|
|
|
|
|
{ regex => qr/must supply name/, code => 15 }, |
137
|
|
|
|
|
|
|
{ regex => qr/must supply identifier/, code => 15 }, |
138
|
|
|
|
|
|
|
{ regex => qr/must supply subtree/, code => 15 }, |
139
|
|
|
|
|
|
|
{ regex => qr/must supply term parameter/, code => 27 }, |
140
|
|
|
|
|
|
|
{ regex => qr/doesn\'t support relations other than/, code => 20 }, |
141
|
|
|
|
|
|
|
); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub parseSafe { |
144
|
2
|
|
|
2
|
1
|
6300
|
my ($self,$query) = @_; |
145
|
|
|
|
|
|
|
|
146
|
2
|
|
|
|
|
5
|
my $root = eval { $self->parse( $query ); }; |
|
2
|
|
|
|
|
9
|
|
147
|
|
|
|
|
|
|
|
148
|
2
|
50
|
|
|
|
1147
|
if ( my $error = $@ ) { |
149
|
2
|
|
|
|
|
4
|
my $code = 10; |
150
|
2
|
|
|
|
|
6
|
for( @cql_errors ) { |
151
|
30
|
100
|
|
|
|
293
|
$code = $_->{ code } if $error =~ $_->{ regex }; |
152
|
|
|
|
|
|
|
} |
153
|
2
|
|
|
|
|
20
|
return $code; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
0
|
return $root; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub parseQuery { |
160
|
70
|
|
|
70
|
0
|
119
|
my ( $qualifier, $relation ) = @_; |
161
|
70
|
|
|
|
|
188
|
debug( "in parseQuery() with term=" . $token->getString() ); |
162
|
70
|
|
|
|
|
182
|
my $term = parseTerm( $qualifier, $relation ); |
163
|
|
|
|
|
|
|
|
164
|
67
|
|
|
|
|
182
|
my $type = $token->getType(); |
165
|
67
|
|
100
|
|
|
258
|
while ( $type != CQL_EOF and $type != CQL_RPAREN ) { |
166
|
34
|
100
|
|
|
|
128
|
if ( $type == CQL_AND ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
167
|
12
|
|
|
|
|
30
|
match($token); |
168
|
12
|
|
|
|
|
30
|
my $term2 = parseTerm( $qualifier, $relation ); |
169
|
10
|
|
|
|
|
88
|
$term = CQL::AndNode->new( left=>$term, right=>$term2 ); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
elsif ( $type == CQL_OR ) { |
172
|
9
|
|
|
|
|
19
|
match($token); |
173
|
9
|
|
|
|
|
90
|
my $term2 = parseTerm( $qualifier, $relation ); |
174
|
9
|
|
|
|
|
105
|
$term = CQL::OrNode->new( left=>$term, right=>$term2 ); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
elsif ( $type == CQL_NOT ) { |
177
|
2
|
|
|
|
|
8
|
match($token); |
178
|
2
|
|
|
|
|
7
|
my $term2 = parseTerm( $qualifier, $relation ); |
179
|
2
|
|
|
|
|
26
|
$term = CQL::NotNode->new( left=>$term, right=>$term2 ); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
elsif ( $type == CQL_PROX ) { |
182
|
11
|
|
|
|
|
25
|
match($token); |
183
|
11
|
|
|
|
|
53
|
my $proxNode = CQL::ProxNode->new( $term ); |
184
|
11
|
|
|
|
|
27
|
gatherProxParameters( $proxNode ); |
185
|
7
|
|
|
|
|
16
|
my $term2 = parseTerm( $qualifier, $relation ); |
186
|
7
|
|
|
|
|
25
|
$proxNode->addSecondTerm( $term2 ); |
187
|
7
|
|
|
|
|
10
|
$term = $proxNode; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
else { |
190
|
0
|
|
|
|
|
0
|
croak( "expected boolean got ".$token->getString() ); |
191
|
|
|
|
|
|
|
} |
192
|
28
|
|
|
|
|
80
|
$type = $token->getType(); |
193
|
|
|
|
|
|
|
} |
194
|
61
|
|
|
|
|
124
|
debug( "no more ops" ); |
195
|
61
|
|
|
|
|
111
|
return( $term ); |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub parseTerm { |
199
|
100
|
|
|
100
|
0
|
162
|
my ( $qualifier, $relation ) = @_; |
200
|
100
|
|
|
|
|
164
|
debug( "in parseTerm()" ); |
201
|
100
|
|
|
|
|
295
|
my $word; |
202
|
100
|
|
|
|
|
111
|
while ( 1 ) { |
203
|
124
|
100
|
|
|
|
371
|
if ( $token->getType() == CQL_LPAREN ) { |
|
|
100
|
|
|
|
|
|
204
|
5
|
|
|
|
|
15
|
debug( "parenthesized term" ); |
205
|
5
|
|
|
|
|
22
|
match( CQL::Token->new('(') ); |
206
|
5
|
|
|
|
|
49
|
my $expr = parseQuery( $qualifier, $relation ); |
207
|
5
|
|
|
|
|
21
|
match( CQL::Token->new(')') ); |
208
|
5
|
|
|
|
|
17
|
return $expr; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
elsif ( $token->getType() == CQL_GT ) { |
211
|
1
|
|
|
|
|
4
|
match( $token ); |
212
|
1
|
|
|
|
|
4
|
return parsePrefix( $qualifier, $relation ); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
118
|
|
|
|
|
214
|
debug( "non-parenthesised term" ); |
216
|
118
|
|
|
|
|
601
|
$word = matchSymbol( "qualifier or term" ); |
217
|
|
|
|
|
|
|
|
218
|
118
|
100
|
|
|
|
249
|
last if ! isBaseRelation(); |
219
|
|
|
|
|
|
|
|
220
|
24
|
|
|
|
|
43
|
$qualifier = $word; |
221
|
24
|
|
|
|
|
70
|
debug( "creating relation with word=$word" ); |
222
|
24
|
|
|
|
|
70
|
$relation = CQL::Relation->new( $token->getString() ); |
223
|
24
|
|
|
|
|
60
|
match( $token ); |
224
|
|
|
|
|
|
|
|
225
|
24
|
|
|
|
|
76
|
while ($token->getType() == CQL_MODIFIER ) { |
226
|
12
|
|
|
|
|
24
|
match( $token ); |
227
|
12
|
50
|
|
|
|
29
|
if ( !isRelationModifier() ) { |
228
|
0
|
|
|
|
|
0
|
croak( "expected relation modifier got " . $token->getString() ); |
229
|
|
|
|
|
|
|
} |
230
|
12
|
|
|
|
|
38
|
$relation->addModifier( $token->getString() ); |
231
|
12
|
|
|
|
|
49
|
match( $token ); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
91
|
|
|
|
|
767
|
debug( "qualifier=$qualifier relation=$relation term=$word" ); |
236
|
91
|
100
|
66
|
|
|
416
|
croak( "missing term" ) if ! defined($word) or $word eq ''; |
237
|
|
|
|
|
|
|
|
238
|
89
|
|
|
|
|
363
|
my $node = CQL::TermNode->new( |
239
|
|
|
|
|
|
|
qualifier => $qualifier, |
240
|
|
|
|
|
|
|
relation => $relation, |
241
|
|
|
|
|
|
|
term => $word |
242
|
|
|
|
|
|
|
); |
243
|
89
|
|
|
|
|
401
|
debug( "made term node: ".$node->toCQL() ); |
244
|
89
|
|
|
|
|
255
|
return $node; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub parsePrefix { |
248
|
1
|
|
|
1
|
0
|
4
|
my ( $qualifier, $relation ) = @_; |
249
|
1
|
|
|
|
|
3
|
debug( "prefix mapping" ); |
250
|
1
|
|
|
|
|
1
|
my $name = undef; |
251
|
1
|
|
|
|
|
4
|
my $identifier = matchSymbol( "prefix name" ); |
252
|
1
|
50
|
|
|
|
4
|
if ( $token->getType() == CQL_EQ ) { |
253
|
1
|
|
|
|
|
3
|
match( $token ); |
254
|
1
|
|
|
|
|
2
|
$name = $identifier; |
255
|
1
|
|
|
|
|
3
|
$identifier = matchSymbol( "prefix identifier" ); |
256
|
|
|
|
|
|
|
} |
257
|
1
|
|
|
|
|
4
|
my $node = parseQuery( $qualifier, $relation ); |
258
|
1
|
|
|
|
|
11
|
return CQL::PrefixNode->new( |
259
|
|
|
|
|
|
|
name => $name, |
260
|
|
|
|
|
|
|
identifier => $identifier, |
261
|
|
|
|
|
|
|
subtree => $node |
262
|
|
|
|
|
|
|
); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub gatherProxParameters { |
266
|
11
|
|
|
11
|
0
|
20
|
my $node = shift; |
267
|
11
|
|
|
|
|
14
|
if (0) { # CQL 1.0 (obsolete) |
268
|
|
|
|
|
|
|
for (my $i=0; $i<4; $i++ ) { |
269
|
|
|
|
|
|
|
if ( $token->getType() != CQL_MODIFIER ) { |
270
|
|
|
|
|
|
|
## end of proximity parameters |
271
|
|
|
|
|
|
|
return; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
match($token); |
274
|
|
|
|
|
|
|
if ( $token->getType() != CQL_MODIFIER ) { |
275
|
|
|
|
|
|
|
if ( $i==0 ) { gatherProxRelation($node); } |
276
|
|
|
|
|
|
|
elsif ( $i==1 ) { gatherProxDistance($node); } |
277
|
|
|
|
|
|
|
elsif ( $i==2 ) { gatherProxUnit($node); } |
278
|
|
|
|
|
|
|
elsif ( $i==3 ) { gatherProxOrdering($node); } |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} else { |
282
|
11
|
|
|
|
|
33
|
while ( $token->getType() == CQL_MODIFIER ) { |
283
|
15
|
|
|
|
|
34
|
match( $token ); |
284
|
15
|
100
|
66
|
|
|
43
|
if ( $token->getType() == CQL_DISTANCE ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
285
|
8
|
|
|
|
|
33
|
match( $token ); |
286
|
8
|
|
|
|
|
22
|
gatherProxRelation( $node ); |
287
|
8
|
|
|
|
|
20
|
gatherProxDistance( $node ); |
288
|
|
|
|
|
|
|
} elsif ( $token->getType() == CQL_UNIT ) { |
289
|
4
|
|
|
|
|
20
|
match( $token ); |
290
|
4
|
50
|
|
|
|
14
|
if ( $token->getType() != CQL_EQ ) { |
291
|
0
|
|
|
|
|
0
|
croak( "expected proximity unit parameter got ".$token->getString() ); |
292
|
|
|
|
|
|
|
} |
293
|
4
|
|
|
|
|
10
|
match( $token ); |
294
|
4
|
|
|
|
|
29
|
gatherProxUnit( $node ); |
295
|
|
|
|
|
|
|
} elsif ( $token->getType() == CQL_ORDERED |
296
|
|
|
|
|
|
|
|| $token->getType() == CQL_UNORDERED ) { |
297
|
1
|
|
|
|
|
4
|
gatherProxOrdering( $node ); |
298
|
|
|
|
|
|
|
} else { |
299
|
2
|
|
|
|
|
9
|
croak( "expected proximity parameter got ". $token->getString() ."(". $token->getType() .")" ); |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub gatherProxRelation { |
306
|
8
|
|
|
8
|
0
|
15
|
my $node = shift; |
307
|
8
|
50
|
|
|
|
15
|
if ( ! isProxRelation() ) { |
308
|
0
|
|
|
|
|
0
|
croak( "expected proximity relation got ".$token->getString() ); |
309
|
|
|
|
|
|
|
} |
310
|
8
|
|
|
|
|
30
|
$node->addModifier( "relation", $token->getString() ); |
311
|
8
|
|
|
|
|
19
|
match( $token ); |
312
|
8
|
|
|
|
|
28
|
debug( "gatherProxRelation matched ".$token->getString() ); |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub gatherProxDistance { |
316
|
8
|
|
|
8
|
0
|
12
|
my $node = shift; |
317
|
8
|
100
|
|
|
|
23
|
if ( $token->getString() !~ /^\d+$/ ) { |
318
|
2
|
|
|
|
|
6
|
croak( "expected proximity distance got ".$token->getString() ); |
319
|
|
|
|
|
|
|
} |
320
|
6
|
|
|
|
|
21
|
$node->addModifier( "distance", $token->getString() ); |
321
|
6
|
|
|
|
|
13
|
match( $token ); |
322
|
6
|
|
|
|
|
22
|
debug( "gatherProxDistance matched ".$token->getString() ); |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub gatherProxUnit { |
326
|
4
|
|
|
4
|
0
|
6
|
my $node = shift; |
327
|
4
|
|
|
|
|
10
|
my $type = $token->getType(); |
328
|
4
|
50
|
66
|
|
|
26
|
if( $type != CQL_PWORD and $type != CQL_SENTENCE and $type != CQL_PARAGRAPH |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
329
|
|
|
|
|
|
|
and $type != CQL_ELEMENT ) { |
330
|
0
|
|
|
|
|
0
|
croak( "expected proximity unit got ".$token->getString() ); |
331
|
|
|
|
|
|
|
} |
332
|
4
|
|
|
|
|
14
|
$node->addModifier( "unit", $token->getString() ); |
333
|
4
|
|
|
|
|
10
|
match( $token ); |
334
|
4
|
|
|
|
|
35
|
debug( "gatherProxUnit matched ".$token->getString() ); |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub gatherProxOrdering { |
338
|
1
|
|
|
1
|
0
|
2
|
my $node = shift; |
339
|
1
|
|
|
|
|
5
|
my $type = $token->getType(); |
340
|
1
|
50
|
33
|
|
|
6
|
if ( $type != CQL_ORDERED and $type != CQL_UNORDERED ) { |
341
|
0
|
|
|
|
|
0
|
croak( "expected proximity ordering got ".$token->getString() ); |
342
|
|
|
|
|
|
|
} |
343
|
1
|
|
|
|
|
4
|
$node->addModifier( "ordering", $token->getString() ); |
344
|
1
|
|
|
|
|
4
|
match( $token ); |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub isBaseRelation { |
348
|
118
|
|
|
118
|
0
|
364
|
debug( "inside base relation: checking ttype=".$token->getType()." sval=". |
349
|
|
|
|
|
|
|
$token->getString() ); |
350
|
118
|
100
|
66
|
|
|
1619
|
if( $token->getType() == CQL_WORD and $token->getString() !~ /\./ ) { |
351
|
3
|
|
|
|
|
1438
|
croak( "unknown first class relation: ".$token->getString() ); |
352
|
|
|
|
|
|
|
} |
353
|
115
|
|
|
|
|
495
|
my $type = $token->getType(); |
354
|
115
|
|
100
|
|
|
211
|
return( isProxRelation() or $type==CQL_ANY or $type==CQL_ALL |
355
|
|
|
|
|
|
|
or $type==CQL_EXACT or $type==CQL_SCR or $type==CQL_WORD |
356
|
|
|
|
|
|
|
or $type==CQL_WITHIN or $type==CQL_ENCLOSES); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub isProxRelation { |
360
|
123
|
|
|
123
|
0
|
275
|
debug( "isProxRelation: checking ttype=".$token->getType()." sval=". |
361
|
|
|
|
|
|
|
$token->getString() ); |
362
|
123
|
|
|
|
|
327
|
my $type = $token->getType(); |
363
|
123
|
|
100
|
|
|
3892
|
return( $type==CQL_LT or $type==CQL_GT or $type==CQL_EQ or $type==CQL_LE |
364
|
|
|
|
|
|
|
or $type==CQL_GE or $type==CQL_NE ); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub isRelationModifier { |
368
|
12
|
|
|
12
|
0
|
35
|
my $type = $token->getType(); |
369
|
12
|
100
|
|
|
|
36
|
if ($type == CQL_WORD) { |
370
|
1
|
|
|
|
|
4
|
return $token->getString() =~ /\./; |
371
|
|
|
|
|
|
|
} |
372
|
11
|
|
66
|
|
|
246
|
return ($type==CQL_RELEVANT or $type==CQL_FUZZY or $type==CQL_STEM |
373
|
|
|
|
|
|
|
or $type==CQL_PHONETIC or $type==CQL_PWORD or $type==CQL_STRING |
374
|
|
|
|
|
|
|
or $type==CQL_ISODATE or $type==CQL_NUMBER or $type==CQL_URI |
375
|
|
|
|
|
|
|
or $type==CQL_PARTIAL or $type==CQL_MASKED or $type==CQL_UNMASKED |
376
|
|
|
|
|
|
|
or $type==CQL_NWSE); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub match { |
380
|
264
|
|
|
264
|
0
|
374
|
my $expected = shift; |
381
|
264
|
|
|
|
|
3056
|
debug( "in match(".$expected->getString().")" ); |
382
|
264
|
50
|
|
|
|
710
|
if ( $token->getType() != $expected->getType() ) { |
383
|
0
|
|
|
|
|
0
|
croak( "expected ".$expected->getString() . |
384
|
|
|
|
|
|
|
" but got " . $token->getString() ); |
385
|
|
|
|
|
|
|
} |
386
|
264
|
|
|
|
|
877
|
$token = $lexer->nextToken(); |
387
|
264
|
|
|
|
|
1207
|
debug( "got token type=".$token->getType()." string=".$token->getString() ); |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub matchSymbol { |
391
|
120
|
|
|
120
|
0
|
263
|
debug( "in match symbol" ); |
392
|
120
|
|
|
|
|
297
|
my $return = $token->getString(); |
393
|
120
|
|
|
|
|
247
|
match( $token ); |
394
|
120
|
|
|
|
|
286
|
return $return; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub debug { |
398
|
1530
|
50
|
|
1530
|
0
|
5154
|
return unless $CQL::DEBUG; |
399
|
0
|
|
|
|
|
|
print STDERR "CQL::Parser: ", shift, "\n"; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=head1 XCQL |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
CQL has an XML representation which you can generate from a CQL parse |
405
|
|
|
|
|
|
|
tree. Just call the toXCQL() method on the root node you get back |
406
|
|
|
|
|
|
|
from a call to parse(). |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head1 ERRORS AND DIAGNOSTICS |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
As mentioned above, a CQL syntax error will result in an exception being |
411
|
|
|
|
|
|
|
thrown. So if you have any doubts about the CQL that you are parsing you |
412
|
|
|
|
|
|
|
should wrap the call to parse() in an eval block, and check $@ |
413
|
|
|
|
|
|
|
afterwards to make sure everything went ok. |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
eval { |
416
|
|
|
|
|
|
|
my $node = $parser->parse( $cql ); |
417
|
|
|
|
|
|
|
}; |
418
|
|
|
|
|
|
|
if ( $@ ) { |
419
|
|
|
|
|
|
|
print "uhoh, exception $@\n"; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
If you'd like to see blow by blow details while your CQL is being parsed |
423
|
|
|
|
|
|
|
set $CQL::DEBUG equal to 1, and you will get details on STDERR. This is |
424
|
|
|
|
|
|
|
useful if the parse tree is incorrect and you want to locate where things |
425
|
|
|
|
|
|
|
are going wrong. Hopefully this won't happen, but if it does please notify the |
426
|
|
|
|
|
|
|
author. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head1 TODO |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=over 4 |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=item * toYourEngineHere() please feel free to add functionality and send in |
433
|
|
|
|
|
|
|
patches! |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=back |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 THANKYOUS |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
CQL::Parser is essentially a Perl port of Mike Taylor's cql-java package |
440
|
|
|
|
|
|
|
http://zing.z3950.org/cql/java/. Mike and IndexData were kind enough |
441
|
|
|
|
|
|
|
to allow the author to write this port, and to make it available under |
442
|
|
|
|
|
|
|
the terms of the Artistic License. Thanks Mike! |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
The CQL::Lexer package relies heavily on Stevan Little's excellent |
445
|
|
|
|
|
|
|
String::Tokenizer. Thanks Stevan! |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
CQL::Parser was developed as a component of the Ockham project, |
448
|
|
|
|
|
|
|
which is funded by the National Science Foundation. See http://www.ockham.org |
449
|
|
|
|
|
|
|
for more information about Ockham. |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 AUTHOR |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=over 4 |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=item * Ed Summers - ehs at pobox dot com |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item * Brian Cassidy - bricas at cpan dot org |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item * Wilbert Hengst - W.Hengst at uva dot nl |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=back |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Copyright 2004-2009 by Ed Summers |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
468
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
1; |