| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package CQL::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
46822
|
use strict; |
|
|
7
|
|
|
|
|
19
|
|
|
|
7
|
|
|
|
|
296
|
|
|
4
|
7
|
|
|
7
|
|
38
|
use warnings; |
|
|
7
|
|
|
|
|
13
|
|
|
|
7
|
|
|
|
|
218
|
|
|
5
|
7
|
|
|
7
|
|
4888
|
use CQL::Lexer; |
|
|
7
|
|
|
|
|
19
|
|
|
|
7
|
|
|
|
|
207
|
|
|
6
|
7
|
|
|
7
|
|
6066
|
use CQL::Relation; |
|
|
7
|
|
|
|
|
26
|
|
|
|
7
|
|
|
|
|
666
|
|
|
7
|
7
|
|
|
7
|
|
50
|
use CQL::Token; |
|
|
7
|
|
|
|
|
18
|
|
|
|
7
|
|
|
|
|
1503
|
|
|
8
|
7
|
|
|
7
|
|
5340
|
use CQL::TermNode; |
|
|
7
|
|
|
|
|
19
|
|
|
|
7
|
|
|
|
|
390
|
|
|
9
|
7
|
|
|
7
|
|
4629
|
use CQL::AndNode; |
|
|
7
|
|
|
|
|
20
|
|
|
|
7
|
|
|
|
|
362
|
|
|
10
|
7
|
|
|
7
|
|
4015
|
use CQL::OrNode; |
|
|
7
|
|
|
|
|
18
|
|
|
|
7
|
|
|
|
|
300
|
|
|
11
|
7
|
|
|
7
|
|
3972
|
use CQL::NotNode; |
|
|
7
|
|
|
|
|
17
|
|
|
|
7
|
|
|
|
|
288
|
|
|
12
|
7
|
|
|
7
|
|
13912
|
use CQL::PrefixNode; |
|
|
7
|
|
|
|
|
20
|
|
|
|
7
|
|
|
|
|
1061
|
|
|
13
|
7
|
|
|
7
|
|
14890
|
use CQL::ProxNode; |
|
|
7
|
|
|
|
|
18
|
|
|
|
7
|
|
|
|
|
312
|
|
|
14
|
7
|
|
|
7
|
|
43
|
use Carp qw( croak ); |
|
|
7
|
|
|
|
|
14
|
|
|
|
7
|
|
|
|
|
22723
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.13'; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my $lexer; |
|
19
|
|
|
|
|
|
|
my $token; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 NAME |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
CQL::Parser - compiles CQL strings into parse trees of Node subtypes. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use CQL::Parser; |
|
28
|
|
|
|
|
|
|
my $parser = CQL::Parser->new(); |
|
29
|
|
|
|
|
|
|
my $root = $parser->parse( $cql ); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
CQL::Parser provides a mechanism to parse Common Query Language (CQL) |
|
34
|
|
|
|
|
|
|
statements. The best description of CQL comes from the CQL homepage |
|
35
|
|
|
|
|
|
|
at the Library of Congress L |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
CQL is a formal language for representing queries to information |
|
38
|
|
|
|
|
|
|
retrieval systems such as web indexes, bibliographic catalogs and museum |
|
39
|
|
|
|
|
|
|
collection information. The CQL design objective is that queries be |
|
40
|
|
|
|
|
|
|
human readable and human writable, and that the language be intuitive |
|
41
|
|
|
|
|
|
|
while maintaining the expressiveness of more complex languages. |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
A CQL statement can be as simple as a single keyword, or as complicated as a set |
|
44
|
|
|
|
|
|
|
of compoenents indicating search indexes, relations, relational modifiers, |
|
45
|
|
|
|
|
|
|
proximity clauses and boolean logic. CQL::Parser will parse CQL statements |
|
46
|
|
|
|
|
|
|
and return the root node for a tree of nodes which describes the CQL statement. |
|
47
|
|
|
|
|
|
|
This data structure can then be used by a client application to analyze the |
|
48
|
|
|
|
|
|
|
statement, and possibly turn it into a query for a local repository. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Each CQL component in the tree inherits from L and can be one |
|
51
|
|
|
|
|
|
|
of the following: L, L, L, |
|
52
|
|
|
|
|
|
|
L, L, L. See the |
|
53
|
|
|
|
|
|
|
documentation for those modules for their respective APIs. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Here are some examples of CQL statements: |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over 4 |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * george |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item * dc.creator=george |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * dc.creator="George Clinton" |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item * clinton and funk |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * clinton and parliament and funk |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item * (clinton or bootsy) and funk |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item * dc.creator="clinton" and dc.date="1976" |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 METHODS |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 new() |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
## for convenience the lexer is located at the package level |
|
82
|
|
|
|
|
|
|
## just need to be sure to reinitialize it in very call to parse() |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new { |
|
85
|
7
|
|
|
7
|
1
|
6121
|
my ( $class, $debug ) = @_; |
|
86
|
7
|
50
|
|
|
|
267
|
$CQL::DEBUG = $debug ? 1 : 0; |
|
87
|
7
|
|
33
|
|
|
101
|
return bless { }, ref($class) || $class; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 parse( $query ) |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Pass in a CQL query and you'll get back the root node for the CQL parse tree. |
|
93
|
|
|
|
|
|
|
If the CQL is invalid an exception will be thrown. |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub parse { |
|
98
|
64
|
|
|
64
|
1
|
2322
|
my ($self,$query) = @_; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
## initialize lexer |
|
101
|
64
|
|
|
|
|
345
|
$lexer = CQL::Lexer->new(); |
|
102
|
|
|
|
|
|
|
|
|
103
|
64
|
|
|
|
|
889
|
debug( "about to parse query: $query" ); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
## create the lexer and get the first token |
|
106
|
64
|
|
|
|
|
206
|
$lexer->tokenize( $query ); |
|
107
|
64
|
|
|
|
|
208
|
$token = $lexer->nextToken(); |
|
108
|
|
|
|
|
|
|
|
|
109
|
64
|
|
|
|
|
342
|
my $root = parseQuery( 'srw.ServerChoice', CQL::Relation->new( 'scr' ) ); |
|
110
|
55
|
50
|
|
|
|
204
|
if ( $token->getType() != CQL_EOF ) { |
|
111
|
0
|
|
|
|
|
0
|
croak( "junk after end ".$token->getString() ); |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
55
|
|
|
|
|
233
|
return $root; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 parseSafe( $query ) |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Pass in a CQL query and you'll get back the root node for the CQL parse tree. |
|
120
|
|
|
|
|
|
|
If the CQL is invalid, an error code from the SRU Diagnostics List |
|
121
|
|
|
|
|
|
|
will be returned. |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
my @cql_errors = ( |
|
126
|
|
|
|
|
|
|
{ regex => qr/does not support relational modifiers/, code => 20 }, |
|
127
|
|
|
|
|
|
|
{ regex => qr/expected boolean got /, code => 37 }, |
|
128
|
|
|
|
|
|
|
{ regex => qr/expected relation modifier got /, code => 20 }, |
|
129
|
|
|
|
|
|
|
{ regex => qr/unknown first-class relation modifier: /, code => 20 }, |
|
130
|
|
|
|
|
|
|
{ regex => qr/missing term/, code => 27 }, |
|
131
|
|
|
|
|
|
|
{ regex => qr/expected proximity relation got /, code => 40 }, |
|
132
|
|
|
|
|
|
|
{ regex => qr/expected proximity distance got /, code => 41 }, |
|
133
|
|
|
|
|
|
|
{ regex => qr/expected proximity unit got/, code => 42 }, |
|
134
|
|
|
|
|
|
|
{ regex => qr/expected proximity ordering got /, code => 43 }, |
|
135
|
|
|
|
|
|
|
{ regex => qr/unknown first class relation: /, code => 19 }, |
|
136
|
|
|
|
|
|
|
{ regex => qr/must supply name/, code => 15 }, |
|
137
|
|
|
|
|
|
|
{ regex => qr/must supply identifier/, code => 15 }, |
|
138
|
|
|
|
|
|
|
{ regex => qr/must supply subtree/, code => 15 }, |
|
139
|
|
|
|
|
|
|
{ regex => qr/must supply term parameter/, code => 27 }, |
|
140
|
|
|
|
|
|
|
{ regex => qr/doesn\'t support relations other than/, code => 20 }, |
|
141
|
|
|
|
|
|
|
); |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub parseSafe { |
|
144
|
2
|
|
|
2
|
1
|
6300
|
my ($self,$query) = @_; |
|
145
|
|
|
|
|
|
|
|
|
146
|
2
|
|
|
|
|
5
|
my $root = eval { $self->parse( $query ); }; |
|
|
2
|
|
|
|
|
9
|
|
|
147
|
|
|
|
|
|
|
|
|
148
|
2
|
50
|
|
|
|
1147
|
if ( my $error = $@ ) { |
|
149
|
2
|
|
|
|
|
4
|
my $code = 10; |
|
150
|
2
|
|
|
|
|
6
|
for( @cql_errors ) { |
|
151
|
30
|
100
|
|
|
|
293
|
$code = $_->{ code } if $error =~ $_->{ regex }; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
2
|
|
|
|
|
20
|
return $code; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
0
|
return $root; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub parseQuery { |
|
160
|
70
|
|
|
70
|
0
|
119
|
my ( $qualifier, $relation ) = @_; |
|
161
|
70
|
|
|
|
|
188
|
debug( "in parseQuery() with term=" . $token->getString() ); |
|
162
|
70
|
|
|
|
|
182
|
my $term = parseTerm( $qualifier, $relation ); |
|
163
|
|
|
|
|
|
|
|
|
164
|
67
|
|
|
|
|
182
|
my $type = $token->getType(); |
|
165
|
67
|
|
100
|
|
|
258
|
while ( $type != CQL_EOF and $type != CQL_RPAREN ) { |
|
166
|
34
|
100
|
|
|
|
128
|
if ( $type == CQL_AND ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
167
|
12
|
|
|
|
|
30
|
match($token); |
|
168
|
12
|
|
|
|
|
30
|
my $term2 = parseTerm( $qualifier, $relation ); |
|
169
|
10
|
|
|
|
|
88
|
$term = CQL::AndNode->new( left=>$term, right=>$term2 ); |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
elsif ( $type == CQL_OR ) { |
|
172
|
9
|
|
|
|
|
19
|
match($token); |
|
173
|
9
|
|
|
|
|
90
|
my $term2 = parseTerm( $qualifier, $relation ); |
|
174
|
9
|
|
|
|
|
105
|
$term = CQL::OrNode->new( left=>$term, right=>$term2 ); |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
elsif ( $type == CQL_NOT ) { |
|
177
|
2
|
|
|
|
|
8
|
match($token); |
|
178
|
2
|
|
|
|
|
7
|
my $term2 = parseTerm( $qualifier, $relation ); |
|
179
|
2
|
|
|
|
|
26
|
$term = CQL::NotNode->new( left=>$term, right=>$term2 ); |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
elsif ( $type == CQL_PROX ) { |
|
182
|
11
|
|
|
|
|
25
|
match($token); |
|
183
|
11
|
|
|
|
|
53
|
my $proxNode = CQL::ProxNode->new( $term ); |
|
184
|
11
|
|
|
|
|
27
|
gatherProxParameters( $proxNode ); |
|
185
|
7
|
|
|
|
|
16
|
my $term2 = parseTerm( $qualifier, $relation ); |
|
186
|
7
|
|
|
|
|
25
|
$proxNode->addSecondTerm( $term2 ); |
|
187
|
7
|
|
|
|
|
10
|
$term = $proxNode; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
else { |
|
190
|
0
|
|
|
|
|
0
|
croak( "expected boolean got ".$token->getString() ); |
|
191
|
|
|
|
|
|
|
} |
|
192
|
28
|
|
|
|
|
80
|
$type = $token->getType(); |
|
193
|
|
|
|
|
|
|
} |
|
194
|
61
|
|
|
|
|
124
|
debug( "no more ops" ); |
|
195
|
61
|
|
|
|
|
111
|
return( $term ); |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub parseTerm { |
|
199
|
100
|
|
|
100
|
0
|
162
|
my ( $qualifier, $relation ) = @_; |
|
200
|
100
|
|
|
|
|
164
|
debug( "in parseTerm()" ); |
|
201
|
100
|
|
|
|
|
295
|
my $word; |
|
202
|
100
|
|
|
|
|
111
|
while ( 1 ) { |
|
203
|
124
|
100
|
|
|
|
371
|
if ( $token->getType() == CQL_LPAREN ) { |
|
|
|
100
|
|
|
|
|
|
|
204
|
5
|
|
|
|
|
15
|
debug( "parenthesized term" ); |
|
205
|
5
|
|
|
|
|
22
|
match( CQL::Token->new('(') ); |
|
206
|
5
|
|
|
|
|
49
|
my $expr = parseQuery( $qualifier, $relation ); |
|
207
|
5
|
|
|
|
|
21
|
match( CQL::Token->new(')') ); |
|
208
|
5
|
|
|
|
|
17
|
return $expr; |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
elsif ( $token->getType() == CQL_GT ) { |
|
211
|
1
|
|
|
|
|
4
|
match( $token ); |
|
212
|
1
|
|
|
|
|
4
|
return parsePrefix( $qualifier, $relation ); |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
118
|
|
|
|
|
214
|
debug( "non-parenthesised term" ); |
|
216
|
118
|
|
|
|
|
601
|
$word = matchSymbol( "qualifier or term" ); |
|
217
|
|
|
|
|
|
|
|
|
218
|
118
|
100
|
|
|
|
249
|
last if ! isBaseRelation(); |
|
219
|
|
|
|
|
|
|
|
|
220
|
24
|
|
|
|
|
43
|
$qualifier = $word; |
|
221
|
24
|
|
|
|
|
70
|
debug( "creating relation with word=$word" ); |
|
222
|
24
|
|
|
|
|
70
|
$relation = CQL::Relation->new( $token->getString() ); |
|
223
|
24
|
|
|
|
|
60
|
match( $token ); |
|
224
|
|
|
|
|
|
|
|
|
225
|
24
|
|
|
|
|
76
|
while ($token->getType() == CQL_MODIFIER ) { |
|
226
|
12
|
|
|
|
|
24
|
match( $token ); |
|
227
|
12
|
50
|
|
|
|
29
|
if ( !isRelationModifier() ) { |
|
228
|
0
|
|
|
|
|
0
|
croak( "expected relation modifier got " . $token->getString() ); |
|
229
|
|
|
|
|
|
|
} |
|
230
|
12
|
|
|
|
|
38
|
$relation->addModifier( $token->getString() ); |
|
231
|
12
|
|
|
|
|
49
|
match( $token ); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
91
|
|
|
|
|
767
|
debug( "qualifier=$qualifier relation=$relation term=$word" ); |
|
236
|
91
|
100
|
66
|
|
|
416
|
croak( "missing term" ) if ! defined($word) or $word eq ''; |
|
237
|
|
|
|
|
|
|
|
|
238
|
89
|
|
|
|
|
363
|
my $node = CQL::TermNode->new( |
|
239
|
|
|
|
|
|
|
qualifier => $qualifier, |
|
240
|
|
|
|
|
|
|
relation => $relation, |
|
241
|
|
|
|
|
|
|
term => $word |
|
242
|
|
|
|
|
|
|
); |
|
243
|
89
|
|
|
|
|
401
|
debug( "made term node: ".$node->toCQL() ); |
|
244
|
89
|
|
|
|
|
255
|
return $node; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub parsePrefix { |
|
248
|
1
|
|
|
1
|
0
|
4
|
my ( $qualifier, $relation ) = @_; |
|
249
|
1
|
|
|
|
|
3
|
debug( "prefix mapping" ); |
|
250
|
1
|
|
|
|
|
1
|
my $name = undef; |
|
251
|
1
|
|
|
|
|
4
|
my $identifier = matchSymbol( "prefix name" ); |
|
252
|
1
|
50
|
|
|
|
4
|
if ( $token->getType() == CQL_EQ ) { |
|
253
|
1
|
|
|
|
|
3
|
match( $token ); |
|
254
|
1
|
|
|
|
|
2
|
$name = $identifier; |
|
255
|
1
|
|
|
|
|
3
|
$identifier = matchSymbol( "prefix identifier" ); |
|
256
|
|
|
|
|
|
|
} |
|
257
|
1
|
|
|
|
|
4
|
my $node = parseQuery( $qualifier, $relation ); |
|
258
|
1
|
|
|
|
|
11
|
return CQL::PrefixNode->new( |
|
259
|
|
|
|
|
|
|
name => $name, |
|
260
|
|
|
|
|
|
|
identifier => $identifier, |
|
261
|
|
|
|
|
|
|
subtree => $node |
|
262
|
|
|
|
|
|
|
); |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub gatherProxParameters { |
|
266
|
11
|
|
|
11
|
0
|
20
|
my $node = shift; |
|
267
|
11
|
|
|
|
|
14
|
if (0) { # CQL 1.0 (obsolete) |
|
268
|
|
|
|
|
|
|
for (my $i=0; $i<4; $i++ ) { |
|
269
|
|
|
|
|
|
|
if ( $token->getType() != CQL_MODIFIER ) { |
|
270
|
|
|
|
|
|
|
## end of proximity parameters |
|
271
|
|
|
|
|
|
|
return; |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
match($token); |
|
274
|
|
|
|
|
|
|
if ( $token->getType() != CQL_MODIFIER ) { |
|
275
|
|
|
|
|
|
|
if ( $i==0 ) { gatherProxRelation($node); } |
|
276
|
|
|
|
|
|
|
elsif ( $i==1 ) { gatherProxDistance($node); } |
|
277
|
|
|
|
|
|
|
elsif ( $i==2 ) { gatherProxUnit($node); } |
|
278
|
|
|
|
|
|
|
elsif ( $i==3 ) { gatherProxOrdering($node); } |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
} else { |
|
282
|
11
|
|
|
|
|
33
|
while ( $token->getType() == CQL_MODIFIER ) { |
|
283
|
15
|
|
|
|
|
34
|
match( $token ); |
|
284
|
15
|
100
|
66
|
|
|
43
|
if ( $token->getType() == CQL_DISTANCE ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
285
|
8
|
|
|
|
|
33
|
match( $token ); |
|
286
|
8
|
|
|
|
|
22
|
gatherProxRelation( $node ); |
|
287
|
8
|
|
|
|
|
20
|
gatherProxDistance( $node ); |
|
288
|
|
|
|
|
|
|
} elsif ( $token->getType() == CQL_UNIT ) { |
|
289
|
4
|
|
|
|
|
20
|
match( $token ); |
|
290
|
4
|
50
|
|
|
|
14
|
if ( $token->getType() != CQL_EQ ) { |
|
291
|
0
|
|
|
|
|
0
|
croak( "expected proximity unit parameter got ".$token->getString() ); |
|
292
|
|
|
|
|
|
|
} |
|
293
|
4
|
|
|
|
|
10
|
match( $token ); |
|
294
|
4
|
|
|
|
|
29
|
gatherProxUnit( $node ); |
|
295
|
|
|
|
|
|
|
} elsif ( $token->getType() == CQL_ORDERED |
|
296
|
|
|
|
|
|
|
|| $token->getType() == CQL_UNORDERED ) { |
|
297
|
1
|
|
|
|
|
4
|
gatherProxOrdering( $node ); |
|
298
|
|
|
|
|
|
|
} else { |
|
299
|
2
|
|
|
|
|
9
|
croak( "expected proximity parameter got ". $token->getString() ."(". $token->getType() .")" ); |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
} |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub gatherProxRelation { |
|
306
|
8
|
|
|
8
|
0
|
15
|
my $node = shift; |
|
307
|
8
|
50
|
|
|
|
15
|
if ( ! isProxRelation() ) { |
|
308
|
0
|
|
|
|
|
0
|
croak( "expected proximity relation got ".$token->getString() ); |
|
309
|
|
|
|
|
|
|
} |
|
310
|
8
|
|
|
|
|
30
|
$node->addModifier( "relation", $token->getString() ); |
|
311
|
8
|
|
|
|
|
19
|
match( $token ); |
|
312
|
8
|
|
|
|
|
28
|
debug( "gatherProxRelation matched ".$token->getString() ); |
|
313
|
|
|
|
|
|
|
} |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub gatherProxDistance { |
|
316
|
8
|
|
|
8
|
0
|
12
|
my $node = shift; |
|
317
|
8
|
100
|
|
|
|
23
|
if ( $token->getString() !~ /^\d+$/ ) { |
|
318
|
2
|
|
|
|
|
6
|
croak( "expected proximity distance got ".$token->getString() ); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
6
|
|
|
|
|
21
|
$node->addModifier( "distance", $token->getString() ); |
|
321
|
6
|
|
|
|
|
13
|
match( $token ); |
|
322
|
6
|
|
|
|
|
22
|
debug( "gatherProxDistance matched ".$token->getString() ); |
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub gatherProxUnit { |
|
326
|
4
|
|
|
4
|
0
|
6
|
my $node = shift; |
|
327
|
4
|
|
|
|
|
10
|
my $type = $token->getType(); |
|
328
|
4
|
50
|
66
|
|
|
26
|
if( $type != CQL_PWORD and $type != CQL_SENTENCE and $type != CQL_PARAGRAPH |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
329
|
|
|
|
|
|
|
and $type != CQL_ELEMENT ) { |
|
330
|
0
|
|
|
|
|
0
|
croak( "expected proximity unit got ".$token->getString() ); |
|
331
|
|
|
|
|
|
|
} |
|
332
|
4
|
|
|
|
|
14
|
$node->addModifier( "unit", $token->getString() ); |
|
333
|
4
|
|
|
|
|
10
|
match( $token ); |
|
334
|
4
|
|
|
|
|
35
|
debug( "gatherProxUnit matched ".$token->getString() ); |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub gatherProxOrdering { |
|
338
|
1
|
|
|
1
|
0
|
2
|
my $node = shift; |
|
339
|
1
|
|
|
|
|
5
|
my $type = $token->getType(); |
|
340
|
1
|
50
|
33
|
|
|
6
|
if ( $type != CQL_ORDERED and $type != CQL_UNORDERED ) { |
|
341
|
0
|
|
|
|
|
0
|
croak( "expected proximity ordering got ".$token->getString() ); |
|
342
|
|
|
|
|
|
|
} |
|
343
|
1
|
|
|
|
|
4
|
$node->addModifier( "ordering", $token->getString() ); |
|
344
|
1
|
|
|
|
|
4
|
match( $token ); |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub isBaseRelation { |
|
348
|
118
|
|
|
118
|
0
|
364
|
debug( "inside base relation: checking ttype=".$token->getType()." sval=". |
|
349
|
|
|
|
|
|
|
$token->getString() ); |
|
350
|
118
|
100
|
66
|
|
|
1619
|
if( $token->getType() == CQL_WORD and $token->getString() !~ /\./ ) { |
|
351
|
3
|
|
|
|
|
1438
|
croak( "unknown first class relation: ".$token->getString() ); |
|
352
|
|
|
|
|
|
|
} |
|
353
|
115
|
|
|
|
|
495
|
my $type = $token->getType(); |
|
354
|
115
|
|
100
|
|
|
211
|
return( isProxRelation() or $type==CQL_ANY or $type==CQL_ALL |
|
355
|
|
|
|
|
|
|
or $type==CQL_EXACT or $type==CQL_SCR or $type==CQL_WORD |
|
356
|
|
|
|
|
|
|
or $type==CQL_WITHIN or $type==CQL_ENCLOSES); |
|
357
|
|
|
|
|
|
|
} |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub isProxRelation { |
|
360
|
123
|
|
|
123
|
0
|
275
|
debug( "isProxRelation: checking ttype=".$token->getType()." sval=". |
|
361
|
|
|
|
|
|
|
$token->getString() ); |
|
362
|
123
|
|
|
|
|
327
|
my $type = $token->getType(); |
|
363
|
123
|
|
100
|
|
|
3892
|
return( $type==CQL_LT or $type==CQL_GT or $type==CQL_EQ or $type==CQL_LE |
|
364
|
|
|
|
|
|
|
or $type==CQL_GE or $type==CQL_NE ); |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub isRelationModifier { |
|
368
|
12
|
|
|
12
|
0
|
35
|
my $type = $token->getType(); |
|
369
|
12
|
100
|
|
|
|
36
|
if ($type == CQL_WORD) { |
|
370
|
1
|
|
|
|
|
4
|
return $token->getString() =~ /\./; |
|
371
|
|
|
|
|
|
|
} |
|
372
|
11
|
|
66
|
|
|
246
|
return ($type==CQL_RELEVANT or $type==CQL_FUZZY or $type==CQL_STEM |
|
373
|
|
|
|
|
|
|
or $type==CQL_PHONETIC or $type==CQL_PWORD or $type==CQL_STRING |
|
374
|
|
|
|
|
|
|
or $type==CQL_ISODATE or $type==CQL_NUMBER or $type==CQL_URI |
|
375
|
|
|
|
|
|
|
or $type==CQL_PARTIAL or $type==CQL_MASKED or $type==CQL_UNMASKED |
|
376
|
|
|
|
|
|
|
or $type==CQL_NWSE); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub match { |
|
380
|
264
|
|
|
264
|
0
|
374
|
my $expected = shift; |
|
381
|
264
|
|
|
|
|
3056
|
debug( "in match(".$expected->getString().")" ); |
|
382
|
264
|
50
|
|
|
|
710
|
if ( $token->getType() != $expected->getType() ) { |
|
383
|
0
|
|
|
|
|
0
|
croak( "expected ".$expected->getString() . |
|
384
|
|
|
|
|
|
|
" but got " . $token->getString() ); |
|
385
|
|
|
|
|
|
|
} |
|
386
|
264
|
|
|
|
|
877
|
$token = $lexer->nextToken(); |
|
387
|
264
|
|
|
|
|
1207
|
debug( "got token type=".$token->getType()." string=".$token->getString() ); |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub matchSymbol { |
|
391
|
120
|
|
|
120
|
0
|
263
|
debug( "in match symbol" ); |
|
392
|
120
|
|
|
|
|
297
|
my $return = $token->getString(); |
|
393
|
120
|
|
|
|
|
247
|
match( $token ); |
|
394
|
120
|
|
|
|
|
286
|
return $return; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub debug { |
|
398
|
1530
|
50
|
|
1530
|
0
|
5154
|
return unless $CQL::DEBUG; |
|
399
|
0
|
|
|
|
|
|
print STDERR "CQL::Parser: ", shift, "\n"; |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=head1 XCQL |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
CQL has an XML representation which you can generate from a CQL parse |
|
405
|
|
|
|
|
|
|
tree. Just call the toXCQL() method on the root node you get back |
|
406
|
|
|
|
|
|
|
from a call to parse(). |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head1 ERRORS AND DIAGNOSTICS |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
As mentioned above, a CQL syntax error will result in an exception being |
|
411
|
|
|
|
|
|
|
thrown. So if you have any doubts about the CQL that you are parsing you |
|
412
|
|
|
|
|
|
|
should wrap the call to parse() in an eval block, and check $@ |
|
413
|
|
|
|
|
|
|
afterwards to make sure everything went ok. |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
eval { |
|
416
|
|
|
|
|
|
|
my $node = $parser->parse( $cql ); |
|
417
|
|
|
|
|
|
|
}; |
|
418
|
|
|
|
|
|
|
if ( $@ ) { |
|
419
|
|
|
|
|
|
|
print "uhoh, exception $@\n"; |
|
420
|
|
|
|
|
|
|
} |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
If you'd like to see blow by blow details while your CQL is being parsed |
|
423
|
|
|
|
|
|
|
set $CQL::DEBUG equal to 1, and you will get details on STDERR. This is |
|
424
|
|
|
|
|
|
|
useful if the parse tree is incorrect and you want to locate where things |
|
425
|
|
|
|
|
|
|
are going wrong. Hopefully this won't happen, but if it does please notify the |
|
426
|
|
|
|
|
|
|
author. |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head1 TODO |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=over 4 |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=item * toYourEngineHere() please feel free to add functionality and send in |
|
433
|
|
|
|
|
|
|
patches! |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=back |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 THANKYOUS |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
CQL::Parser is essentially a Perl port of Mike Taylor's cql-java package |
|
440
|
|
|
|
|
|
|
http://zing.z3950.org/cql/java/. Mike and IndexData were kind enough |
|
441
|
|
|
|
|
|
|
to allow the author to write this port, and to make it available under |
|
442
|
|
|
|
|
|
|
the terms of the Artistic License. Thanks Mike! |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
The CQL::Lexer package relies heavily on Stevan Little's excellent |
|
445
|
|
|
|
|
|
|
String::Tokenizer. Thanks Stevan! |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
CQL::Parser was developed as a component of the Ockham project, |
|
448
|
|
|
|
|
|
|
which is funded by the National Science Foundation. See http://www.ockham.org |
|
449
|
|
|
|
|
|
|
for more information about Ockham. |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 AUTHOR |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=over 4 |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=item * Ed Summers - ehs at pobox dot com |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item * Brian Cassidy - bricas at cpan dot org |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item * Wilbert Hengst - W.Hengst at uva dot nl |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=back |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Copyright 2004-2009 by Ed Summers |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
468
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
1; |