| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package CQL::Parser; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 7 |  |  | 7 |  | 46822 | use strict; | 
|  | 7 |  |  |  |  | 19 |  | 
|  | 7 |  |  |  |  | 296 |  | 
| 4 | 7 |  |  | 7 |  | 38 | use warnings; | 
|  | 7 |  |  |  |  | 13 |  | 
|  | 7 |  |  |  |  | 218 |  | 
| 5 | 7 |  |  | 7 |  | 4888 | use CQL::Lexer; | 
|  | 7 |  |  |  |  | 19 |  | 
|  | 7 |  |  |  |  | 207 |  | 
| 6 | 7 |  |  | 7 |  | 6066 | use CQL::Relation; | 
|  | 7 |  |  |  |  | 26 |  | 
|  | 7 |  |  |  |  | 666 |  | 
| 7 | 7 |  |  | 7 |  | 50 | use CQL::Token; | 
|  | 7 |  |  |  |  | 18 |  | 
|  | 7 |  |  |  |  | 1503 |  | 
| 8 | 7 |  |  | 7 |  | 5340 | use CQL::TermNode; | 
|  | 7 |  |  |  |  | 19 |  | 
|  | 7 |  |  |  |  | 390 |  | 
| 9 | 7 |  |  | 7 |  | 4629 | use CQL::AndNode; | 
|  | 7 |  |  |  |  | 20 |  | 
|  | 7 |  |  |  |  | 362 |  | 
| 10 | 7 |  |  | 7 |  | 4015 | use CQL::OrNode; | 
|  | 7 |  |  |  |  | 18 |  | 
|  | 7 |  |  |  |  | 300 |  | 
| 11 | 7 |  |  | 7 |  | 3972 | use CQL::NotNode; | 
|  | 7 |  |  |  |  | 17 |  | 
|  | 7 |  |  |  |  | 288 |  | 
| 12 | 7 |  |  | 7 |  | 13912 | use CQL::PrefixNode; | 
|  | 7 |  |  |  |  | 20 |  | 
|  | 7 |  |  |  |  | 1061 |  | 
| 13 | 7 |  |  | 7 |  | 14890 | use CQL::ProxNode; | 
|  | 7 |  |  |  |  | 18 |  | 
|  | 7 |  |  |  |  | 312 |  | 
| 14 | 7 |  |  | 7 |  | 43 | use Carp qw( croak ); | 
|  | 7 |  |  |  |  | 14 |  | 
|  | 7 |  |  |  |  | 22723 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | our $VERSION = '1.13'; | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | my $lexer; | 
| 19 |  |  |  |  |  |  | my $token; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | =head1 NAME | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | CQL::Parser - compiles CQL strings into parse trees of Node subtypes. | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | use CQL::Parser; | 
| 28 |  |  |  |  |  |  | my $parser = CQL::Parser->new(); | 
| 29 |  |  |  |  |  |  | my $root = $parser->parse( $cql ); | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | CQL::Parser provides a mechanism to parse Common Query Language (CQL) | 
| 34 |  |  |  |  |  |  | statements. The best description of CQL comes from the CQL homepage | 
| 35 |  |  |  |  |  |  | at the Library of Congress L | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | CQL is a formal language for representing queries to information | 
| 38 |  |  |  |  |  |  | retrieval systems such as web indexes, bibliographic catalogs and museum | 
| 39 |  |  |  |  |  |  | collection information. The CQL design objective is that queries be | 
| 40 |  |  |  |  |  |  | human readable and human writable, and that the language be intuitive | 
| 41 |  |  |  |  |  |  | while maintaining the expressiveness of more complex languages. | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | A CQL statement can be as simple as a single keyword, or as complicated as a set | 
| 44 |  |  |  |  |  |  | of compoenents indicating search indexes, relations, relational modifiers, | 
| 45 |  |  |  |  |  |  | proximity clauses and boolean logic. CQL::Parser will parse CQL statements | 
| 46 |  |  |  |  |  |  | and return the root node for a tree of nodes which describes the CQL statement. | 
| 47 |  |  |  |  |  |  | This data structure can then be used by a client application to analyze the | 
| 48 |  |  |  |  |  |  | statement, and possibly turn it into a query for a local repository. | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | Each CQL component in the tree inherits from L and can be one | 
| 51 |  |  |  |  |  |  | of the following: L, L, L, | 
| 52 |  |  |  |  |  |  | L, L, L. See the | 
| 53 |  |  |  |  |  |  | documentation for those modules for their respective APIs. | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | Here are some examples of CQL statements: | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | =over 4 | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =item * george | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | =item * dc.creator=george | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | =item * dc.creator="George Clinton" | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | =item * clinton and funk | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | =item * clinton and parliament and funk | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | =item * (clinton or bootsy) and funk | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | =item * dc.creator="clinton" and dc.date="1976" | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | =back | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | =head1 METHODS | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =head2 new() | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | =cut | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | ## for convenience the lexer is located at the package level | 
| 82 |  |  |  |  |  |  | ## just need to be sure to reinitialize it in very call to parse() | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | sub new { | 
| 85 | 7 |  |  | 7 | 1 | 6121 | my ( $class, $debug ) = @_; | 
| 86 | 7 | 50 |  |  |  | 267 | $CQL::DEBUG = $debug ? 1 : 0; | 
| 87 | 7 |  | 33 |  |  | 101 | return bless { }, ref($class) || $class; | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | =head2 parse( $query ) | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | Pass in a CQL query and you'll get back the root node for the CQL parse tree. | 
| 93 |  |  |  |  |  |  | If the CQL is invalid an exception will be thrown. | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | =cut | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | sub parse { | 
| 98 | 64 |  |  | 64 | 1 | 2322 | my ($self,$query) = @_; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | ## initialize lexer | 
| 101 | 64 |  |  |  |  | 345 | $lexer = CQL::Lexer->new(); | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 64 |  |  |  |  | 889 | debug( "about to parse query: $query" ); | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | ## create the lexer and get the first token | 
| 106 | 64 |  |  |  |  | 206 | $lexer->tokenize( $query ); | 
| 107 | 64 |  |  |  |  | 208 | $token = $lexer->nextToken(); | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 64 |  |  |  |  | 342 | my $root = parseQuery( 'srw.ServerChoice', CQL::Relation->new( 'scr' ) ); | 
| 110 | 55 | 50 |  |  |  | 204 | if ( $token->getType() != CQL_EOF ) { | 
| 111 | 0 |  |  |  |  | 0 | croak( "junk after end ".$token->getString() ); | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 55 |  |  |  |  | 233 | return $root; | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | =head2 parseSafe( $query ) | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | Pass in a CQL query and you'll get back the root node for the CQL parse tree. | 
| 120 |  |  |  |  |  |  | If the CQL is invalid, an error code from the SRU Diagnostics List | 
| 121 |  |  |  |  |  |  | will be returned. | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | =cut | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | my @cql_errors = ( | 
| 126 |  |  |  |  |  |  | { regex => qr/does not support relational modifiers/,   code => 20 }, | 
| 127 |  |  |  |  |  |  | { regex => qr/expected boolean got /,                   code => 37 }, | 
| 128 |  |  |  |  |  |  | { regex => qr/expected relation modifier got /,         code => 20 }, | 
| 129 |  |  |  |  |  |  | { regex => qr/unknown first-class relation modifier: /, code => 20 }, | 
| 130 |  |  |  |  |  |  | { regex => qr/missing term/,                            code => 27 }, | 
| 131 |  |  |  |  |  |  | { regex => qr/expected proximity relation got /,        code => 40 }, | 
| 132 |  |  |  |  |  |  | { regex => qr/expected proximity distance got /,        code => 41 }, | 
| 133 |  |  |  |  |  |  | { regex => qr/expected proximity unit got/,             code => 42 }, | 
| 134 |  |  |  |  |  |  | { regex => qr/expected proximity ordering got /,        code => 43 }, | 
| 135 |  |  |  |  |  |  | { regex => qr/unknown first class relation: /,          code => 19 }, | 
| 136 |  |  |  |  |  |  | { regex => qr/must supply name/,                        code => 15 }, | 
| 137 |  |  |  |  |  |  | { regex => qr/must supply identifier/,                  code => 15 }, | 
| 138 |  |  |  |  |  |  | { regex => qr/must supply subtree/,                     code => 15 }, | 
| 139 |  |  |  |  |  |  | { regex => qr/must supply term parameter/,              code => 27 }, | 
| 140 |  |  |  |  |  |  | { regex => qr/doesn\'t support relations other than/,   code => 20 }, | 
| 141 |  |  |  |  |  |  | ); | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | sub parseSafe { | 
| 144 | 2 |  |  | 2 | 1 | 6300 | my ($self,$query) = @_; | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 2 |  |  |  |  | 5 | my $root = eval { $self->parse( $query ); }; | 
|  | 2 |  |  |  |  | 9 |  | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 2 | 50 |  |  |  | 1147 | if ( my $error = $@ ) { | 
| 149 | 2 |  |  |  |  | 4 | my $code = 10; | 
| 150 | 2 |  |  |  |  | 6 | for( @cql_errors ) { | 
| 151 | 30 | 100 |  |  |  | 293 | $code = $_->{ code } if $error =~ $_->{ regex }; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 2 |  |  |  |  | 20 | return $code; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 0 |  |  |  |  | 0 | return $root; | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | sub parseQuery { | 
| 160 | 70 |  |  | 70 | 0 | 119 | my ( $qualifier, $relation ) = @_; | 
| 161 | 70 |  |  |  |  | 188 | debug( "in parseQuery() with term=" . $token->getString() ); | 
| 162 | 70 |  |  |  |  | 182 | my $term = parseTerm( $qualifier, $relation ); | 
| 163 |  |  |  |  |  |  |  | 
| 164 | 67 |  |  |  |  | 182 | my $type = $token->getType(); | 
| 165 | 67 |  | 100 |  |  | 258 | while ( $type != CQL_EOF and $type != CQL_RPAREN ) { | 
| 166 | 34 | 100 |  |  |  | 128 | if ( $type == CQL_AND ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 167 | 12 |  |  |  |  | 30 | match($token); | 
| 168 | 12 |  |  |  |  | 30 | my $term2 = parseTerm( $qualifier, $relation ); | 
| 169 | 10 |  |  |  |  | 88 | $term = CQL::AndNode->new( left=>$term, right=>$term2 ); | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  | elsif ( $type == CQL_OR ) { | 
| 172 | 9 |  |  |  |  | 19 | match($token); | 
| 173 | 9 |  |  |  |  | 90 | my $term2 = parseTerm( $qualifier, $relation ); | 
| 174 | 9 |  |  |  |  | 105 | $term = CQL::OrNode->new( left=>$term, right=>$term2 ); | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  | elsif ( $type == CQL_NOT ) { | 
| 177 | 2 |  |  |  |  | 8 | match($token); | 
| 178 | 2 |  |  |  |  | 7 | my $term2 = parseTerm( $qualifier, $relation ); | 
| 179 | 2 |  |  |  |  | 26 | $term = CQL::NotNode->new( left=>$term, right=>$term2 ); | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  | elsif ( $type == CQL_PROX ) { | 
| 182 | 11 |  |  |  |  | 25 | match($token); | 
| 183 | 11 |  |  |  |  | 53 | my $proxNode = CQL::ProxNode->new( $term ); | 
| 184 | 11 |  |  |  |  | 27 | gatherProxParameters( $proxNode ); | 
| 185 | 7 |  |  |  |  | 16 | my $term2 = parseTerm( $qualifier, $relation ); | 
| 186 | 7 |  |  |  |  | 25 | $proxNode->addSecondTerm( $term2 ); | 
| 187 | 7 |  |  |  |  | 10 | $term = $proxNode; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  | else { | 
| 190 | 0 |  |  |  |  | 0 | croak( "expected boolean got ".$token->getString() ); | 
| 191 |  |  |  |  |  |  | } | 
| 192 | 28 |  |  |  |  | 80 | $type = $token->getType(); | 
| 193 |  |  |  |  |  |  | } | 
| 194 | 61 |  |  |  |  | 124 | debug( "no more ops" ); | 
| 195 | 61 |  |  |  |  | 111 | return( $term ); | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | sub parseTerm { | 
| 199 | 100 |  |  | 100 | 0 | 162 | my ( $qualifier, $relation ) = @_; | 
| 200 | 100 |  |  |  |  | 164 | debug( "in parseTerm()" ); | 
| 201 | 100 |  |  |  |  | 295 | my $word; | 
| 202 | 100 |  |  |  |  | 111 | while ( 1 ) { | 
| 203 | 124 | 100 |  |  |  | 371 | if ( $token->getType() == CQL_LPAREN ) { | 
|  |  | 100 |  |  |  |  |  | 
| 204 | 5 |  |  |  |  | 15 | debug( "parenthesized term" ); | 
| 205 | 5 |  |  |  |  | 22 | match( CQL::Token->new('(') ); | 
| 206 | 5 |  |  |  |  | 49 | my $expr = parseQuery( $qualifier, $relation ); | 
| 207 | 5 |  |  |  |  | 21 | match( CQL::Token->new(')') ); | 
| 208 | 5 |  |  |  |  | 17 | return $expr; | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  | elsif ( $token->getType() == CQL_GT ) { | 
| 211 | 1 |  |  |  |  | 4 | match( $token ); | 
| 212 | 1 |  |  |  |  | 4 | return parsePrefix( $qualifier, $relation ); | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 | 118 |  |  |  |  | 214 | debug( "non-parenthesised term" ); | 
| 216 | 118 |  |  |  |  | 601 | $word = matchSymbol( "qualifier or term" ); | 
| 217 |  |  |  |  |  |  |  | 
| 218 | 118 | 100 |  |  |  | 249 | last if ! isBaseRelation(); | 
| 219 |  |  |  |  |  |  |  | 
| 220 | 24 |  |  |  |  | 43 | $qualifier = $word; | 
| 221 | 24 |  |  |  |  | 70 | debug( "creating relation with word=$word" ); | 
| 222 | 24 |  |  |  |  | 70 | $relation = CQL::Relation->new( $token->getString() ); | 
| 223 | 24 |  |  |  |  | 60 | match( $token ); | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 24 |  |  |  |  | 76 | while ($token->getType() == CQL_MODIFIER ) { | 
| 226 | 12 |  |  |  |  | 24 | match( $token ); | 
| 227 | 12 | 50 |  |  |  | 29 | if ( !isRelationModifier() ) { | 
| 228 | 0 |  |  |  |  | 0 | croak( "expected relation modifier got " . $token->getString() ); | 
| 229 |  |  |  |  |  |  | } | 
| 230 | 12 |  |  |  |  | 38 | $relation->addModifier( $token->getString() ); | 
| 231 | 12 |  |  |  |  | 49 | match( $token ); | 
| 232 |  |  |  |  |  |  | } | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  |  | 
| 235 | 91 |  |  |  |  | 767 | debug( "qualifier=$qualifier relation=$relation term=$word" ); | 
| 236 | 91 | 100 | 66 |  |  | 416 | croak( "missing term" ) if ! defined($word) or $word eq ''; | 
| 237 |  |  |  |  |  |  |  | 
| 238 | 89 |  |  |  |  | 363 | my $node = CQL::TermNode->new( | 
| 239 |  |  |  |  |  |  | qualifier   => $qualifier, | 
| 240 |  |  |  |  |  |  | relation    => $relation, | 
| 241 |  |  |  |  |  |  | term        => $word | 
| 242 |  |  |  |  |  |  | ); | 
| 243 | 89 |  |  |  |  | 401 | debug( "made term node: ".$node->toCQL() ); | 
| 244 | 89 |  |  |  |  | 255 | return $node; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | sub parsePrefix { | 
| 248 | 1 |  |  | 1 | 0 | 4 | my ( $qualifier, $relation ) = @_; | 
| 249 | 1 |  |  |  |  | 3 | debug( "prefix mapping" ); | 
| 250 | 1 |  |  |  |  | 1 | my $name = undef; | 
| 251 | 1 |  |  |  |  | 4 | my $identifier = matchSymbol( "prefix name" ); | 
| 252 | 1 | 50 |  |  |  | 4 | if ( $token->getType() == CQL_EQ ) { | 
| 253 | 1 |  |  |  |  | 3 | match( $token ); | 
| 254 | 1 |  |  |  |  | 2 | $name = $identifier; | 
| 255 | 1 |  |  |  |  | 3 | $identifier = matchSymbol( "prefix identifier" ); | 
| 256 |  |  |  |  |  |  | } | 
| 257 | 1 |  |  |  |  | 4 | my $node = parseQuery( $qualifier, $relation ); | 
| 258 | 1 |  |  |  |  | 11 | return CQL::PrefixNode->new( | 
| 259 |  |  |  |  |  |  | name        => $name, | 
| 260 |  |  |  |  |  |  | identifier  => $identifier, | 
| 261 |  |  |  |  |  |  | subtree     => $node | 
| 262 |  |  |  |  |  |  | ); | 
| 263 |  |  |  |  |  |  | } | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | sub gatherProxParameters { | 
| 266 | 11 |  |  | 11 | 0 | 20 | my $node = shift; | 
| 267 | 11 |  |  |  |  | 14 | if (0) {	# CQL 1.0 (obsolete) | 
| 268 |  |  |  |  |  |  | for (my $i=0; $i<4; $i++ ) { | 
| 269 |  |  |  |  |  |  | if ( $token->getType() != CQL_MODIFIER ) { | 
| 270 |  |  |  |  |  |  | ## end of proximity parameters | 
| 271 |  |  |  |  |  |  | return; | 
| 272 |  |  |  |  |  |  | } | 
| 273 |  |  |  |  |  |  | match($token); | 
| 274 |  |  |  |  |  |  | if ( $token->getType() != CQL_MODIFIER ) { | 
| 275 |  |  |  |  |  |  | if ( $i==0 ) { gatherProxRelation($node); } | 
| 276 |  |  |  |  |  |  | elsif ( $i==1 ) { gatherProxDistance($node); } | 
| 277 |  |  |  |  |  |  | elsif ( $i==2 ) { gatherProxUnit($node); } | 
| 278 |  |  |  |  |  |  | elsif ( $i==3 ) { gatherProxOrdering($node); } | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  | } | 
| 281 |  |  |  |  |  |  | } else { | 
| 282 | 11 |  |  |  |  | 33 | while ( $token->getType() == CQL_MODIFIER ) { | 
| 283 | 15 |  |  |  |  | 34 | match( $token ); | 
| 284 | 15 | 100 | 66 |  |  | 43 | if ( $token->getType() == CQL_DISTANCE ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 285 | 8 |  |  |  |  | 33 | match( $token ); | 
| 286 | 8 |  |  |  |  | 22 | gatherProxRelation( $node ); | 
| 287 | 8 |  |  |  |  | 20 | gatherProxDistance( $node ); | 
| 288 |  |  |  |  |  |  | } elsif ( $token->getType() == CQL_UNIT ) { | 
| 289 | 4 |  |  |  |  | 20 | match( $token ); | 
| 290 | 4 | 50 |  |  |  | 14 | if ( $token->getType() != CQL_EQ ) { | 
| 291 | 0 |  |  |  |  | 0 | croak( "expected proximity unit parameter got ".$token->getString() ); | 
| 292 |  |  |  |  |  |  | } | 
| 293 | 4 |  |  |  |  | 10 | match( $token ); | 
| 294 | 4 |  |  |  |  | 29 | gatherProxUnit( $node ); | 
| 295 |  |  |  |  |  |  | } elsif ( $token->getType() == CQL_ORDERED | 
| 296 |  |  |  |  |  |  | || $token->getType() == CQL_UNORDERED ) { | 
| 297 | 1 |  |  |  |  | 4 | gatherProxOrdering( $node ); | 
| 298 |  |  |  |  |  |  | } else { | 
| 299 | 2 |  |  |  |  | 9 | croak( "expected proximity parameter got ". $token->getString()  ."(". $token->getType() .")" ); | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  | } | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | sub gatherProxRelation { | 
| 306 | 8 |  |  | 8 | 0 | 15 | my $node = shift; | 
| 307 | 8 | 50 |  |  |  | 15 | if ( ! isProxRelation() ) { | 
| 308 | 0 |  |  |  |  | 0 | croak( "expected proximity relation got ".$token->getString() ); | 
| 309 |  |  |  |  |  |  | } | 
| 310 | 8 |  |  |  |  | 30 | $node->addModifier( "relation", $token->getString() ); | 
| 311 | 8 |  |  |  |  | 19 | match( $token ); | 
| 312 | 8 |  |  |  |  | 28 | debug( "gatherProxRelation matched ".$token->getString() ); | 
| 313 |  |  |  |  |  |  | } | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | sub gatherProxDistance { | 
| 316 | 8 |  |  | 8 | 0 | 12 | my $node = shift; | 
| 317 | 8 | 100 |  |  |  | 23 | if ( $token->getString() !~ /^\d+$/ ) { | 
| 318 | 2 |  |  |  |  | 6 | croak( "expected proximity distance got ".$token->getString() ); | 
| 319 |  |  |  |  |  |  | } | 
| 320 | 6 |  |  |  |  | 21 | $node->addModifier( "distance", $token->getString() ); | 
| 321 | 6 |  |  |  |  | 13 | match( $token ); | 
| 322 | 6 |  |  |  |  | 22 | debug( "gatherProxDistance matched ".$token->getString() ); | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | sub gatherProxUnit { | 
| 326 | 4 |  |  | 4 | 0 | 6 | my $node = shift; | 
| 327 | 4 |  |  |  |  | 10 | my $type = $token->getType(); | 
| 328 | 4 | 50 | 66 |  |  | 26 | if( $type != CQL_PWORD and $type != CQL_SENTENCE and $type != CQL_PARAGRAPH | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 329 |  |  |  |  |  |  | and $type != CQL_ELEMENT ) { | 
| 330 | 0 |  |  |  |  | 0 | croak( "expected proximity unit got ".$token->getString() ); | 
| 331 |  |  |  |  |  |  | } | 
| 332 | 4 |  |  |  |  | 14 | $node->addModifier( "unit", $token->getString() ); | 
| 333 | 4 |  |  |  |  | 10 | match( $token ); | 
| 334 | 4 |  |  |  |  | 35 | debug( "gatherProxUnit matched ".$token->getString() ); | 
| 335 |  |  |  |  |  |  | } | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | sub gatherProxOrdering { | 
| 338 | 1 |  |  | 1 | 0 | 2 | my $node = shift; | 
| 339 | 1 |  |  |  |  | 5 | my $type = $token->getType(); | 
| 340 | 1 | 50 | 33 |  |  | 6 | if ( $type != CQL_ORDERED and $type != CQL_UNORDERED ) { | 
| 341 | 0 |  |  |  |  | 0 | croak( "expected proximity ordering got ".$token->getString() ); | 
| 342 |  |  |  |  |  |  | } | 
| 343 | 1 |  |  |  |  | 4 | $node->addModifier( "ordering", $token->getString() ); | 
| 344 | 1 |  |  |  |  | 4 | match( $token ); | 
| 345 |  |  |  |  |  |  | } | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | sub isBaseRelation { | 
| 348 | 118 |  |  | 118 | 0 | 364 | debug( "inside base relation: checking ttype=".$token->getType()." sval=". | 
| 349 |  |  |  |  |  |  | $token->getString() ); | 
| 350 | 118 | 100 | 66 |  |  | 1619 | if( $token->getType() == CQL_WORD and $token->getString() !~ /\./ ) { | 
| 351 | 3 |  |  |  |  | 1438 | croak( "unknown first class relation: ".$token->getString() ); | 
| 352 |  |  |  |  |  |  | } | 
| 353 | 115 |  |  |  |  | 495 | my $type = $token->getType(); | 
| 354 | 115 |  | 100 |  |  | 211 | return( isProxRelation() or $type==CQL_ANY or $type==CQL_ALL | 
| 355 |  |  |  |  |  |  | or $type==CQL_EXACT or $type==CQL_SCR or $type==CQL_WORD | 
| 356 |  |  |  |  |  |  | or $type==CQL_WITHIN or $type==CQL_ENCLOSES); | 
| 357 |  |  |  |  |  |  | } | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | sub isProxRelation { | 
| 360 | 123 |  |  | 123 | 0 | 275 | debug( "isProxRelation: checking ttype=".$token->getType()." sval=". | 
| 361 |  |  |  |  |  |  | $token->getString() ); | 
| 362 | 123 |  |  |  |  | 327 | my $type = $token->getType(); | 
| 363 | 123 |  | 100 |  |  | 3892 | return( $type==CQL_LT or $type==CQL_GT or $type==CQL_EQ or $type==CQL_LE | 
| 364 |  |  |  |  |  |  | or $type==CQL_GE or $type==CQL_NE ); | 
| 365 |  |  |  |  |  |  | } | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | sub isRelationModifier { | 
| 368 | 12 |  |  | 12 | 0 | 35 | my $type = $token->getType(); | 
| 369 | 12 | 100 |  |  |  | 36 | if ($type == CQL_WORD) { | 
| 370 | 1 |  |  |  |  | 4 | return $token->getString() =~ /\./; | 
| 371 |  |  |  |  |  |  | } | 
| 372 | 11 |  | 66 |  |  | 246 | return ($type==CQL_RELEVANT or $type==CQL_FUZZY or $type==CQL_STEM | 
| 373 |  |  |  |  |  |  | or $type==CQL_PHONETIC or $type==CQL_PWORD or $type==CQL_STRING | 
| 374 |  |  |  |  |  |  | or $type==CQL_ISODATE or $type==CQL_NUMBER or $type==CQL_URI | 
| 375 |  |  |  |  |  |  | or $type==CQL_PARTIAL or $type==CQL_MASKED or $type==CQL_UNMASKED | 
| 376 |  |  |  |  |  |  | or $type==CQL_NWSE); | 
| 377 |  |  |  |  |  |  | } | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | sub match { | 
| 380 | 264 |  |  | 264 | 0 | 374 | my $expected = shift; | 
| 381 | 264 |  |  |  |  | 3056 | debug( "in match(".$expected->getString().")" ); | 
| 382 | 264 | 50 |  |  |  | 710 | if ( $token->getType() != $expected->getType() ) { | 
| 383 | 0 |  |  |  |  | 0 | croak( "expected ".$expected->getString() . | 
| 384 |  |  |  |  |  |  | " but got " . $token->getString() ); | 
| 385 |  |  |  |  |  |  | } | 
| 386 | 264 |  |  |  |  | 877 | $token = $lexer->nextToken(); | 
| 387 | 264 |  |  |  |  | 1207 | debug( "got token type=".$token->getType()." string=".$token->getString() ); | 
| 388 |  |  |  |  |  |  | } | 
| 389 |  |  |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | sub matchSymbol { | 
| 391 | 120 |  |  | 120 | 0 | 263 | debug( "in match symbol" ); | 
| 392 | 120 |  |  |  |  | 297 | my $return = $token->getString(); | 
| 393 | 120 |  |  |  |  | 247 | match( $token ); | 
| 394 | 120 |  |  |  |  | 286 | return $return; | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | sub debug { | 
| 398 | 1530 | 50 |  | 1530 | 0 | 5154 | return unless $CQL::DEBUG; | 
| 399 | 0 |  |  |  |  |  | print STDERR "CQL::Parser: ", shift, "\n"; | 
| 400 |  |  |  |  |  |  | } | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | =head1 XCQL | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | CQL has an XML representation which you can generate from a CQL parse | 
| 405 |  |  |  |  |  |  | tree. Just call the toXCQL() method on the root node you get back | 
| 406 |  |  |  |  |  |  | from a call to parse(). | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | =head1 ERRORS AND DIAGNOSTICS | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | As mentioned above, a CQL syntax error will result in an exception being | 
| 411 |  |  |  |  |  |  | thrown. So if you have any doubts about the CQL that you are parsing you | 
| 412 |  |  |  |  |  |  | should wrap the call to parse() in an eval block, and check $@ | 
| 413 |  |  |  |  |  |  | afterwards to make sure everything went ok. | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | eval { | 
| 416 |  |  |  |  |  |  | my $node = $parser->parse( $cql ); | 
| 417 |  |  |  |  |  |  | }; | 
| 418 |  |  |  |  |  |  | if ( $@ ) { | 
| 419 |  |  |  |  |  |  | print "uhoh, exception $@\n"; | 
| 420 |  |  |  |  |  |  | } | 
| 421 |  |  |  |  |  |  |  | 
| 422 |  |  |  |  |  |  | If you'd like to see blow by blow details while your CQL is being parsed | 
| 423 |  |  |  |  |  |  | set $CQL::DEBUG equal to 1, and you will get details on STDERR. This is | 
| 424 |  |  |  |  |  |  | useful if the parse tree is incorrect and you want to locate where things | 
| 425 |  |  |  |  |  |  | are going wrong. Hopefully this won't happen, but if it does please notify the | 
| 426 |  |  |  |  |  |  | author. | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | =head1 TODO | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | =over 4 | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | =item * toYourEngineHere() please feel free to add functionality and send in | 
| 433 |  |  |  |  |  |  | patches! | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | =back | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | =head1 THANKYOUS | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | CQL::Parser is essentially a Perl port of Mike Taylor's cql-java package | 
| 440 |  |  |  |  |  |  | http://zing.z3950.org/cql/java/. Mike and IndexData were kind enough | 
| 441 |  |  |  |  |  |  | to allow the author to write this port, and to make it available under | 
| 442 |  |  |  |  |  |  | the terms of the Artistic License. Thanks Mike! | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | The CQL::Lexer package relies heavily on Stevan Little's excellent | 
| 445 |  |  |  |  |  |  | String::Tokenizer. Thanks Stevan! | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | CQL::Parser was developed as a component of the Ockham project, | 
| 448 |  |  |  |  |  |  | which is funded by the National Science Foundation. See http://www.ockham.org | 
| 449 |  |  |  |  |  |  | for more information about Ockham. | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | =head1 AUTHOR | 
| 452 |  |  |  |  |  |  |  | 
| 453 |  |  |  |  |  |  | =over 4 | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | =item * Ed Summers - ehs at pobox dot com | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | =item * Brian Cassidy - bricas at cpan dot org | 
| 458 |  |  |  |  |  |  |  | 
| 459 |  |  |  |  |  |  | =item * Wilbert Hengst - W.Hengst at uva dot nl | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | =back | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | Copyright 2004-2009 by Ed Summers | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 468 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | =cut | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | 1; |