line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# Copyright (C) 1999 Eric Bohlman, Loic Dachary |
3
|
|
|
|
|
|
|
# Copyright (C) 2013 Jon Jensen |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify it |
6
|
|
|
|
|
|
|
# under the terms of the GNU General Public License as published by the |
7
|
|
|
|
|
|
|
# Free Software Foundation; either version 2, or (at your option) any |
8
|
|
|
|
|
|
|
# later version. You may also use, redistribute and/or modify it |
9
|
|
|
|
|
|
|
# under the terms of the Artistic License supplied with your Perl |
10
|
|
|
|
|
|
|
# distribution |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15
|
|
|
|
|
|
|
# GNU General Public License for more details. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
18
|
|
|
|
|
|
|
# along with this program; if not, write to the Free Software |
19
|
|
|
|
|
|
|
# Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
package Text::Query::ParseAdvanced; |
22
|
|
|
|
|
|
|
|
23
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
82
|
|
24
|
|
|
|
|
|
|
|
25
|
2
|
|
|
2
|
|
12
|
use Carp; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
200
|
|
26
|
2
|
|
|
2
|
|
11
|
use Text::Query::Parse; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
52
|
|
27
|
|
|
|
|
|
|
|
28
|
2
|
|
|
2
|
|
10
|
use vars qw(@ISA); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
3163
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
@ISA = qw(Text::Query::Parse); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub prepare { |
33
|
29
|
|
|
29
|
1
|
45
|
my($self) = shift; |
34
|
29
|
|
|
|
|
52
|
my($qstring) = shift; |
35
|
29
|
|
|
|
|
54
|
my(%args) = @_; |
36
|
|
|
|
|
|
|
|
37
|
29
|
|
|
|
|
119
|
my $default_operators = { |
38
|
|
|
|
|
|
|
'or' => 'or', |
39
|
|
|
|
|
|
|
'and' => 'and', |
40
|
|
|
|
|
|
|
'near' => 'near', |
41
|
|
|
|
|
|
|
'not' => 'not', |
42
|
|
|
|
|
|
|
}; |
43
|
|
|
|
|
|
|
|
44
|
29
|
|
100
|
|
|
184
|
$self->{'scope_map'} = $args{-scope_map} || {}; |
45
|
|
|
|
|
|
|
|
46
|
29
|
|
|
|
|
179
|
return $self->SUPER::prepare($qstring, -near=>10, -operators=>$default_operators, @_); |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub expression($) { |
50
|
37
|
|
|
37
|
1
|
60
|
my($self) = shift; |
51
|
37
|
|
|
|
|
38
|
my($rv, $t); |
52
|
37
|
|
|
|
|
105
|
my($or) = $self->{parseopts}{-operators}{or}; |
53
|
37
|
|
|
|
|
54
|
my($tokens) = $self->{'tokens'}; |
54
|
37
|
|
|
|
|
106
|
$self->{'token'} = shift(@$tokens); |
55
|
37
|
|
|
|
|
90
|
$rv = $self->conj(); |
56
|
37
|
|
100
|
|
|
233
|
while(defined($self->{'token'}) and $self->{'token'} =~ /^($or|\|)$/i) { |
57
|
15
|
|
|
|
|
24
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
15
|
|
|
|
|
32
|
|
58
|
15
|
|
|
|
|
38
|
$t= $self->conj(); |
59
|
15
|
|
|
|
|
64
|
$rv = $self->build_expression($rv,$t); |
60
|
|
|
|
|
|
|
} |
61
|
37
|
|
|
|
|
118
|
return $self->build_expression_finish($rv); |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub conj($) { |
65
|
52
|
|
|
52
|
0
|
70
|
my($self) = shift; |
66
|
52
|
|
|
|
|
60
|
my($rv); |
67
|
52
|
|
|
|
|
64
|
my($first) = 1; |
68
|
52
|
|
|
|
|
123
|
my($and) = $self->{parseopts}{-operators}{and}; |
69
|
52
|
|
|
|
|
110
|
$rv = $self->concat(); |
70
|
52
|
|
100
|
|
|
338
|
while(defined($self->{'token'}) and $self->{'token'} =~ /^($and|&)$/i) { |
71
|
15
|
|
|
|
|
18
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
15
|
|
|
|
|
35
|
|
72
|
15
|
|
|
|
|
33
|
$rv = $self->build_conj($rv, concat($self), $first); |
73
|
15
|
|
|
|
|
82
|
$first=0; |
74
|
|
|
|
|
|
|
} |
75
|
52
|
|
|
|
|
104
|
return $rv; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub concat($) { |
79
|
67
|
|
|
67
|
0
|
85
|
my($self) = shift; |
80
|
67
|
|
|
|
|
75
|
my($rv,$t,$l); |
81
|
67
|
|
|
|
|
143
|
my($not) = $self->{parseopts}{-operators}{not}; |
82
|
67
|
|
|
|
|
128
|
my($near) = $self->{parseopts}{-operators}{near}; |
83
|
67
|
|
|
|
|
123
|
$rv = factor($self); |
84
|
67
|
|
100
|
|
|
585
|
while(defined($self->{'token'}) and ($l = $self->{'token'}) =~ /^\e|([\(!\~]|$not|$near)$/i) { |
85
|
5
|
100
|
|
|
|
73
|
$self->{'token'} = shift(@{$self->{'tokens'}}) if($l =~ /^($near|\~)$/i); |
|
3
|
|
|
|
|
9
|
|
86
|
5
|
|
|
|
|
15
|
$t = factor($self); |
87
|
5
|
100
|
|
|
|
78
|
if($l =~ /^($near|\~)$/i) { |
88
|
3
|
|
|
|
|
28
|
$rv = $self->build_near($rv, $t); |
89
|
|
|
|
|
|
|
} else { |
90
|
2
|
|
|
|
|
24
|
$rv = $self->build_concat($rv, $t); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
} |
93
|
67
|
|
|
|
|
186
|
return $rv; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub factor($) { |
97
|
85
|
|
|
85
|
0
|
109
|
my($self) = shift; |
98
|
|
|
|
|
|
|
|
99
|
85
|
|
|
|
|
85
|
my($rv,$t); |
100
|
85
|
|
|
|
|
175
|
my($not) = $self->{parseopts}{-operators}{not}; |
101
|
85
|
50
|
|
|
|
719
|
if(!defined($t = $self->{'token'})) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
102
|
0
|
|
|
|
|
0
|
croak("out of token in factor"); |
103
|
|
|
|
|
|
|
} elsif($t eq '(') { |
104
|
8
|
|
|
|
|
23
|
$rv = $self->expression(); |
105
|
8
|
50
|
33
|
|
|
57
|
if(defined($self->{'token'}) and $self->{'token'} eq ')') { |
106
|
8
|
|
|
|
|
9
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
8
|
|
|
|
|
22
|
|
107
|
|
|
|
|
|
|
} else { |
108
|
0
|
|
|
|
|
0
|
croak("missing closing parenthesis in factor"); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} elsif($t =~ /^($not|!)$/i) { |
111
|
5
|
|
|
|
|
8
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
5
|
|
|
|
|
13
|
|
112
|
5
|
|
|
|
|
22
|
$rv = $self->build_negation($self->factor()); |
113
|
|
|
|
|
|
|
} elsif($t =~ s/^\e//) { |
114
|
64
|
|
|
|
|
8744
|
$rv = $self->build_literal($t); |
115
|
64
|
|
|
|
|
80
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
64
|
|
|
|
|
146
|
|
116
|
|
|
|
|
|
|
} elsif($t =~ s/:$//) { |
117
|
8
|
|
|
|
|
11
|
$self->{'token'} = shift(@{$self->{'tokens'}}); |
|
8
|
|
|
|
|
21
|
|
118
|
8
|
|
66
|
|
|
12
|
unshift(@{$self->{'scope'}}, ($self->{'scope_map'}{$t} || $t)); |
|
8
|
|
|
|
|
49
|
|
119
|
8
|
|
|
|
|
40
|
$self->build_scope_start(); |
120
|
8
|
|
|
|
|
25
|
$rv = $self->build_scope_end($self->factor()); |
121
|
8
|
|
|
|
|
11
|
shift(@{$self->{'scope'}}); |
|
8
|
|
|
|
|
15
|
|
122
|
|
|
|
|
|
|
} else { |
123
|
0
|
|
|
|
|
0
|
croak("unexpected token $t in factor"); |
124
|
|
|
|
|
|
|
} |
125
|
85
|
|
|
|
|
268
|
return $rv; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub parse_tokens { |
129
|
29
|
|
|
29
|
1
|
104
|
local($^W) = 0; |
130
|
29
|
|
|
|
|
41
|
my($self) = shift; |
131
|
29
|
|
|
|
|
42
|
my($line) = @_; |
132
|
29
|
|
|
|
|
36
|
my($quote, $quoted, $unquoted, $delim, $word); |
133
|
29
|
|
|
|
|
63
|
my($quotes) = $self->{parseopts}{-quotes}; |
134
|
29
|
|
|
|
|
38
|
my($operators) = join("|", values(%{$self->{parseopts}{-operators}})); |
|
29
|
|
|
|
|
127
|
|
135
|
29
|
|
|
|
|
46
|
my(@tokens) = (); |
136
|
|
|
|
|
|
|
|
137
|
29
|
50
|
|
|
|
83
|
warn("quotes = $quotes") if($self->{-verbose} > 1); |
138
|
29
|
|
|
|
|
72
|
while(length($line)) { |
139
|
93
|
|
|
|
|
19143
|
($quote, $quoted, undef, $unquoted, $delim, undef) = |
140
|
|
|
|
|
|
|
$line =~ m/^([$quotes]) # a $quote |
141
|
|
|
|
|
|
|
((?:\\.|(?!\1)[^\\])*) # and $quoted text |
142
|
|
|
|
|
|
|
\1 # followed by the same quote |
143
|
|
|
|
|
|
|
([\000-\377]*) # and the rest |
144
|
|
|
|
|
|
|
| # --OR-- |
145
|
|
|
|
|
|
|
^((?:\\.|[^\\$quotes])*?) # an $unquoted text |
146
|
|
|
|
|
|
|
(\Z(?!\n)|(?:\s*([()|&!\~]|\b(?:$operators)\b|\b(?:[-,_\.\w]+\:))\s*)|(?!^)(?=[$quotes])) # plus EOL, delimiter, or quote |
147
|
|
|
|
|
|
|
([\000-\377]*) # the rest |
148
|
|
|
|
|
|
|
/ix; # extended layout |
149
|
|
|
|
|
|
|
|
150
|
93
|
50
|
33
|
|
|
1496
|
warn("quote = $quote") if($self->{-verbose} > 1 && $quote); |
151
|
93
|
50
|
100
|
|
|
476
|
last unless($quote || length($unquoted) || length($delim)); |
|
|
|
66
|
|
|
|
|
152
|
93
|
|
|
|
|
209
|
$line = $+; |
153
|
93
|
|
|
|
|
169
|
$unquoted =~ s/^\s+//; |
154
|
93
|
|
|
|
|
135
|
$unquoted =~ s/\s+$//; |
155
|
93
|
100
|
|
|
|
199
|
$word .= defined($quote) ? $quoted : $unquoted; |
156
|
93
|
0
|
0
|
|
|
230
|
warn("word = $word") if($self->{-verbose} > 1 and (length($word) and (length($delim) or !length($line)))); |
|
|
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
157
|
93
|
100
|
100
|
|
|
484
|
push(@tokens,"\e$word") if(length($word) and (length($delim) or !length($line))); |
|
|
|
66
|
|
|
|
|
158
|
93
|
|
|
|
|
180
|
$delim =~ s/^\s+//; |
159
|
93
|
|
|
|
|
224
|
$delim =~ s/\s+$//; |
160
|
93
|
50
|
33
|
|
|
261
|
warn("delim = $word") if($self->{-verbose} > 1 and length($delim)); |
161
|
93
|
100
|
|
|
|
213
|
push(@tokens, $delim) if(length($delim)); |
162
|
93
|
100
|
|
|
|
322
|
undef $word if(length($delim)); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
29
|
50
|
|
|
|
72
|
warn("parsed tokens @tokens") if($self->{-verbose} > 1); |
166
|
|
|
|
|
|
|
|
167
|
29
|
|
|
|
|
160
|
$self->{'tokens'} = \@tokens; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
1; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
__END__ |