line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Search::QueryBuilder; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
42589
|
use 5.008007; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
107
|
|
4
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
36
|
|
6
|
1
|
|
|
1
|
|
2401
|
use Data::Dumper; |
|
1
|
|
|
|
|
15309
|
|
|
1
|
|
|
|
|
2427
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( testme getTokenizedString tokenizeString |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
) ] ); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our @EXPORT = qw( |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Preloaded methods go here. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub new { |
28
|
0
|
|
|
0
|
0
|
|
my $package = shift; |
29
|
0
|
|
|
|
|
|
my $self= { |
30
|
|
|
|
|
|
|
_booleantags=> undef, |
31
|
|
|
|
|
|
|
}; |
32
|
|
|
|
|
|
|
#return bless({}, $package); |
33
|
0
|
|
|
|
|
|
return bless ($self,$package); |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub tags{ |
37
|
0
|
|
|
0
|
0
|
|
my ( $self, @tags) = @_; |
38
|
0
|
|
|
|
|
|
my @defaulttags=("AND","OR","NOT"); |
39
|
0
|
0
|
|
|
|
|
@{$self->{_booleantags}} = @tags if @tags ; |
|
0
|
|
|
|
|
|
|
40
|
0
|
0
|
|
|
|
|
if( defined(@{$self->{_booleantags}})) { |
|
0
|
|
|
|
|
|
|
41
|
0
|
|
|
|
|
|
return @{$self->{_booleantags}}; |
|
0
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
} else { |
43
|
0
|
|
|
|
|
|
return @defaulttags; |
44
|
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub getTokenizedString { |
49
|
0
|
|
|
0
|
0
|
|
my ($self,$query)=@_; |
50
|
|
|
|
|
|
|
|
51
|
0
|
|
|
|
|
|
my @temp; |
52
|
|
|
|
|
|
|
my @temp2; |
53
|
0
|
|
|
|
|
|
my @temp3; |
54
|
0
|
|
|
|
|
|
my @tagbag=$self->tags; |
55
|
0
|
|
|
|
|
|
push(@temp2,tokenizeString($query,@temp)); |
56
|
|
|
|
|
|
|
#temp2 currently represents a tokenized string |
57
|
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
|
my $previous=""; |
59
|
|
|
|
|
|
|
# this cleans out most obvious mistakes |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Uppercase the tagbag tags... |
62
|
0
|
|
|
|
|
|
for(my $i=0;$i<$#temp2;$i++){ |
63
|
0
|
|
|
|
|
|
my $test=uc($temp2[$i]); |
64
|
0
|
0
|
|
|
|
|
if((grep /^$test$/,@tagbag)>0){ |
65
|
0
|
|
|
|
|
|
$temp2[$i]=uc($temp2[$i]); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
} |
69
|
0
|
|
|
|
|
|
foreach my $tempvar (@temp2){ |
70
|
|
|
|
|
|
|
# get rid of duplicates |
71
|
0
|
0
|
0
|
|
|
|
if($previous eq $tempvar){ |
|
|
0
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# or multiple commands (ie AND AND or NOT AND) |
74
|
|
|
|
|
|
|
} elsif((grep /^$previous$/,@tagbag)>0 && (grep /^$tempvar$/,@tagbag)>0){ |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
}else { |
77
|
0
|
|
|
|
|
|
push(@temp3,$tempvar); |
78
|
|
|
|
|
|
|
} |
79
|
0
|
|
|
|
|
|
$previous=$tempvar; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Look for and remove dangling AND OR and NOT |
83
|
0
|
|
|
|
|
|
my $poss=($temp3[$#temp3]); |
84
|
0
|
|
|
|
|
|
while((grep /^$poss$/, @tagbag) >0){ |
85
|
|
|
|
|
|
|
# Remove ands ors and nots from the end, where they are a bit meaningless |
86
|
0
|
|
|
|
|
|
pop(@temp3); |
87
|
0
|
|
|
|
|
|
$poss=($temp3[$#temp3]); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
0
|
|
|
|
|
|
return @temp3; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub testme { |
94
|
|
|
|
|
|
|
#my $myfoo=" Bah FOOOO GRAH BLITHER "; |
95
|
|
|
|
|
|
|
#print ltrim($myfoo)."\n"; |
96
|
|
|
|
|
|
|
#print rtrim($myfoo)."\n"; |
97
|
|
|
|
|
|
|
#print atrim($myfoo)."\n"; |
98
|
|
|
|
|
|
|
#print removeAll($myfoo, "A")."\n"; |
99
|
|
|
|
|
|
|
#print findNearestPrevious("I am a quite long string",12,'q')."\n"; |
100
|
|
|
|
|
|
|
#print tokenizeString("I am a \"fish\" and so are you")."\n"; |
101
|
|
|
|
|
|
|
#print tokenizeString("I am a \"fish and so are you")."\n"; |
102
|
|
|
|
|
|
|
#print tokenizeString("I am a -\"fish +\"and so\" are you")."\n"; |
103
|
|
|
|
|
|
|
#print tokenizeString("I am a +\"fish +\"and so\" are you")."\n"; |
104
|
|
|
|
|
|
|
#print tokenizeString("-I +am a fish and so are you too ")."\n"; |
105
|
|
|
|
|
|
|
#print tokenizeString("I am a -\"fish\"and so\" are you")."\n"; |
106
|
0
|
|
|
0
|
0
|
|
my @temp; |
107
|
|
|
|
|
|
|
my @temp2; |
108
|
0
|
|
|
|
|
|
push(@temp2,tokenizeString(" +\"I am\" a -\"fishy character\" and so\" is Bob",@temp)); |
109
|
|
|
|
|
|
|
#print "Result: ".Data::Dumper->Dump([@temp2])."\n"; |
110
|
0
|
|
|
|
|
|
print "Result: ".join(" ",@temp2)."\n"; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
sub atrim { |
113
|
0
|
|
|
0
|
0
|
|
my $string = shift; |
114
|
0
|
|
|
|
|
|
$string =~ s/^\s+//; |
115
|
0
|
|
|
|
|
|
$string =~ s/\s+$//; |
116
|
0
|
|
|
|
|
|
return $string; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
# Left trim function to remove leading whitespace |
119
|
|
|
|
|
|
|
sub ltrim { |
120
|
0
|
|
|
0
|
0
|
|
my $string = shift; |
121
|
0
|
|
|
|
|
|
$string =~ s/^\s+//; |
122
|
0
|
|
|
|
|
|
return $string; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
# Right trim function to remove trailing whitespace |
125
|
|
|
|
|
|
|
sub rtrim { |
126
|
0
|
|
|
0
|
0
|
|
my $string = shift; |
127
|
0
|
|
|
|
|
|
$string =~ s/\s+$//; |
128
|
0
|
|
|
|
|
|
return $string; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub removeAll{ |
132
|
0
|
|
|
0
|
0
|
|
my ($source,$replaceme)=@_; |
133
|
0
|
|
|
|
|
|
$source=~s/$replaceme//g; |
134
|
0
|
|
|
|
|
|
return $source; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub findNearestPrevious { |
138
|
0
|
|
|
0
|
0
|
|
my ($string, $currentidx,$char)=@_; |
139
|
0
|
|
|
|
|
|
my $tmpvar=$currentidx; |
140
|
0
|
|
|
|
|
|
my @charsinstring=split(//,$string); |
141
|
0
|
|
|
|
|
|
while($tmpvar>-1){ |
142
|
0
|
0
|
|
|
|
|
if($charsinstring[$tmpvar] eq $char){ |
143
|
0
|
|
|
|
|
|
return $tmpvar; |
144
|
|
|
|
|
|
|
} |
145
|
0
|
|
|
|
|
|
$tmpvar--; |
146
|
|
|
|
|
|
|
} |
147
|
0
|
|
|
|
|
|
return $tmpvar; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub tokenizeString { |
152
|
0
|
|
|
0
|
0
|
|
my ($query,@response)=@_; |
153
|
0
|
|
|
|
|
|
$query=removeAll($query,"\'"); |
154
|
0
|
|
|
|
|
|
atrim($query); |
155
|
0
|
|
|
|
|
|
my @tempresponse; |
156
|
0
|
0
|
|
|
|
|
if($query eq ""){ |
157
|
0
|
|
|
|
|
|
return @response; |
158
|
|
|
|
|
|
|
} |
159
|
0
|
0
|
|
|
|
|
if(index($query,"\"")<0){ |
160
|
|
|
|
|
|
|
# Oh jolly good, no quotation marks |
161
|
0
|
|
|
|
|
|
my @splitterms=split(/ /,$query); |
162
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
foreach my $termlet (@splitterms){ |
164
|
0
|
0
|
|
|
|
|
if(substr($termlet,0,1) eq '-'){ |
|
|
0
|
|
|
|
|
|
165
|
0
|
|
|
|
|
|
push(@tempresponse,"NOT"); |
166
|
0
|
|
|
|
|
|
push(@tempresponse,substr($termlet,1,length($termlet))); |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
} elsif (substr($termlet,0,1) eq '+'){ |
169
|
0
|
|
|
|
|
|
push(@tempresponse,"AND"); |
170
|
0
|
|
|
|
|
|
push(@tempresponse,substr($termlet,1,length($termlet))); |
171
|
|
|
|
|
|
|
} else { |
172
|
0
|
|
|
|
|
|
push(@tempresponse,$termlet); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
#print "Current contents: ".join(Data::Dumper->Dump([@response]),","); |
175
|
|
|
|
|
|
|
# print "Current contents: ".join(",",@response)."\n"; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
} else { |
179
|
|
|
|
|
|
|
# bugger. We have quotation marks - repeat, we have quotation marks |
180
|
0
|
|
|
|
|
|
my $firstIndex=index($query,"\""); |
181
|
0
|
|
|
|
|
|
my $secondIndex=index($query,"\"",$firstIndex+1); |
182
|
0
|
|
|
|
|
|
my $testVar=$secondIndex-$firstIndex; |
183
|
0
|
0
|
0
|
|
|
|
if($testVar>-1 && $testVar<2){ # empty quotes?! - sod it |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
184
|
|
|
|
|
|
|
} elsif($testVar<0){ # ... lone lost little quote in middle of nowhere. Put it out of misery |
185
|
0
|
|
|
|
|
|
$query=removeAll($query,"\""); |
186
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString($query,@response)); |
187
|
|
|
|
|
|
|
} elsif ($firstIndex<1){ # first quote at beginning of string... |
188
|
0
|
|
|
|
|
|
push(@tempresponse, substr($query,$firstIndex+1,$testVar-1)); |
189
|
0
|
0
|
|
|
|
|
if($secondIndex
|
190
|
|
|
|
|
|
|
#push(@response,tokenizeString(substr($query,$secondIndex+1,length($query)-$secondIndex+1),@response)); |
191
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString(substr($query,$secondIndex+1,length($query)-$secondIndex+1),@response)); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} else { # first quote not at beginning of string. First quote somewhere random |
194
|
0
|
|
|
|
|
|
my $firstminusone=$firstIndex-1; |
195
|
0
|
0
|
|
|
|
|
if(substr($query,$firstIndex-1,1) eq " "){ |
196
|
|
|
|
|
|
|
# this is fine for most instances, but sometimes there's a - or a + in the way |
197
|
|
|
|
|
|
|
# deal with the most instances first |
198
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString(substr($query,0,$firstIndex))); |
199
|
0
|
|
|
|
|
|
push(@tempresponse,substr($query,$firstIndex+1,$testVar-1)); |
200
|
0
|
0
|
|
|
|
|
if($secondIndex
|
201
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString(substr($query,$secondIndex+1,length($query)-$secondIndex))); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
} else { # there' s a - or + before the "!! the (*&£$(*&!'s! |
205
|
0
|
|
|
|
|
|
my $thirdIndex=findNearestPrevious($query,$firstIndex," "); |
206
|
|
|
|
|
|
|
|
207
|
0
|
0
|
|
|
|
|
if($thirdIndex<0){ # no space start of query |
208
|
0
|
0
|
|
|
|
|
if(substr($query,0,1) eq "-"){ |
|
|
0
|
|
|
|
|
|
209
|
0
|
|
|
|
|
|
push(@tempresponse,"NOT"); |
210
|
0
|
|
|
|
|
|
push(@tempresponse,substr($query,2,$testVar-1)); |
211
|
|
|
|
|
|
|
} elsif(substr($query,0,1) eq "+"){ |
212
|
0
|
|
|
|
|
|
push(@tempresponse,"AND"); |
213
|
0
|
|
|
|
|
|
push(@tempresponse,substr($query,2,$testVar-1)); |
214
|
|
|
|
|
|
|
} else { |
215
|
0
|
|
|
|
|
|
push(@tempresponse,substr($query,0,$testVar-1)); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
} else { # there's a - or + before the ", and we are not at the start of the string... |
221
|
|
|
|
|
|
|
# push(@response,substr($query,0,$thirdIndex)); |
222
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString(substr($query,0,$thirdIndex),@response)); |
223
|
0
|
0
|
|
|
|
|
if(substr($query,$thirdIndex+1,1) eq '-'){ # oh look, a - |
|
|
0
|
|
|
|
|
|
224
|
0
|
|
|
|
|
|
push(@tempresponse,"NOT"); |
225
|
0
|
|
|
|
|
|
$thirdIndex++; |
226
|
|
|
|
|
|
|
}elsif(substr($query,$thirdIndex+1,1) eq '+'){ |
227
|
0
|
|
|
|
|
|
push(@tempresponse,"AND"); |
228
|
0
|
|
|
|
|
|
$thirdIndex++; |
229
|
|
|
|
|
|
|
} |
230
|
0
|
|
|
|
|
|
push(@tempresponse,substr($query,$thirdIndex+2,$secondIndex-$thirdIndex-2)); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
} |
234
|
0
|
0
|
|
|
|
|
if($secondIndex
|
235
|
|
|
|
|
|
|
# yet more to play with? |
236
|
0
|
|
|
|
|
|
push(@tempresponse,tokenizeString(substr($query,$secondIndex+2,length($query)-$secondIndex),@response)); |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
} |
248
|
0
|
|
|
|
|
|
return @tempresponse; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub build{ |
252
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
253
|
0
|
|
|
|
|
|
return; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
1; |
257
|
|
|
|
|
|
|
__END__ |