File Coverage

blib/lib/App/ElasticSearch/Utilities/QueryString.pm
Criterion Covered Total %
statement 81 84 96.4
branch 24 32 75.0
condition 6 9 66.6
subroutine 12 12 100.0
pod 1 1 100.0
total 124 138 89.8


line stmt bran cond sub pod time code
1             package App::ElasticSearch::Utilities::QueryString;
2             # ABSTRACT: CLI query string fixer
3              
4 1     1   492 use strict;
  1         8  
  1         27  
5 1     1   5 use warnings;
  1         3  
  1         40  
6              
7             our $VERSION = '8.5'; # VERSION
8              
9 1     1   540 use App::ElasticSearch::Utilities qw(:config);
  1         3  
  1         8  
10 1     1   932 use App::ElasticSearch::Utilities::Query;
  1         4  
  1         43  
11 1     1   9 use CLI::Helpers qw(:output);
  1         2  
  1         8  
12 1     1   732 use Module::Pluggable::Object;
  1         7497  
  1         32  
13 1     1   8 use Moo;
  1         3  
  1         9  
14 1     1   361 use Ref::Util qw(is_arrayref);
  1         5  
  1         47  
15 1     1   6 use Types::Standard qw(ArrayRef Enum HashRef);
  1         2  
  1         9  
16              
17 1     1   1122 use namespace::autoclean;
  1         3  
  1         7  
18              
19              
20             my %JOINING = map { $_ => 1 } qw( AND OR );
21             my %TRAILING = map { $_ => 1 } qw( AND OR NOT );
22              
23              
24             has 'context' => (
25             is => 'rw',
26             isa => Enum[qw(query filter)],
27             lazy => 1,
28             default => sub { 'query' },
29             );
30              
31              
32             has search_path => (
33             is => 'rw',
34             isa => ArrayRef,
35             default => sub {[]},
36             );
37              
38              
39             has default_join => (
40             is => 'rw',
41             isa => Enum[qw(AND OR)],
42             default => sub { 'AND' },
43             );
44              
45              
46             has plugins => (
47             is => 'ro',
48             isa => ArrayRef,
49             builder => '_build_plugins',
50             lazy => 1,
51             );
52              
53              
54             has fields_meta => (
55             is => 'rw',
56             isa => HashRef,
57             default => sub { {} },
58             );
59              
60              
61             sub expand_query_string {
62 7     7 1 50 my $self = shift;
63              
64 7         169 my $query = App::ElasticSearch::Utilities::Query->new(
65             fields_meta => $self->fields_meta,
66             );
67 7         9196 my @processed = ();
68 7         21 TOKEN: foreach my $token (@_) {
69 15         24 foreach my $p (@{ $self->plugins }) {
  15         267  
70 56         1174 my $res = $p->handle_token($token);
71 56 100       140 if( defined $res ) {
72 11 100       39 push @processed, is_arrayref($res) ? @{$res} : $res;
  8         16  
73 11         34 next TOKEN;
74             }
75             }
76 4         15 push @processed, { query_string => $token };
77             }
78              
79 7         33 debug({color=>"magenta"}, "Processed parts");
80 7         83 debug_var({color=>"magenta"},\@processed);
81              
82 7 50       20279 my $context = $self->context eq 'query' ? 'must' : 'filter';
83 7         90 my $invert=0;
84 7         13 my @dangling=();
85 7         12 my @qs=();
86 7         16 foreach my $part (@processed) {
87 15 100       40 if( exists $part->{dangles} ) {
    100          
    50          
    0          
88 6         12 push @dangling, $part->{query_string};
89             }
90             elsif( exists $part->{query_string} ) {
91 4         11 push @qs, @dangling, $part->{query_string};
92 4         8 @dangling=(),
93             }
94             elsif( exists $part->{condition} ) {
95 5 50       20 my $target = $invert ? 'must_not' : $context;
96 5         22 $query->add_bool( $target => $part->{condition} );
97 5         15 @dangling=();
98             }
99             elsif( exists $part->{nested} ) {
100 0         0 $query->nested($part->{nested}{query});
101 0         0 $query->nested_path($part->{nested}{path});
102 0         0 @dangling=();
103             }
104             # Carry over the Inversion for instance where we jump out of the QS
105 15   100     51 $invert = exists $part->{invert} && $part->{invert};
106             }
107 7 100       32 if(@qs) {
108 3   33     21 pop @qs while @qs && exists $TRAILING{$qs[-1]};
109 3   66     21 shift @qs while @qs && exists $JOINING{$qs[0]};
110              
111             # Ensure there's a joining token, otherwise use our default
112 3 100       11 if( @qs > 1 ) {
113 2         4 my $prev_query = 0;
114 2         5 my @joined = ();
115 2         4 foreach my $part ( @qs ) {
116 6 100       13 if( $prev_query ) {
117 1 50       3 push @joined, $self->default_join() unless exists $JOINING{$part};
118             }
119 6         10 push @joined, $part;
120             # Here we include AND, NOT, OR
121 6 100       15 $prev_query = exists $TRAILING{$part} ? 0 : 1;
122             }
123 2         7 @qs = @joined;
124             }
125             }
126 7 100       39 $query->add_bool($context => { query_string => { query => join(' ', @qs) } }) if @qs;
127              
128 7         27 return $query;
129             }
130              
131             # Builder Routines for QS Objects
132             sub _build_plugins {
133 1     1   15 my $self = shift;
134 1         4 my $globals = es_globals('plugins');
135             my $finder = Module::Pluggable::Object->new(
136 1         4 search_path => ['App::ElasticSearch::Utilities::QueryString',@{ $self->search_path }],
  1         20  
137             except => [qw(App::ElasticSearch::Utilities::QueryString::Plugin)],
138             instantiate => 'new',
139             );
140 1         26 my @plugins;
141 1 50       7 foreach my $p ( sort { $a->priority <=> $b->priority || $a->name cmp $b->name }
  13 50       431  
142             $finder->plugins( options => defined $globals ? $globals : {} )
143             ) {
144 7         124 debug(sprintf "Loaded %s with priority:%d", $p->name, $p->priority);
145 7         316 push @plugins, $p;
146             }
147 1         40 return \@plugins;
148             }
149              
150             # Return true
151             1;
152              
153             __END__
154              
155             =pod
156              
157             =head1 NAME
158              
159             App::ElasticSearch::Utilities::QueryString - CLI query string fixer
160              
161             =head1 VERSION
162              
163             version 8.5
164              
165             =head1 SYNOPSIS
166              
167             This class provides a pluggable architecture to expand query strings on the
168             command-line into complex Elasticsearch queries.
169              
170             =head1 ATTRIBUTES
171              
172             =head2 context
173              
174             Defaults to 'query', but can also be set to 'filter' so the elements will be
175             added to the 'must' or 'filter' parameter.
176              
177             =head2 search_path
178              
179             An array reference of additional namespaces to search for loading the query string
180             processing plugins. Example:
181              
182             $qs->search_path([qw(My::Company::QueryString)]);
183              
184             This will search:
185              
186             App::ElasticSearch::Utilities::QueryString::*
187             My::Company::QueryString::*
188              
189             For query processing plugins.
190              
191             =head2 default_join
192              
193             When fixing up the query string, if two tokens are found next to each other
194             missing a joining token, join using this token. Can be either C<AND> or C<OR>,
195             and defaults to C<AND>.
196              
197             =head2 plugins
198              
199             Array reference of ordered query string processing plugins, lazily assembled.
200              
201             =head2 fields_meta
202              
203             A hash reference with the field data from L<App::ElasticSearch::Utilities/es_index_fields>.
204              
205             =head1 METHODS
206              
207             =head2 expand_query_string(@tokens)
208              
209             This function takes a list of tokens, often from the command line via @ARGV. Uses
210             a plugin infrastructure to allow customization.
211              
212             Returns: L<App::ElasticSearch::Utilities::Query> object
213              
214             =head1 TOKENS
215              
216             The token expansion plugins can return undefined, which is basically a noop on the token.
217             The plugin can return a hash reference, which marks that token as handled and no other plugins
218             receive that token. The hash reference may contain:
219              
220             =over 2
221              
222             =item query_string
223              
224             These are the rewritten bits that will be reassembled into the final query string.
225              
226             =item condition
227              
228             This is usually a hash reference representing the condition going into the bool query. For instance:
229              
230             { terms => { field => [qw(alice bob charlie)] } }
231              
232             Or
233              
234             { prefix => { user_agent => 'Go ' } }
235              
236             These conditions will wind up in the B<must> or B<must_not> section of the B<bool> query depending on the
237             state of the invert flag.
238              
239             =item invert
240              
241             This is used by the bareword "not" to track whether the token invoked a flip from the B<must> to the B<must_not>
242             state. After each token is processed, if it didn't set this flag, the flag is reset.
243              
244             =item dangles
245              
246             This is used for bare words like "not", "or", and "and" to denote that these terms cannot dangle from the
247             beginning or end of the query_string. This allows the final pass of the query_string builder to strip these
248             words to prevent syntax errors.
249              
250             =back
251              
252             =head1 Extended Syntax
253              
254             The search string is pre-analyzed before being sent to ElasticSearch. The following plugins
255             work to manipulate the query string and provide richer, more complete syntax for CLI applications.
256              
257             =head2 App::ElasticSearch::Utilities::QueryString::AutoEscape
258              
259             Provide an '=' prefix to a query string parameter to promote that parameter to a C<term> filter.
260              
261             This allows for exact matches of a field without worrying about escaping Lucene special character filters.
262              
263             E.g.:
264              
265             user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1"
266              
267             Is evaluated into a weird query that doesn't do what you want. However:
268              
269             =user_agent:"Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1"
270              
271             Is translated into:
272              
273             { term => { user_agent => "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1" } }
274              
275             Which provides an exact match to the term in the query.
276              
277             =head2 App::ElasticSearch::Utilities::QueryString::Barewords
278              
279             The following barewords are transformed:
280              
281             or => OR
282             and => AND
283             not => NOT
284              
285             =head2 App::ElasticSearch::Utilities::QueryString::IP
286              
287             If a field is an IP address using CIDR notation, it's expanded to a range query.
288              
289             src_ip:10.0/8 => src_ip:[10.0.0.0 TO 10.255.255.255]
290              
291             =head2 App::ElasticSearch::Utilities::QueryString::Ranges
292              
293             This plugin translates some special comparison operators so you don't need to
294             remember them anymore.
295              
296             Example:
297              
298             price:<100
299              
300             Will translate into a:
301              
302             { range: { price: { lt: 100 } } }
303              
304             And:
305              
306             price:>50,<100
307              
308             Will translate to:
309              
310             { range: { price: { gt: 50, lt: 100 } } }
311              
312             =head3 Supported Operators
313              
314             B<gt> via E<gt>, B<gte> via E<gt>=, B<lt> via E<lt>, B<lte> via E<lt>=
315              
316             =head2 App::ElasticSearch::Utilities::QueryString::Underscored
317              
318             This plugin translates some special underscore surrounded tokens into
319             the Elasticsearch Query DSL.
320              
321             Implemented:
322              
323             =head3 _prefix_
324              
325             Example query string:
326              
327             _prefix_:useragent:'Go '
328              
329             Translates into:
330              
331             { prefix => { useragent => 'Go ' } }
332              
333             =head2 App::ElasticSearch::Utilities::QueryString::FileExpansion
334              
335             If the match ends in .dat, .txt, .csv, or .json then we attempt to read a file with that name and OR the condition:
336              
337             $ cat test.dat
338             50 1.2.3.4
339             40 1.2.3.5
340             30 1.2.3.6
341             20 1.2.3.7
342              
343             Or
344              
345             $ cat test.csv
346             50,1.2.3.4
347             40,1.2.3.5
348             30,1.2.3.6
349             20,1.2.3.7
350              
351             Or
352              
353             $ cat test.txt
354             1.2.3.4
355             1.2.3.5
356             1.2.3.6
357             1.2.3.7
358              
359             Or
360              
361             $ cat test.json
362             { "ip": "1.2.3.4" }
363             { "ip": "1.2.3.5" }
364             { "ip": "1.2.3.6" }
365             { "ip": "1.2.3.7" }
366              
407             We can also have a group of wildcards or regexps in a file:
368              
369             src_ip:test.dat => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
370             src_ip:test.json[ip] => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)
371              
372             This makes it simple to use the --data-file output options and build queries
373             based off previous queries. For .txt and .dat files, the delimiter for columns
374             in the file must be either a tab or a null. For files ending in
375             .csv, Text::CSV_XS is used for accurate parsing of the file format. Files
376             ending in .json are considered to be newline-delimited JSON.
377              
378             You can also specify the column of the data file to use, the default being the last column or (-1). Columns are
379             B<zero-based> indexing. This means the first column is index 0, second is 1, .. The previous example can be rewritten
380             as:
381              
382             src_ip:test.dat[1]
383              
384             or:
385             src_ip:test.dat[-1]
386              
387             For newline delimited JSON files, you need to specify the key path you want to extract from the file. If we have a
388             JSON source file with:
389              
390             { "first": { "second": { "third": [ "bob", "alice" ] } } }
391             { "first": { "second": { "third": "ginger" } } }
392             { "first": { "second": { "nope": "fred" } } }
393              
394             We could search using:
395              
396             actor:test.json[first.second.third]
397              
398             Which would expand to:
399              
400             { "terms": { "actor": [ "alice", "bob", "ginger" ] } }
401              
402             This option will iterate through the whole file and unique the elements of the list. They will then be transformed into
403             an appropriate L<terms query|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html>.
404              
405             =head3 Wildcards
406              
407             We can also have a group of wildcard or regexp in a file:
408              
409             $ cat wildcards.dat
410             *@gmail.com
411             *@yahoo.com
412              
413             To enable wildcard parsing, prefix the filename with a C<*>.
414              
415             es-search.pl to_address:*wildcards.dat
416              
417             Which expands the query to:
418              
419             {
420             "bool": {
421             "minimum_should_match":1,
422             "should": [
423             {"wildcard":{"to_address":{"value":"*@gmail.com"}}},
424             {"wildcard":{"to_address":{"value":"*@yahoo.com"}}}
425             ]
426             }
427             }
428              
429             No attempt is made to verify or validate the wildcard patterns.
430              
431             =head3 Regular Expressions
432              
433             If you'd like to specify a file full of regexp, you can do that as well:
434              
435             $ cat regexp.dat
436             .*google\.com$
437             .*yahoo\.com$
438              
439             To enable regexp parsing, prefix the filename with a C<~>.
440              
441             es-search.pl to_address:~regexp.dat
442              
443             Which expands the query to:
444              
445             {
446             "bool": {
447             "minimum_should_match":1,
448             "should": [
449             {"regexp":{"to_address":{"value":".*google\\.com$"}}},
450             {"regexp":{"to_address":{"value":".*yahoo\\.com$"}}}
451             ]
452             }
453             }
454              
455             No attempt is made to verify or validate the regexp expressions.
456              
457             =head2 App::ElasticSearch::Utilities::QueryString::Nested
458              
459             Implement the proposed nested query syntax early. Example:
460              
461             nested_path:"field:match AND string"
462              
463             =head1 AUTHOR
464              
465             Brad Lhotsky <brad@divisionbyzero.net>
466              
467             =head1 COPYRIGHT AND LICENSE
468              
469             This software is Copyright (c) 2023 by Brad Lhotsky.
470              
471             This is free software, licensed under:
472              
473             The (three-clause) BSD License
474              
475             =cut