File Coverage

blib/lib/App/ElasticSearch/Utilities/Aggregations.pm

Criterion	Covered	Total	%
statement	37	104	35.5
branch	11	48	22.9
condition	6	25	24.0
subroutine	5	8	62.5
pod	3	3	100.0
total	62	188	32.9

line	stmt	bran	cond	sub	pod	time	code
1							package App::ElasticSearch::Utilities::Aggregations;
2							# ABSTRACT: Code to simplify creating and working with Elasticsearch aggregations
3
4	3			3		502	use strict;
	3					13
	3					113
5	3			3		16	use warnings;
	3					7
	3					88
6
7	3			3		636	use Storable qw(dclone);
	3					3226
	3					298
8	3					31	use Sub::Exporter -setup => {
9							exports => [ qw(
10							expand_aggregate_string
11							es_flatten_aggregations es_flatten_aggs
12							is_single_stat
13							)],
14							groups => {
15							default => [qw(
16							expand_aggregate_string
17							es_flatten_aggregations es_flatten_aggs
18							is_single_stat
19							)],
20							},
21	3			3		653	};
	3					17173
22
23							my %Aggregations;
24
25
26							$Aggregations{terms} = {
27							params => sub { $_[0] && $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
28							type => 'bucket',
29							composite => 1,
30							};
31
32
33							$Aggregations{significant_terms} = {
34							params => sub { $_[0] =~ /^\d+$/ ? { size => $_[0] } : {} },
35							type => 'bucket',
36							};
37
38
39							$Aggregations{rare_terms} = {
40							params => sub { $_[0] =~ /^\d+$/ ? { max_doc_count => $_[0] } : {} },
41							type => 'bucket',
42							};
43
44
45							$Aggregations{histogram} = {
46							params => sub {
47							return unless $_[0] > 0;
48							return { interval => $_[0] };
49							},
50							type => 'bucket',
51							composite => 1,
52							};
53
54
55							$Aggregations{date_histogram} = {
56							params => sub { { calendar_interval => $_[0] \|\| '1h' } },
57							type => 'bucket',
58							composite => 1,
59							};
60
61
62							$Aggregations{geohash_grid} = {
63							params => sub { $_[0] =~ /^\d+$/ ? { precision => $_[0] } : {} },
64							type => 'bucket',
65							composite => 1,
66							};
67
68
69							$Aggregations{missing} = { type => 'bucket' };
70
71
72							$Aggregations{avg} = { single_stat => 1, type => 'metric' };
73							$Aggregations{max} = { single_stat => 1, type => 'metric' };
74							$Aggregations{min} = { single_stat => 1, type => 'metric' };
75							$Aggregations{sum} = { single_stat => 1, type => 'metric' };
76
77
78							$Aggregations{cardinality} = { single_stat => 1, type => 'metric' };
79
80
81							$Aggregations{stats} = { type => 'metric' };
82
83
84							$Aggregations{extended_stats} = { type => 'metric' };
85
86
87							$Aggregations{percentiles} = {
88							params => sub {
89							my @pcts = $_[0] ? split /,/, $_[0] : qw(25 50 75 90);
90							return { percents => \@pcts };
91							},
92							};
93
94
95							$Aggregations{geo_centroid} = { type => 'metric' };
96
97
98
99
100							sub is_single_stat {
101	0			0	1	0	my ($agg) = @_;
102	0	0				0	return unless $agg;
103	0	0				0	return unless exists $Aggregations{$agg};
104	0	0				0	return unless exists $Aggregations{$agg}->{single_stat};
105	0					0	return $Aggregations{$agg}->{single_stat};
106							}
107
108
109							sub expand_aggregate_string {
110	4			4	1	3739	my ($token) = @_;
111
112	4					9	my %aggs = ();
113	4					14	foreach my $def ( split /\+/, $token ) {
114	4	100				24	my $alias = $def =~ s/^(\w+)=// ? $1 : undef;
115	4					24	my @parts = split /:/, $def, 3;
116	4	100				14	if( @parts == 1 ) {
117	1		33			11	$alias \|\|= $def;
118	1					5	$aggs{$alias} = { terms => { field => $def, size => 20 } };
119	1					4	next;
120							}
121	3					7	my ($agg, $field);
122	3	50				11	if( exists $Aggregations{$parts[0]} ) {
123	0					0	$agg = shift @parts;
124	0					0	$field = shift @parts;
125							}
126							else {
127	3					5	$agg = 'terms';
128	3					7	$field = shift @parts;
129							}
130	3					7	my $params = {};
131	3					6	my $paramStr = shift @parts;
132
133	3	100	66			24	if( $paramStr && $paramStr =~ /\w+=/ ) {
		50
134							# split on commas using a positive lookahead for a "word="
135	2					8	foreach my $token (split /,(?=\w+=)/, $paramStr) {
136	2					7	my ($k,$v) = split /=/, $token, 2;
137	2	50	33			11	next unless $k and $v;
138	2	50				11	$params->{$k} = $v =~ /,/ ? [ split /,/, $v ] : $v;
139							}
140							}
141							elsif( exists $Aggregations{$agg}->{params} ) {
142							# Process parameters
143	1					6	$params = $Aggregations{$agg}->{params}->($paramStr);
144							}
145	3	50	66			18	$alias \|\|= join ".", $agg eq 'terms' ? ($field) : ($agg, $field);
146	3					5	$aggs{$alias} = { $agg => { field => $field, %{ $params } } };
	3					18
147							}
148	4					14	return \%aggs;
149							}
150
151
152							sub es_flatten_aggregations {
153	0			0	1		my ($result,$field,$parent) = @_;
154
155	0		0				$parent \|\|= [];
156	0						my @rows = ();
157
158	0						my @remove = qw(
159							doc_count_error_upper_bound
160							sum_other_doc_count
161							);
162
163	0						my $row = dclone($parent);
164							my $extract = sub {
165	0			0			my ($key, $hash) = @_;
166
167	0	0					if( $hash->{value_as_string} ) {
		0
		0
168	0						push @{ $row }, $key, $hash->{value_as_string};
	0
169							}
170							elsif( $hash->{value} ) {
171	0						push @{ $row }, $key, $hash->{value};
	0
172							}
173							elsif( $hash->{values} ) {
174	0						foreach my $k ( sort keys %{ $hash->{values} } ) {
	0
175	0						push @{ $row }, "$key.$k", $hash->{values}{$k}
176	0	0					if $hash->{values}{$k};
177							}
178							}
179							else {
180	0						foreach my $k (sort keys %{ $hash }) {
	0
181	0	0					last if $k eq 'buckets';
182	0						push @{ $row }, "$key.$k", $hash->{$k}
183	0	0					if defined $hash->{values}{$k};
184							}
185							}
186	0						};
187
188	0	0					if( $field ) {
189	0						delete $result->{$_} for @remove;
190	0	0	0				if( $result->{key} and exists $result->{doc_count} ) {
191	0						my $k = delete $result->{key};
192	0						my $ks = delete $result->{key_as_string};
193	0		0				push @{ $row }, $field, $ks \|\| $k;
	0
194	0		0				push @{ $row }, "$field.hits", delete $result->{doc_count} \|\| 0;
	0
195							}
196	0						my %buckets = ();
197	0						foreach my $k ( sort keys %{ $result } ) {
	0
198	0	0					if( ref $result->{$k} eq 'HASH' ) {
199	0						$extract->($k, $result->{$k});
200
201	0	0					if( my $buckets = delete $result->{$k}{buckets} ) {
202	0						$buckets{$k} = $buckets;
203							}
204							}
205							}
206	0	0					if( keys %buckets ) {
207	0						foreach my $k ( sort keys %buckets ) {
208	0	0					if( @{ $buckets{$k} } ) {
	0
209	0						foreach my $bucket ( @{ $buckets{$k} } ) {
	0
210	0						push @rows, @{ es_flatten_aggregations($bucket, $k, $row) };
	0
211							}
212							}
213							else {
214	0						push @rows, $row;
215							}
216							}
217							}
218							else {
219	0						push @rows, $row;
220							}
221							}
222							else {
223	0						foreach my $k ( sort keys %{ $result } ) {
	0
224	0						delete $result->{$k}{$_} for @remove;
225	0						$extract->($k, $result->{$k});
226	0						my $buckets = delete $result->{$k}{buckets};
227	0	0	0				if( $buckets and @{ $buckets } ) {
	0
228	0						foreach my $bucket ( @{ $buckets } ) {
	0
229	0						push @rows, @{ es_flatten_aggregations($bucket,$k,$row) };
	0
230							}
231							}
232							else {
233	0						push @rows, $row;
234							}
235							}
236							}
237
238	0						return \@rows;
239							}
240
241							# Setup Aliases
242							*es_flatten_aggs = \&es_flatten_aggregations;
243
244
245							1;
246
247							__END__
248
249							=pod
250
251							=head1 NAME
252
253							App::ElasticSearch::Utilities::Aggregations - Code to simplify creating and working with Elasticsearch aggregations
254
255							=head1 VERSION
256
257							version 8.5
258
259							=head1 FUNCTIONS
260
261							=head2 is_single_stat()
262
263							Returns true if an aggregation returns a single value.
264
265							=head2 expand_aggregate_string( token )
266
267							Takes a simplified aggregation grammar and expands it into the full aggregation hash.
268
269							Simple Terms:
270
271							field_name
272
273							To
274
275							{
276							field_name => {
277							terms => {
278							field => 'field_name',
279							size => 20,
280							}
281							}
282							}
283
284							Alias expansion:
285
286							alias=field_name
287
288							To
289
290							{
291							alias => {
292							terms => {
293							field => 'field_name',
294							size => 20,
295							}
296							}
297							}
298
299							Parameters:
300
301							alias=field_name:10
302
303							To
304
305							{
306							alias => {
307							terms => {
308							field => 'field_name',
309							size => 10,
310							}
311							}
312							}
313
314							Parameters, k/v:
315
316							alias=field_name:size=13
317
318							To
319
320							{
321							alias => {
322							terms => {
323							field => 'field_name',
324							size => 13,
325							}
326							}
327							}
328
329							=head2 es_flatten_aggregations()
330
331							Takes the B<aggregations> section of the query result and parses it into a flat
332							structure so each row contains all the sub aggregation information.
333
334							It returns an array reference, containing array references. The individual
335							rows of the array are ordered in a depth first fashion. The array does include
336							a key for every value, so the array can be cast to a hash safely.
337
338							=head1 Aggregations
339
340							List of supported aggregations. Other aggregations may work, but these have defined behavior.
341
342							=head2 Bucket Aggregations
343
344							These aggregations will support sub aggregations.
345
346							=over 2
347
348							=item B<terms>
349
350							The default aggregation if none is specified.
351
352							field_name
353							terms:field_name
354
355							Results in
356
357							{
358							"field_name": {
359							"terms": {
360							"field": "field_name"
361							}
362							}
363							}
364
365							Supports a positional parameter: size
366
367							field_name:20
368							terms:field_name:20
369
370							Results in
371
372							{
373							"field_name": {
374							"terms": {
375							"field": "field_name",
376							"size": 20
377							}
378							}
379							}
380
381							=item B<significant_terms>
382
383							Same as C<terms>.
384
385							significant_terms:field_name:10
386
387							Results in:
388
389							{
390							"significant_terms.field_name": {
391							"significant_terms": {
392							"field": "field_name",
393							"size": 10
394							}
395							}
396							}
397
398							=item B<rare_terms>
399
400							Same as C<terms> but the positional parameter is the C<max_doc_count>.
401
402							rare_terms:field_name:10
403
404							Results in:
405
406							{
407							"rare_terms.field_name": {
408							"rare_terms": {
409							"field": "field_name",
410							"max_doc_count": 10
411							}
412							}
413							}
414
415							=item B<histogram>
416
417							Creates a histogram for numeric fields. Positional parameter is the interval.
418
419							histogram:field_name:10
420
421							Results in:
422
423							{
424							"histogram.field_name": {
425							"histogram": {
426							"field": "field_name",
427							"interval": 10
428							}
429							}
430							}
431
432							=item B<date_histogram>
433
434							Creates a histogram for date fields. Positional parameter is the calendar_interval.
435
436							date_histogram:field_name:1h
437
438							Results in:
439
440							{
441							"date_histogram.field_name": {
442							"date_histogram": {
443							"field": "field_name",
444							"calendar_interval": "1h"
445							}
446							}
447							}
448
449							=item B<geohash_grid>
450
451							Creates a geohash grid bucket aggregation. Positional parameter is the precision.
452
453							geohash_grid:field_name:6
454
455							Results in:
456
457							{
458							"geohash_grid.field_name": {
459							"geohash_grid": {
460							"field": "field_name",
461							"precision": 6
462							}
463							}
464							}
465
466							=item B<missing>
467
468							Creates a bucket for documents missing the field. No positional parameters.
469
470							missing:field_name
471
472							Results in:
473
474							{
475							"missing.field_name": {
476							"missing": {
477							"field": "field_name"
478							}
479							}
480							}
481
482							=back
483
484							=head2 Metric Aggregations
485
486							Aggregations that generate metrics from enclosing buckets.
487
488							=over 2
489
490							=item B<avg>, B<max>, B<min>, B<sum>
491
492							Single stat metric aggregations to generate the various single statistics over the enclosing bucket.
493
494							sum:field_name
495
496							Results in
497
498							{
499							"sum.field_name": {
500							"sum": {
501							"field": "field_name"
502							}
503							}
504							}
505
506							=item B<cardinality>
507
508							Computes the unique count of terms in a field.
509
510							cardinality:field_name
511
512							Results in
513
514							{
515							"cardinality.field_name": {
516							"cardinality": {
517							"field": "field_name"
518							}
519							}
520							}
521
522							=item B<stats>
523
524							Runs the stats aggregation that returns min, max, avg, sum, and count.
525
526							stats:field_name
527
528							Results in
529
530							{
531							"stats.field_name": {
532							"stats": {
533							"field": "field_name"
534							}
535							}
536							}
537
538							=item B<extended_stats>
539
540							Runs the extended_stats aggregation that returns the same data as the C<stats> aggregation
541							plus variance, sum of squares, and standard deviation.
542
543							extended_stats:field_name
544
545							Results in
546
547							{
548							"extended_stats.field_name": {
549							"extended_stats": {
550							"field": "field_name"
551							}
552							}
553							}
554
555							=item B<percentiles>
556
557							Computes percentiles for the enclosing bucket. The positional parameter is
558							interpreted as the percents to compute. If omitted, the percentiles computed
559							will be: 25, 50, 75, 90.
560
561							percentiles:field_name:75,95,99
562
563							Results in
564
565							{
566							"percentiles.field_name": {
567							"percentiles": {
568							"field": "field_name",
569							"percents": [ 75, 95, 99 ]
570							}
571							}
572							}
573
574							=item B<geo_centroid>
575
576							Computes center of a group of geo points. No positional parameters supported.
577
578							geo_centroid:field_name
579
580							Results in
581
582							{
583							"geo_centroid.field_name": {
584							"geo_centroid": {
585							"field": "field_name"
586							}
587							}
588							}
589
590							=back
591
592							=for Pod::Coverage es_flatten_aggs
593
594							=head1 AUTHOR
595
596							Brad Lhotsky <brad@divisionbyzero.net>
597
598							=head1 COPYRIGHT AND LICENSE
599
600							This software is Copyright (c) 2023 by Brad Lhotsky.
601
602							This is free software, licensed under:
603
604							The (three-clause) BSD License
605
606							=cut