File Coverage

blib/lib/List/UtilsBy.pm
Criterion Covered Total %
statement 150 152 98.6
branch 43 44 97.7
condition 8 8 100.0
subroutine 19 19 100.0
pod 16 16 100.0
total 236 239 98.7


line stmt bran cond sub pod time code
1             # it wouldn't hurt to add a new line for such a configuration. You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2009-2018 -- leonerd@leonerd.org.uk
5              
6             package List::UtilsBy;
7              
8 12     12   610791 use strict;
  12         98  
  12         304  
9 12     12   53 use warnings;
  12         17  
  12         376  
10              
11             our $VERSION = '0.12';
12              
13 12     12   53 use Exporter 'import';
  12         17  
  12         17949  
14              
15             our @EXPORT_OK = qw(
16             sort_by
17             nsort_by
18             rev_sort_by
19             rev_nsort_by
20              
21             max_by nmax_by
22             min_by nmin_by
23             minmax_by nminmax_by
24              
25             uniq_by
26              
27             partition_by
28             count_by
29              
30             zip_by
31             unzip_by
32              
33             extract_by
34             extract_first_by
35              
36             weighted_shuffle_by
37              
38             bundle_by
39             );
40              
41             =head1 NAME
42              
43             C - higher-order list utility functions
44              
45             =head1 SYNOPSIS
46              
47             use List::UtilsBy qw( nsort_by min_by );
48              
49             use File::stat qw( stat );
50             my @files_by_age = nsort_by { stat($_)->mtime } @files;
51              
52             my $shortest_name = min_by { length } @names;
53              
54             =head1 DESCRIPTION
55              
56             This module provides a number of list utility functions, all of which take an
57             initial code block to control their behaviour. They are variations on similar
58             core perl or C functions of similar names, but which use the block
59             to control their behaviour. For example, the core Perl function C takes
60             a list of values and returns them, sorted into order by their string value.
61             The L function sorts them according to the string value returned by
62             the extra function, when given each value.
63              
64             my @names_sorted = sort @names;
65              
66             my @people_sorted = sort_by { $_->name } @people;
67              
68             =cut
69              
70             =head1 FUNCTIONS
71              
72             All functions added since version 0.04 unless otherwise stated, as the
73             original names for earlier versions were renamed.
74              
75             =cut
76              
77             =head2 sort_by
78              
79             @vals = sort_by { KEYFUNC } @vals
80              
81             Returns the list of values sorted according to the string values returned by
82             the C block or function. A typical use of this may be to sort objects
83             according to the string value of some accessor, such as
84              
85             sort_by { $_->name } @people
86              
87             The key function is called in scalar context, being passed each value in turn
88             as both C<$_> and the only argument in the parameters, C<@_>. The values are
89             then sorted according to string comparisons on the values returned.
90              
91             This is equivalent to
92              
93             sort { $a->name cmp $b->name } @people
94              
95             except that it guarantees the C accessor will be executed only once per
96             value.
97              
98             One interesting use-case is to sort strings which may have numbers embedded in
99             them "naturally", rather than lexically.
100              
101             sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings
102              
103             This sorts strings by generating sort keys which zero-pad the embedded numbers
104             to some level (9 digits in this case), helping to ensure the lexical sort puts
105             them in the correct order.
106              
107             =cut
108              
109             sub sort_by(&@)
110             {
111 7     7 1 77 my $keygen = shift;
112              
113 7         11 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  10         25  
  10         13  
114 7         43 return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ];
  4         21  
115             }
116              
117             =head2 nsort_by
118              
119             @vals = nsort_by { KEYFUNC } @vals
120              
121             Similar to L but compares its key values numerically.
122              
123             =cut
124              
125             sub nsort_by(&@)
126             {
127 8     8 1 80 my $keygen = shift;
128              
129 8         16 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  14         59  
  14         25  
130 8         46 return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ];
  9         34  
131             }
132              
133             =head2 rev_sort_by
134              
135             =head2 rev_nsort_by
136              
137             @vals = rev_sort_by { KEYFUNC } @vals
138              
139             @vals = rev_nsort_by { KEYFUNC } @vals
140              
141             I
142              
143             Similar to L and L but returns the list in the reverse
144             order. Equivalent to
145              
146             @vals = reverse sort_by { KEYFUNC } @vals
147              
148             except that these functions are slightly more efficient because they avoid
149             the final C operation.
150              
151             =cut
152              
153             sub rev_sort_by(&@)
154             {
155 1     1 1 2 my $keygen = shift;
156              
157 1         3 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  2         4  
  2         4  
158 1         5 return @_[ sort { $keys[$b] cmp $keys[$a] } 0 .. $#_ ];
  1         6  
159             }
160              
161             sub rev_nsort_by(&@)
162             {
163 1     1 1 2 my $keygen = shift;
164              
165 1         2 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  3         7  
  3         5  
166 1         5 return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ];
  3         8  
167             }
168              
169             =head2 max_by
170              
171             $optimal = max_by { KEYFUNC } @vals
172              
173             @optimal = max_by { KEYFUNC } @vals
174              
175             Returns the (first) value from C<@vals> that gives the numerically largest
176             result from the key function.
177              
178             my $tallest = max_by { $_->height } @people
179              
180             use File::stat qw( stat );
181             my $newest = max_by { stat($_)->mtime } @files;
182              
183             In scalar context, the first maximal value is returned. In list context, a
184             list of all the maximal values is returned. This may be used to obtain
185             positions other than the first, if order is significant.
186              
187             If called on an empty list, an empty list is returned.
188              
189             For symmetry with the L function, this is also provided under the
190             name C since it behaves numerically.
191              
192             =cut
193              
194             sub max_by(&@)
195             {
196 9     9 1 96 my $code = shift;
197              
198 9 100       23 return unless @_;
199              
200 8         10 local $_;
201              
202 8         16 my @maximal = $_ = shift @_;
203 8         13 my $max = $code->( $_ );
204              
205 8         26 foreach ( @_ ) {
206 11         15 my $this = $code->( $_ );
207 11 100 100     41 if( $this > $max ) {
    100          
208 6         10 @maximal = $_;
209 6         9 $max = $this;
210             }
211             elsif( wantarray and $this == $max ) {
212 1         2 push @maximal, $_;
213             }
214             }
215              
216 8 100       32 return wantarray ? @maximal : $maximal[0];
217             }
218              
219             *nmax_by = \&max_by;
220              
221             =head2 min_by
222              
223             $optimal = min_by { KEYFUNC } @vals
224              
225             @optimal = min_by { KEYFUNC } @vals
226              
227             Similar to L but returns values which give the numerically smallest
228             result from the key function. Also provided as C
229              
230             =cut
231              
232             sub min_by(&@)
233             {
234 9     9 1 449 my $code = shift;
235              
236 9 100       29 return unless @_;
237              
238 8         11 local $_;
239              
240 8         15 my @minimal = $_ = shift @_;
241 8         15 my $min = $code->( $_ );
242              
243 8         24 foreach ( @_ ) {
244 12         16 my $this = $code->( $_ );
245 12 50 100     47 if( $this < $min ) {
    100          
246 0         0 @minimal = $_;
247 0         0 $min = $this;
248             }
249             elsif( wantarray and $this == $min ) {
250 1         3 push @minimal, $_;
251             }
252             }
253              
254 8 100       32 return wantarray ? @minimal : $minimal[0];
255             }
256              
257             *nmin_by = \&min_by;
258              
259             =head2 minmax_by
260              
261             ( $minimal, $maximal ) = minmax_by { KEYFUNC } @vals
262              
263             I
264              
265             Similar to calling both L and L with the same key function
266             on the same list. This version is more efficient than calling the two other
267             functions individually, as it has less work to perform overall. In the case of
268             ties, only the first optimal element found in each case is returned. Also
269             provided as C.
270              
271             =cut
272              
273             sub minmax_by(&@)
274             {
275 7     7 1 461 my $code = shift;
276              
277 7 100       19 return unless @_;
278              
279 6         9 my $minimal = $_ = shift @_;
280 6         11 my $min = $code->( $_ );
281              
282 6 100       24 return ( $minimal, $minimal ) unless @_;
283              
284 4         7 my $maximal = $_ = shift @_;
285 4         6 my $max = $code->( $_ );
286              
287 4 100       13 if( $max < $min ) {
288 1         5 ( $maximal, $minimal ) = ( $minimal, $maximal );
289 1         2 ( $max, $min ) = ( $min, $max );
290             }
291              
292             # Minmax algorithm is faster than naïve min + max individually because it
293             # takes pairs of values
294 4         7 while( @_ ) {
295 7         10 my $try_minimal = $_ = shift @_;
296 7         11 my $try_min = $code->( $_ );
297              
298 7         14 my $try_maximal = $try_minimal;
299 7         8 my $try_max = $try_min;
300 7 100       10 if( @_ ) {
301 3         4 $try_maximal = $_ = shift @_;
302 3         6 $try_max = $code->( $_ );
303              
304 3 100       9 if( $try_max < $try_min ) {
305 1         3 ( $try_minimal, $try_maximal ) = ( $try_maximal, $try_minimal );
306 1         1 ( $try_min, $try_max ) = ( $try_max, $try_min );
307             }
308             }
309              
310 7 100       11 if( $try_min < $min ) {
311 2         2 $minimal = $try_minimal;
312 2         2 $min = $try_min;
313             }
314 7 100       13 if( $try_max > $max ) {
315 4         4 $maximal = $try_maximal;
316 4         7 $max = $try_max;
317             }
318             }
319              
320 4         21 return ( $minimal, $maximal );
321             }
322              
323             *nminmax_by = \&minmax_by;
324              
325             =head2 uniq_by
326              
327             @vals = uniq_by { KEYFUNC } @vals
328              
329             Returns a list of the subset of values for which the key function block
330             returns unique values. The first value yielding a particular key is chosen,
331             subsequent values are rejected.
332              
333             my @some_fruit = uniq_by { $_->colour } @fruit;
334              
335             To select instead the last value per key, reverse the input list. If the order
336             of the results is significant, don't forget to reverse the result as well:
337              
338             my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit;
339              
340             Because the values returned by the key function are used as hash keys, they
341             ought to either be strings, or at least well-behaved as strings (such as
342             numbers, or object references which overload stringification in a suitable
343             manner).
344              
345             =cut
346              
347             sub uniq_by(&@)
348             {
349 7     7 1 77 my $code = shift;
350              
351 7         9 my %present;
352             return grep {
353 7         15 my $key = $code->( local $_ = $_ );
  13         23  
354 13         64 !$present{$key}++
355             } @_;
356             }
357              
358             =head2 partition_by
359              
360             %parts = partition_by { KEYFUNC } @vals
361              
362             Returns a key/value list of ARRAY refs containing all the original values
363             distributed according to the result of the key function block. Each value will
364             be an ARRAY ref containing all the values which returned the string from the
365             key function, in their original order.
366              
367             my %balls_by_colour = partition_by { $_->colour } @balls;
368              
369             Because the values returned by the key function are used as hash keys, they
370             ought to either be strings, or at least well-behaved as strings (such as
371             numbers, or object references which overload stringification in a suitable
372             manner).
373              
374             =cut
375              
376             sub partition_by(&@)
377             {
378 7     7 1 77 my $code = shift;
379              
380 7         8 my %parts;
381 7         14 push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_;
  13         40  
382              
383 7         53 return %parts;
384             }
385              
386             =head2 count_by
387              
388             %counts = count_by { KEYFUNC } @vals
389              
390             I
391              
392             Returns a key/value list of integers, giving the number of times the key
393             function block returned the key, for each value in the list.
394              
395             my %count_of_balls = count_by { $_->colour } @balls;
396              
397             Because the values returned by the key function are used as hash keys, they
398             ought to either be strings, or at least well-behaved as strings (such as
399             numbers, or object references which overload stringification in a suitable
400             manner).
401              
402             =cut
403              
404             sub count_by(&@)
405             {
406 5     5 1 98 my $code = shift;
407              
408 5         7 my %counts;
409 5         14 $counts{ $code->( local $_ = $_ ) }++ for @_;
410              
411 5         48 return %counts;
412             }
413              
414             =head2 zip_by
415              
416             @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,...
417              
418             Returns a list of each of the values returned by the function block, when
419             invoked with values from across each each of the given ARRAY references. Each
420             value in the returned list will be the result of the function having been
421             invoked with arguments at that position, from across each of the arrays given.
422              
423             my @transposition = zip_by { [ @_ ] } @matrix;
424              
425             my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames;
426              
427             print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ];
428              
429             If some of the arrays are shorter than others, the function will behave as if
430             they had C in the trailing positions. The following two lines are
431             equivalent:
432              
433             zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ]
434             f( 1, "a" ), f( 2, "b" ), f( 3, undef )
435              
436             The item function is called by C, so if it returns a list, the entire
437             list is included in the result. This can be useful for example, for generating
438             a hash from two separate lists of keys and values
439              
440             my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ];
441             # %nums = ( one => 1, two => 2, three => 3 )
442              
443             (A function having this behaviour is sometimes called C, e.g. in
444             Haskell, but that name would not fit the naming scheme used by this module).
445              
446             =cut
447              
448             sub zip_by(&@)
449             {
450 7     7 1 3481 my $code = shift;
451              
452 7 100       22 @_ or return;
453              
454 6         8 my $len = 0;
455 6   100     20 scalar @$_ > $len and $len = scalar @$_ for @_;
456              
457             return map {
458 6         14 my $idx = $_;
  14         33  
459 14         20 $code->( map { $_[$_][$idx] } 0 .. $#_ )
  26         42  
460             } 0 .. $len-1;
461             }
462              
463             =head2 unzip_by
464              
465             $arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals
466              
467             I
468              
469             Returns a list of ARRAY references containing the values returned by the
470             function block, when invoked for each of the values given in the input list.
471             Each of the returned ARRAY references will contain the values returned at that
472             corresponding position by the function block. That is, the first returned
473             ARRAY reference will contain all the values returned in the first position by
474             the function block, the second will contain all the values from the second
475             position, and so on.
476              
477             my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names;
478              
479             If the function returns lists of differing lengths, the result will be padded
480             with C in the missing elements.
481              
482             This function is an inverse of L, if given a corresponding inverse
483             function.
484              
485             =cut
486              
487             sub unzip_by(&@)
488             {
489 5     5 1 77 my $code = shift;
490              
491 5         7 my @ret;
492 5         12 foreach my $idx ( 0 .. $#_ ) {
493 12         24 my @slice = $code->( local $_ = $_[$idx] );
494 12 100       48 $#slice = $#ret if @slice < @ret;
495 12         38 $ret[$_][$idx] = $slice[$_] for 0 .. $#slice;
496             }
497              
498 5         25 return @ret;
499             }
500              
501             =head2 extract_by
502              
503             @vals = extract_by { SELECTFUNC } @arr
504              
505             I
506              
507             Removes elements from the referenced array on which the selection function
508             returns true, and returns a list containing those elements. This function is
509             similar to C, except that it modifies the referenced array to remove the
510             selected values from it, leaving only the unselected ones.
511              
512             my @red_balls = extract_by { $_->color eq "red" } @balls;
513              
514             # Now there are no red balls in the @balls array
515              
516             This function modifies a real array, unlike most of the other functions in this
517             module. Because of this, it requires a real array, not just a list.
518              
519             This function is implemented by invoking C on the array, not by
520             constructing a new list and assigning it. One result of this is that weak
521             references will not be disturbed.
522              
523             extract_by { !defined $_ } @refs;
524              
525             will leave weak references weakened in the C<@refs> array, whereas
526              
527             @refs = grep { defined $_ } @refs;
528              
529             will strengthen them all again.
530              
531             =cut
532              
533             sub extract_by(&\@)
534             {
535 6     6 1 493 my $code = shift;
536 6         8 my ( $arrref ) = @_;
537              
538 6         9 my @ret;
539 6         8 for( my $idx = 0; ; $idx++ ) {
540 43 100       109 last if $idx > $#$arrref;
541 37 100       60 next unless $code->( local $_ = $arrref->[$idx] );
542              
543 11         35 push @ret, splice @$arrref, $idx, 1, ();
544 11         15 redo;
545             }
546              
547 6         28 return @ret;
548             }
549              
550             =head2 extract_first_by
551              
552             $val = extract_first_by { SELECTFUNC } @arr
553              
554             I
555              
556             A hybrid between L and C. Removes the first
557             element from the referenced array on which the selection function returns
558             true, returning it.
559              
560             As with L, this function requires a real array and not just a
561             list, and is also implemented using C so that weak references are
562             not disturbed.
563              
564             If this function fails to find a matching element, it will return an empty
565             list in list context. This allows a caller to distinguish the case between
566             no matching element, and the first matching element being C.
567              
568             =cut
569              
570             sub extract_first_by(&\@)
571             {
572 2     2 1 395 my $code = shift;
573 2         5 my ( $arrref ) = @_;
574              
575 2         6 foreach my $idx ( 0 .. $#$arrref ) {
576 4 100       14 next unless $code->( local $_ = $arrref->[$idx] );
577              
578 1         9 return splice @$arrref, $idx, 1, ();
579             }
580              
581 1         5 return;
582             }
583              
584             =head2 weighted_shuffle_by
585              
586             @vals = weighted_shuffle_by { WEIGHTFUNC } @vals
587              
588             I
589              
590             Returns the list of values shuffled into a random order. The randomisation is
591             not uniform, but weighted by the value returned by the C. The
592             probability of each item being returned first will be distributed with the
593             distribution of the weights, and so on recursively for the remaining items.
594              
595             =cut
596              
597             sub weighted_shuffle_by(&@)
598             {
599 25     25 1 748 my $code = shift;
600 25         39 my @vals = @_;
601              
602 25         43 my @weights = map { $code->( local $_ = $_ ) } @vals;
  70         199  
603              
604 25         81 my @ret;
605 25         41 while( @vals > 1 ) {
606 46         50 my $total = 0; $total += $_ for @weights;
  46         75  
607 46         58 my $select = int rand $total;
608 46         367 my $idx = 0;
609 46         77 while( $select >= $weights[$idx] ) {
610 41         57 $select -= $weights[$idx++];
611             }
612              
613 46         60 push @ret, splice @vals, $idx, 1, ();
614 46         77 splice @weights, $idx, 1, ();
615             }
616              
617 25 100       36 push @ret, @vals if @vals;
618              
619 25         58 return @ret;
620             }
621              
622             =head2 bundle_by
623              
624             @vals = bundle_by { BLOCKFUNC } $number, @vals
625              
626             I
627              
628             Similar to a regular C functional, returns a list of the values returned
629             by C. Values from the input list are given to the block function in
630             bundles of C<$number>.
631              
632             If given a list of values whose length does not evenly divide by C<$number>,
633             the final call will be passed fewer elements than the others.
634              
635             =cut
636              
637             sub bundle_by(&@)
638             {
639 6     6 1 79 my $code = shift;
640 6         7 my $n = shift;
641              
642 6         6 my @ret;
643 6         19 for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) {
644 13 100       41 $next = @_ if $next > @_;
645 13         26 push @ret, $code->( @_[$pos .. $next-1] );
646             }
647 6         37 return @ret;
648             }
649              
650             =head1 EXAMPLES
651              
652             As many of these functions both take and return lists of values, more useful
653             behaviours can be created by combining them together.
654              
655             =head2 Modal Value
656              
657             To pick the modal value in a list (i.e. the value that appears most
658             frequently according to some key function) combine C and C:
659              
660             my %counts = count_by { KEYFUNC } ITEMS;
661             my $modal = max_by { $counts{$_} } keys %counts;
662              
663             This can be performed in a single step by use of the C function from
664             L:
665              
666             use List::Util qw( pairs );
667              
668             my $modal = ( max_by { $_->value } pairs count_by { KEYFUNC } ITEMS )->key;
669              
670             =cut
671              
672             =head1 TODO
673              
674             =over 4
675              
676             =item * XS implementations
677              
678             These functions are currently all written in pure perl. Some at least, may
679             benefit from having XS implementations to speed up their logic.
680              
681             =item * Merge into L or L
682              
683             This module shouldn't really exist. The functions should instead be part of
684             one of the existing modules that already contain many list utility functions.
685             Having Yet Another List Utilty Module just worsens the problem.
686              
687             I have attempted to contact the authors of both of the above modules, to no
688             avail; therefore I decided it best to write and release this code here anyway
689             so that it is at least on CPAN. Once there, we can then see how best to merge
690             it into an existing module.
691              
692             I: As I am now the maintainer of L, some
693             amount of merging/copying should be possible. However, given the latter's key
694             position in the core F distribution and head of the "CPAN River" I am
695             keen not to do this wholesale, but a selected pick of what seems best, by a
696             popular consensus.
697              
698             =item * C and C-like functions
699              
700             Consider perhaps
701              
702             head_before { COND } LIST # excludes terminating element
703             head_upto { COND } LIST # includes terminating element
704              
705             tail_since { COND } LIST # includes initiating element
706             tail_after { COND } LIST # excludes initiating element
707              
708             (See also L).
709              
710             =back
711              
712             =head1 AUTHOR
713              
714             Paul Evans
715              
716             =cut
717              
718             0x55AA;