File Coverage

blib/lib/List/UtilsBy.pm

Criterion	Covered	Total	%
statement	150	152	98.6
branch	43	44	97.7
condition	8	8	100.0
subroutine	19	19	100.0
pod	16	16	100.0
total	236	239	98.7

line	stmt	bran	cond	sub	pod	time	code
1							# You may distribute under the terms of either the GNU General Public License
2							# or the Artistic License (the same terms as Perl itself)
3							#
4							# (C) Paul Evans, 2009-2018 -- leonerd@leonerd.org.uk
5
6							package List::UtilsBy;
7
8	12			12		670137	use strict;
	12					128
	12					305
9	12			12		59	use warnings;
	12					17
	12					425
10
11							our $VERSION = '0.11';
12
13	12			12		56	use Exporter 'import';
	12					17
	12					18313
14
15							our @EXPORT_OK = qw(
16							sort_by
17							nsort_by
18							rev_sort_by
19							rev_nsort_by
20
21							max_by nmax_by
22							min_by nmin_by
23							minmax_by nminmax_by
24
25							uniq_by
26
27							partition_by
28							count_by
29
30							zip_by
31							unzip_by
32
33							extract_by
34							extract_first_by
35
36							weighted_shuffle_by
37
38							bundle_by
39							);
40
41							=head1 NAME
42
43							C - higher-order list utility functions
44
45							=head1 SYNOPSIS
46
47							use List::UtilsBy qw( nsort_by min_by );
48
49							use File::stat qw( stat );
50							my @files_by_age = nsort_by { stat($_)->mtime } @files;
51
52							my $shortest_name = min_by { length } @names;
53
54							=head1 DESCRIPTION
55
56							This module provides a number of list utility functions, all of which take an
57							initial code block to control their behaviour. They are variations on similar
58							core perl or C functions of similar names, but which use the block
59							to control their behaviour. For example, the core Perl function C takes
60							a list of values and returns them, sorted into order by their string value.
61							The L function sorts them according to the string value returned by
62							the extra function, when given each value.
63
64							my @names_sorted = sort @names;
65
66							my @people_sorted = sort_by { $_->name } @people;
67
68							=cut
69
70							=head1 FUNCTIONS
71
72							All functions added since version 0.04 unless otherwise stated, as the
73							original names for earlier versions were renamed.
74
75							=cut
76
77							=head2 sort_by
78
79							@vals = sort_by { KEYFUNC } @vals
80
81							Returns the list of values sorted according to the string values returned by
82							the C block or function. A typical use of this may be to sort objects
83							according to the string value of some accessor, such as
84
85							sort_by { $_->name } @people
86
87							The key function is called in scalar context, being passed each value in turn
88							as both C<$_> and the only argument in the parameters, C<@_>. The values are
89							then sorted according to string comparisons on the values returned.
90
91							This is equivalent to
92
93							sort { $a->name cmp $b->name } @people
94
95							except that it guarantees the C accessor will be executed only once per
96							value.
97
98							One interesting use-case is to sort strings which may have numbers embedded in
99							them "naturally", rather than lexically.
100
101							sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings
102
103							This sorts strings by generating sort keys which zero-pad the embedded numbers
104							to some level (9 digits in this case), helping to ensure the lexical sort puts
105							them in the correct order.
106
107							=cut
108
109							sub sort_by(&@)
110							{
111	7			7	1	76	my $keygen = shift;
112
113	7					14	my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
	10					22
	10					14
114	7					46	return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ];
	4					24
115							}
116
117							=head2 nsort_by
118
119							@vals = nsort_by { KEYFUNC } @vals
120
121							Similar to L but compares its key values numerically.
122
123							=cut
124
125							sub nsort_by(&@)
126							{
127	8			8	1	78	my $keygen = shift;
128
129	8					15	my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
	14					34
	14					35
130	8					64	return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ];
	9					36
131							}
132
133							=head2 rev_sort_by
134
135							=head2 rev_nsort_by
136
137							@vals = rev_sort_by { KEYFUNC } @vals
138
139							@vals = rev_nsort_by { KEYFUNC } @vals
140
141							I
142
143							Similar to L and L but returns the list in the reverse
144							order. Equivalent to
145
146							@vals = reverse sort_by { KEYFUNC } @vals
147
148							except that these functions are slightly more efficient because they avoid
149							the final C operation.
150
151							=cut
152
153							sub rev_sort_by(&@)
154							{
155	1			1	1	3	my $keygen = shift;
156
157	1					3	my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
	2					5
	2					4
158	1					6	return @_[ sort { $keys[$b] cmp $keys[$a] } 0 .. $#_ ];
	1					7
159							}
160
161							sub rev_nsort_by(&@)
162							{
163	1			1	1	3	my $keygen = shift;
164
165	1					3	my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
	3					7
	3					5
166	1					6	return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ];
	3					17
167							}
168
169							=head2 max_by
170
171							$optimal = max_by { KEYFUNC } @vals
172
173							@optimal = max_by { KEYFUNC } @vals
174
175							Returns the (first) value from C<@vals> that gives the numerically largest
176							result from the key function.
177
178							my $tallest = max_by { $_->height } @people
179
180							use File::stat qw( stat );
181							my $newest = max_by { stat($_)->mtime } @files;
182
183							In scalar context, the first maximal value is returned. In list context, a
184							list of all the maximal values is returned. This may be used to obtain
185							positions other than the first, if order is significant.
186
187							If called on an empty list, an empty list is returned.
188
189							For symmetry with the L function, this is also provided under the
190							name C since it behaves numerically.
191
192							=cut
193
194							sub max_by(&@)
195							{
196	9			9	1	106	my $code = shift;
197
198	9	100				26	return unless @_;
199
200	8					10	local $_;
201
202	8					16	my @maximal = $_ = shift @_;
203	8					14	my $max = $code->( $_ );
204
205	8					26	foreach ( @_ ) {
206	11					15	my $this = $code->( $_ );
207	11	100	100			41	if( $this > $max ) {
		100
208	6					9	@maximal = $_;
209	6					10	$max = $this;
210							}
211							elsif( wantarray and $this == $max ) {
212	1					3	push @maximal, $_;
213							}
214							}
215
216	8	100				34	return wantarray ? @maximal : $maximal[0];
217							}
218
219							*nmax_by = \&max_by;
220
221							=head2 min_by
222
223							$optimal = min_by { KEYFUNC } @vals
224
225							@optimal = min_by { KEYFUNC } @vals
226
227							Similar to L but returns values which give the numerically smallest
228							result from the key function. Also provided as C
229
230							=cut
231
232							sub min_by(&@)
233							{
234	9			9	1	448	my $code = shift;
235
236	9	100				33	return unless @_;
237
238	8					10	local $_;
239
240	8					14	my @minimal = $_ = shift @_;
241	8					15	my $min = $code->( $_ );
242
243	8					24	foreach ( @_ ) {
244	12					15	my $this = $code->( $_ );
245	12	50	100			48	if( $this < $min ) {
		100
246	0					0	@minimal = $_;
247	0					0	$min = $this;
248							}
249							elsif( wantarray and $this == $min ) {
250	1					2	push @minimal, $_;
251							}
252							}
253
254	8	100				33	return wantarray ? @minimal : $minimal[0];
255							}
256
257							*nmin_by = \&min_by;
258
259							=head2 minmax_by
260
261							( $minimal, $maximal ) = minmax_by { KEYFUNC } @vals
262
263							I
264
265							Similar to calling both L and L with the same key function
266							on the same list. This version is more efficient than calling the two other
267							functions individually, as it has less work to perform overall. In the case of
268							ties, only the first optimal element found in each case is returned. Also
269							provided as C.
270
271							=cut
272
273							sub minmax_by(&@)
274							{
275	7			7	1	447	my $code = shift;
276
277	7	100				20	return unless @_;
278
279	6					10	my $minimal = $_ = shift @_;
280	6					15	my $min = $code->( $_ );
281
282	6	100				29	return ( $minimal, $minimal ) unless @_;
283
284	4					5	my $maximal = $_ = shift @_;
285	4					6	my $max = $code->( $_ );
286
287	4	100				12	if( $max < $min ) {
288	1					2	( $maximal, $minimal ) = ( $minimal, $maximal );
289	1					2	( $max, $min ) = ( $min, $max );
290							}
291
292							# Minmax algorithm is faster than naïve min + max individually because it
293							# takes pairs of values
294	4					9	while( @_ ) {
295	7					8	my $try_minimal = $_ = shift @_;
296	7					11	my $try_min = $code->( $_ );
297
298	7					17	my $try_maximal = $try_minimal;
299	7					7	my $try_max = $try_min;
300	7	100				14	if( @_ ) {
301	3					5	$try_maximal = $_ = shift @_;
302	3					4	$try_max = $code->( $_ );
303
304	3	100				9	if( $try_max < $try_min ) {
305	1					2	( $try_minimal, $try_maximal ) = ( $try_maximal, $try_minimal );
306	1					2	( $try_min, $try_max ) = ( $try_max, $try_min );
307							}
308							}
309
310	7	100				33	if( $try_min < $min ) {
311	2					2	$minimal = $try_minimal;
312	2					2	$min = $try_min;
313							}
314	7	100				14	if( $try_max > $max ) {
315	4					5	$maximal = $try_maximal;
316	4					10	$max = $try_max;
317							}
318							}
319
320	4					18	return ( $minimal, $maximal );
321							}
322
323							*nminmax_by = \&minmax_by;
324
325							=head2 uniq_by
326
327							@vals = uniq_by { KEYFUNC } @vals
328
329							Returns a list of the subset of values for which the key function block
330							returns unique values. The first value yielding a particular key is chosen,
331							subsequent values are rejected.
332
333							my @some_fruit = uniq_by { $_->colour } @fruit;
334
335							To select instead the last value per key, reverse the input list. If the order
336							of the results is significant, don't forget to reverse the result as well:
337
338							my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit;
339
340							Because the values returned by the key function are used as hash keys, they
341							ought to either be strings, or at least well-behaved as strings (such as
342							numbers, or object references which overload stringification in a suitable
343							manner).
344
345							=cut
346
347							sub uniq_by(&@)
348							{
349	7			7	1	82	my $code = shift;
350
351	7					8	my %present;
352							return grep {
353	7					18	my $key = $code->( local $_ = $_ );
	13					26
354	13					72	!$present{$key}++
355							} @_;
356							}
357
358							=head2 partition_by
359
360							%parts = partition_by { KEYFUNC } @vals
361
362							Returns a key/value list of ARRAY refs containing all the original values
363							distributed according to the result of the key function block. Each value will
364							be an ARRAY ref containing all the values which returned the string from the
365							key function, in their original order.
366
367							my %balls_by_colour = partition_by { $_->colour } @balls;
368
369							Because the values returned by the key function are used as hash keys, they
370							ought to either be strings, or at least well-behaved as strings (such as
371							numbers, or object references which overload stringification in a suitable
372							manner).
373
374							=cut
375
376							sub partition_by(&@)
377							{
378	7			7	1	84	my $code = shift;
379
380	7					10	my %parts;
381	7					14	push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_;
	13					43
382
383	7					64	return %parts;
384							}
385
386							=head2 count_by
387
388							%counts = count_by { KEYFUNC } @vals
389
390							I
391
392							Returns a key/value list of integers, giving the number of times the key
393							function block returned the key, for each value in the list.
394
395							my %count_of_balls = count_by { $_->colour } @balls;
396
397							Because the values returned by the key function are used as hash keys, they
398							ought to either be strings, or at least well-behaved as strings (such as
399							numbers, or object references which overload stringification in a suitable
400							manner).
401
402							=cut
403
404							sub count_by(&@)
405							{
406	5			5	1	82	my $code = shift;
407
408	5					6	my %counts;
409	5					17	$counts{ $code->( local $_ = $_ ) }++ for @_;
410
411	5					53	return %counts;
412							}
413
414							=head2 zip_by
415
416							@vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,...
417
418							Returns a list of each of the values returned by the function block, when
419							invoked with values from across each each of the given ARRAY references. Each
420							value in the returned list will be the result of the function having been
421							invoked with arguments at that position, from across each of the arrays given.
422
423							my @transposition = zip_by { [ @_ ] } @matrix;
424
425							my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames;
426
427							print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ];
428
429							If some of the arrays are shorter than others, the function will behave as if
430							they had C in the trailing positions. The following two lines are
431							equivalent:
432
433							zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ]
434							f( 1, "a" ), f( 2, "b" ), f( 3, undef )
435
436							The item function is called by C, so if it returns a list, the entire
437							list is included in the result. This can be useful for example, for generating
438							a hash from two separate lists of keys and values
439
440							my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ];
441							# %nums = ( one => 1, two => 2, three => 3 )
442
443							(A function having this behaviour is sometimes called C, e.g. in
444							Haskell, but that name would not fit the naming scheme used by this module).
445
446							=cut
447
448							sub zip_by(&@)
449							{
450	7			7	1	3660	my $code = shift;
451
452	7	100				20	@_ or return;
453
454	6					10	my $len = 0;
455	6		100			24	scalar @$_ > $len and $len = scalar @$_ for @_;
456
457							return map {
458	6					16	my $idx = $_;
	14					39
459	14					23	$code->( map { $_[$_][$idx] } 0 .. $#_ )
	26					55
460							} 0 .. $len-1;
461							}
462
463							=head2 unzip_by
464
465							$arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals
466
467							I
468
469							Returns a list of ARRAY references containing the values returned by the
470							function block, when invoked for each of the values given in the input list.
471							Each of the returned ARRAY references will contain the values returned at that
472							corresponding position by the function block. That is, the first returned
473							ARRAY reference will contain all the values returned in the first position by
474							the function block, the second will contain all the values from the second
475							position, and so on.
476
477							my ( $firstnames, $lastnames ) = unzip_by { m/^(.?) (.)$/ } @names;
478
479							If the function returns lists of differing lengths, the result will be padded
480							with C in the missing elements.
481
482							This function is an inverse of L, if given a corresponding inverse
483							function.
484
485							=cut
486
487							sub unzip_by(&@)
488							{
489	5			5	1	83	my $code = shift;
490
491	5					7	my @ret;
492	5					13	foreach my $idx ( 0 .. $#_ ) {
493	12					33	my @slice = $code->( local $_ = $_[$idx] );
494	12	100				60	$#slice = $#ret if @slice < @ret;
495	12					42	$ret[$_][$idx] = $slice[$_] for 0 .. $#slice;
496							}
497
498	5					29	return @ret;
499							}
500
501							=head2 extract_by
502
503							@vals = extract_by { SELECTFUNC } @arr
504
505							I
506
507							Removes elements from the referenced array on which the selection function
508							returns true, and returns a list containing those elements. This function is
509							similar to C, except that it modifies the referenced array to remove the
510							selected values from it, leaving only the unselected ones.
511
512							my @red_balls = extract_by { $_->color eq "red" } @balls;
513
514							# Now there are no red balls in the @balls array
515
516							This function modifies a real array, unlike most of the other functions in this
517							module. Because of this, it requires a real array, not just a list.
518
519							This function is implemented by invoking C on the array, not by
520							constructing a new list and assigning it. One result of this is that weak
521							references will not be disturbed.
522
523							extract_by { !defined $_ } @refs;
524
525							will leave weak references weakened in the C<@refs> array, whereas
526
527							@refs = grep { defined $_ } @refs;
528
529							will strengthen them all again.
530
531							=cut
532
533							sub extract_by(&\@)
534							{
535	6			6	1	502	my $code = shift;
536	6					11	my ( $arrref ) = @_;
537
538	6					9	my @ret;
539	6					10	for( my $idx = 0; ; $idx++ ) {
540	43	100				120	last if $idx > $#$arrref;
541	37	100				62	next unless $code->( local $_ = $arrref->[$idx] );
542
543	11					36	push @ret, splice @$arrref, $idx, 1, ();
544	11					47	redo;
545							}
546
547	6					37	return @ret;
548							}
549
550							=head2 extract_first_by
551
552							$val = extract_first_by { SELECTFUNC } @arr
553
554							I
555
556							A hybrid between L and C. Removes the first
557							element from the referenced array on which the selection function returns
558							true, returning it.
559
560							As with L, this function requires a real array and not just a
561							list, and is also implemented using C so that weak references are
562							not disturbed.
563
564							If this function fails to find a matching element, it will return an empty
565							list in list context. This allows a caller to distinguish the case between
566							no matching element, and the first matching element being C.
567
568							=cut
569
570							sub extract_first_by(&\@)
571							{
572	2			2	1	428	my $code = shift;
573	2					4	my ( $arrref ) = @_;
574
575	2					6	foreach my $idx ( 0 .. $#$arrref ) {
576	4	100				16	next unless $code->( local $_ = $arrref->[$idx] );
577
578	1					10	return splice @$arrref, $idx, 1, ();
579							}
580
581	1					6	return;
582							}
583
584							=head2 weighted_shuffle_by
585
586							@vals = weighted_shuffle_by { WEIGHTFUNC } @vals
587
588							I
589
590							Returns the list of values shuffled into a random order. The randomisation is
591							not uniform, but weighted by the value returned by the C. The
592							probabilty of each item being returned first will be distributed with the
593							distribution of the weights, and so on recursively for the remaining items.
594
595							=cut
596
597							sub weighted_shuffle_by(&@)
598							{
599	25			25	1	748	my $code = shift;
600	25					39	my @vals = @_;
601
602	25					34	my @weights = map { $code->( local $_ = $_ ) } @vals;
	70					203
603
604	25					81	my @ret;
605	25					42	while( @vals > 1 ) {
606	46					48	my $total = 0; $total += $_ for @weights;
	46					72
607	46					69	my $select = int rand $total;
608	46					427	my $idx = 0;
609	46					67	while( $select >= $weights[$idx] ) {
610	42					65	$select -= $weights[$idx++];
611							}
612
613	46					66	push @ret, splice @vals, $idx, 1, ();
614	46					75	splice @weights, $idx, 1, ();
615							}
616
617	25	100				46	push @ret, @vals if @vals;
618
619	25					63	return @ret;
620							}
621
622							=head2 bundle_by
623
624							@vals = bundle_by { BLOCKFUNC } $number, @vals
625
626							I
627
628							Similar to a regular C functional, returns a list of the values returned
629							by C. Values from the input list are given to the block function in
630							bundles of C<$number>.
631
632							If given a list of values whose length does not evenly divide by C<$number>,
633							the final call will be passed fewer elements than the others.
634
635							=cut
636
637							sub bundle_by(&@)
638							{
639	6			6	1	84	my $code = shift;
640	6					7	my $n = shift;
641
642	6					7	my @ret;
643	6					20	for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) {
644	13	100				40	$next = @_ if $next > @_;
645	13					29	push @ret, $code->( @_[$pos .. $next-1] );
646							}
647	6					44	return @ret;
648							}
649
650							=head1 TODO
651
652							=over 4
653
654							=item * XS implementations
655
656							These functions are currently all written in pure perl. Some at least, may
657							benefit from having XS implementations to speed up their logic.
658
659							=item * Merge into L or L
660
661							This module shouldn't really exist. The functions should instead be part of
662							one of the existing modules that already contain many list utility functions.
663							Having Yet Another List Utilty Module just worsens the problem.
664
665							I have attempted to contact the authors of both of the above modules, to no
666							avail; therefore I decided it best to write and release this code here anyway
667							so that it is at least on CPAN. Once there, we can then see how best to merge
668							it into an existing module.
669
670							I: As I am now the maintainer of L, some
671							amount of merging/copying should be possible. However, given the latter's key
672							position in the core F distribution and head of the "CPAN River" I am
673							keen not to do this wholesale, but a selected pick of what seems best, by a
674							popular consensus.
675
676							=item * C and C-like functions
677
678							Consider perhaps
679
680							head_before { COND } LIST # excludes terminating element
681							head_upto { COND } LIST # includes terminating element
682
683							tail_since { COND } LIST # includes initiating element
684							tail_after { COND } LIST # excludes initiating element
685
686							(See also L).
687
688							=back
689
690							=head1 AUTHOR
691
692							Paul Evans
693
694							=cut
695
696							0x55AA;