line
stmt
bran
cond
sub
pod
time
code
1
=head1 NAME
2
3
Data::Sorting - Multi-key sort using function results
4
5
6
=head1 SYNOPSIS
7
8
use Data::Sorting qw( :basics :arrays :extras );
9
10
# Sorting functions default to simple string comparisons
11
@names = qw( Bob Alice Ellen Charlie David );
12
@ordered = sorted_by( undef, @names );
13
14
# Various options can be passed before the list values
15
@ordered = sorted_by( [ -order=>'reverse' ], @names );
16
17
# You can also generate a sorting function and then apply it
18
$function = sort_function();
19
@ordered = $function->( @names ); # or &{$function}(@names)
20
@ordered = sort_function( -order=>'reverse' )->( @names );
21
22
# The :array functions are prototyped to take the array first
23
@ordered = sorted_array( @names );
24
@ordered = sorted_arrayref( \@names );
25
26
# You can also sort an array in place, changing its internal order
27
sort_array( @names );
28
sort_arrayref( \@names );
29
30
# There are several sorting options, such as -compare => 'natural'
31
@movies = ( 'The Matrix', 'Plan 9', '2001', 'Terminator 2' );
32
@ordered = sort_function( -compare => 'natural' )->( @movies );
33
# @ ordered now contains '2001', 'The Matrix', 'Plan 9', 'Terminator 2'
34
35
# To sort numbers, pass the -compare => 'numeric' option
36
@numbers = ( 18, 5, 23, 42, 156, 91, 64 );
37
@ordered = sorted_by( [ -compare => 'numeric' ], @numbers );
38
@ordered = sort_function( -compare => 'numeric' )->( @numbers );
39
@ordered = sorted_array( @numbers, -compare => 'numeric' );
40
sort_array( @numbers, -compare => 'numeric' );
41
42
# You can sort by the results of a function to be called on each item
43
sort_array( @numbers, -compare => 'numeric', sub { $_[0] % 16 } );
44
# @numbers now contains 64, 18, 5, 23, 42, 91, 156
45
46
# For arrays of datastructures, pass in keys to extract for sorting
47
@records = (
48
{ 'rec_id'=>3, 'name'=>{'first'=>'Bob', 'last'=>'Macy'} },
49
{ 'rec_id'=>1, 'name'=>{'first'=>'Sue', 'last'=>'Jones'} },
50
{ 'rec_id'=>2, 'name'=>{'first'=>'Al', 'last'=>'Jones' } },
51
);
52
@ordered = sorted_array( @records, 'rec_id' );
53
54
# For nested data structures, pass an array of keys to fetch
55
@ordered = sorted_array( @records, ['name','first'] );
56
57
# Pass multiple sort keys for multiple-level sorts
58
@ordered = sorted_array( @records, ['name','last'], ['name','first'] );
59
60
# Any selected sort options are applied to all subsequent sort keys
61
@ordered = sorted_array( @records,
62
-order => 'reverse', ['name','last'], ['name','first'] );
63
64
# Options specified within a hash-ref apply only to that key
65
@ordered = sorted_array( @records,
66
{ order=>'reverse', sortkey=>['name','last'] },
67
['name','first'] );
68
69
# Locale support is available if you have Perl 5.004 or later and POSIX
70
POSIX::setlocale( POSIX::LC_COLLATE(), 'en_US' );
71
POSIX::setlocale( POSIX::LC_CTYPE(), 'en_US' );
72
@ordered = sorted_array( @records,
73
-compare=>'locale', ['name','last'], ['name','first'] );
74
75
76
=head1 ABSTRACT
77
78
Data::Sorting provides functions to sort the contents of arrays based on a collection of extraction and comparison rules. Extraction rules are used to identify the attributes of array elements on which the ordering is based; comparison rules specify how those values should be ordered.
79
80
Index strings may be used to retrieve values from array elements, or function references may be passed in to call on each element. Comparison rules are provided for numeric, bytewise, and case-insensitive orders, as well as a 'natural' comparison that places numbers first, in numeric order, followed by the remaining items in case-insensitive textual order.
81
82
83
=head1 DESCRIPTION
84
85
This module provides several public functions with different calling interfaces that all use the same underlying sorting mechanisms.
86
87
These functions may be imported individually or in groups using the following tags:
88
89
=over 9
90
91
=item :basics
92
93
sorted_by(), sort_function(): General-purpose sorting functions.
94
95
=item :array
96
97
sorted_array(), sorted_arrayref(), sort_array(), sort_arrayref(): Prototyped functions for arrays.
98
99
=item :extras
100
101
sort_key_values(), sort_description(): Two accessory functions that explain how sorting is being carried out.
102
103
=back
104
105
All of these functions take a list of sorting rules as arguments. See L<"Sort Rule Syntax"> for a discussion of the contents of the $sort_rule or @sort_rules parameters shown below.
106
107
=cut
108
109
########################################################################
110
111
package Data::Sorting;
112
113
require 5.003;
114
7
7
182863
use strict;
7
17
7
276
115
7
7
38
use Carp;
7
14
7
730
116
7
7
42
use Exporter;
7
19
7
401
117
118
7
7
33
use vars qw( $VERSION @ISA %EXPORT_TAGS );
7
25
7
1229
119
$VERSION = 0.9;
120
121
push @ISA, qw( Exporter );
122
%EXPORT_TAGS = (
123
basics => [qw( sorted_by sort_function )],
124
arrays => [qw( sorted_array sorted_arrayref sort_array sort_arrayref)],
125
extras => [qw( sort_key_values sort_description )],
126
);
127
Exporter::export_ok_tags( keys %EXPORT_TAGS );
128
129
7
7
41
use vars qw( @Array @Rules $PreCalculate $Rule @ValueSet );
7
29
7
5333
130
131
########################################################################
132
133
=head2 sorted_by
134
135
@ordered = sorted_by( $sort_rule, @value_array );
136
@ordered = sorted_by( $sort_rule, @$value_arrayref );
137
@ordered = sorted_by( $sort_rule, $value1, $value2, $value3 );
138
139
@ordered = sorted_by( \@sort_rules, @value_array );
140
@ordered = sorted_by( \@sort_rules, @$value_arrayref );
141
@ordered = sorted_by( \@sort_rules, $value1, $value2, $value3 );
142
143
This is a general-purpose sorting function which accepts one or more sort order rules and a list of input values, then returns the values in the order specified by the rules.
144
145
=cut
146
147
# @in_order = sorted_by( $sort_rules_ary, @values );
148
sub sorted_by ($;@) {
149
3
10
my @sort_params = ( ! defined $_[0] ) ? () :
150
3
50
3
1
2073
( ref($_[0]) eq 'ARRAY' ) ? @{ (shift) } :
50
151
shift;
152
3
10
( my $sorter, local @Rules ) = _parse_sort_args( @sort_params );
153
3
35
local *Array = \@_;
154
3
9
&$sorter;
155
}
156
157
########################################################################
158
159
=head2 sort_function
160
161
@ordered = sort_function( @sort_rules )->( @value_array );
162
@ordered = sort_function( @sort_rules )->( @$value_arrayref );
163
@ordered = sort_function( @sort_rules )->( $value1, $value2, $value3 );
164
165
Creates an anonymous function which applies the provided sort rules. The function may be cached and used multiple times to apply the same rules again.
166
167
=cut
168
169
# @in_order = sort_function( @sort_rules )->( @array );
170
sub sort_function (@) {
171
40
40
1
31625
my ( $sorter, @rules ) = _parse_sort_args( @_ );
172
return sub {
173
410
410
18999
local *Array = \@_;
174
410
1098
local @Rules = @rules;
175
410
689
my @results = &$sorter;
176
# Kludge to clear extracted data; there's gotta be a better way...
177
410
1100
foreach my $rule (@rules) {
178
498
3039
map { delete $rule->{$_} } grep /^ext_/, keys %$rule
256
1000
179
}
180
410
2251
@results;
181
}
182
40
261
}
183
184
########################################################################
185
186
=head2 sorted_array
187
188
@ordered = sorted_array( @value_array, @sort_rules );
189
@ordered = sorted_array( @$value_arrayref, @sort_rules );
190
191
Returns a sorted list of the items without altering the order of the original list.
192
193
=cut
194
195
# @in_order = sorted_array( @array, @sort_rules );
196
sub sorted_array (\@;@) {
197
2
2
1
750
local *Array = shift;
198
2
6
( my $sorter, local @Rules ) = _parse_sort_args( @_ );
199
2
8
&$sorter;
200
}
201
202
=head2 sorted_arrayref
203
204
@ordered = sorted_arrayref( \@value_array, @sort_rules );
205
@ordered = sorted_arrayref( $value_arrayref, @sort_rules );
206
207
Returns a sorted list of the items without altering the order of the original list.
208
209
=cut
210
211
# @in_order = sorted_arrayref( $array_ref, @sort_rules );
212
sub sorted_arrayref ($;@) {
213
2
2
1
694
local *Array = shift;
214
2
5
( my $sorter, local @Rules ) = _parse_sort_args( @_ );
215
2
5
&$sorter;
216
}
217
218
########################################################################
219
220
=head2 sort_array
221
222
sort_array( @value_array, @sort_rules );
223
sort_array( @$value_arrayref, @sort_rules );
224
225
Sorts the contents of the specified array using a list of sorting rules.
226
227
=cut
228
229
# sort_array( @array, @sort_rules );
230
sub sort_array (\@;@) {
231
2
2
1
652
local *Array = shift;
232
2
5
( my $sorter, local @Rules ) = _parse_sort_args( @_ );
233
2
20
@Array = &$sorter;
234
}
235
236
=head2 sort_arrayref
237
238
sort_arrayref( \@value_array, @sort_rules );
239
sort_arrayref( $value_arrayref, @sort_rules );
240
241
Equivalent to sort_array, but takes an explicit array reference as its first argument, rather than an array variable.
242
243
=cut
244
245
# sort_arrayref( $array_ref, @sort_rules );
246
sub sort_arrayref ($;@) {
247
2
2
1
619
local *Array = shift;
248
2
5
( my $sorter, local @Rules ) = _parse_sort_args( @_ );
249
2
5
@Array = &$sorter;
250
}
251
252
########################################################################
253
254
=head2 sort_key_values
255
256
@key_values = sort_key_values( \@value_array, @sort_rules );
257
@key_values = sort_key_values( $value_arrayref, @sort_rules );
258
259
Doesn't actually perform any sorting. Extracts and returns the values which would be used as sort keys from each item in the array, in their original order.
260
261
=cut
262
263
# @results = sort_key_values( $array, @sort_rules );
264
sub sort_key_values ($;@) {
265
0
0
1
0
local *Array = shift;
266
0
0
my ($sorter, @rules) = _parse_sort_args( @_ );
267
268
0
0
0
if ( scalar @rules == 1 ) {
269
0
0
_extract_values_for_rule( $rules[0], @Array );
270
} else {
271
0
0
map [ _extract_values_for_item( $_, @rules ) ], @Array;
272
}
273
}
274
275
########################################################################
276
277
=head2 sort_description
278
279
@description = sort_description( $descriptor, @sort_rules );
280
281
Doesn't actually perform any sorting. Provides descriptive information about the sort rules for diagnostic purposes.
282
283
=cut
284
285
# @sort_rules = sort_description( 'text', @sort_rules );
286
sub sort_description ($;@) {
287
0
0
1
0
my $descriptor = shift;
288
289
0
0
my $desc_func;
290
0
0
0
if ( ! $descriptor ) {
0
0
291
0
0
$desc_func = \&_desc_text;
292
} elsif ( ref($descriptor) eq 'CODE' ) {
293
0
0
$desc_func = $descriptor;
294
} elsif ( ! ref($descriptor) ) {
295
7
7
49
no strict 'refs';
7
13
7
3947
296
0
0
0
$desc_func = \&{"_desc_$descriptor"}
0
0
297
or croak("Can't find a function named '_desc_$descriptor'");
298
} else {
299
0
0
croak("Unsupported descriptor '$descriptor'")
300
}
301
302
0
0
my ($sorter, @rules) = _parse_sort_args( @_ );
303
304
0
0
map { &$desc_func( $_ ) } @rules;
0
0
305
}
306
307
sub _desc_text {
308
0
0
0
my $rule = shift;
309
310
0
0
my $comp = $rule->{compare};
311
312
0
0
$rule->{extract} .
313
314
0
0
0
join( '', map $_ ? "($_) " : " ", join(', ', map "'$_'", @{ $rule->{extract_args} }) ) .
0
0
0
0
315
316
"in " . ( $rule->{order_sign} < 0 ? "descending" : "ascending" ) . " " .
317
318
( ! ref($comp) ? "$comp" :
319
ref($comp) eq 'CODE' ? "with custom function ($comp)":
320
ref($comp) eq 'ARRAY' ? join(', ', @$comp) : "with $comp" ) .
321
" order"
322
}
323
324
########################################################################
325
326
=head2 Sort Rule Syntax
327
328
The sort rule argument list may contain several different types of parameters, which are parsed identically by all of the public functions described above.
329
330
A sort rule definition list may contain any combination of the following argument structures:
331
332
=over 4
333
334
=item I
335
336
If no sort keys are specified, a default sort key is created using the C "self"> option.
337
338
@ordered = sorted_array( @names );
339
340
=item I
341
342
Specifies a sort key. Each I may be either a scalar value, or an array reference. Appropriate values for a I vary depending on which "extract" option is being used, and are discussed further below.
343
344
@ordered = sorted_array( @numbers, sub { $_[0] % 8 } );
345
@ordered = sorted_array( @records, 'rec_id' );
346
@ordered = sorted_array( @records, ['name','first'] );
347
348
Any number of sortkeys may be provided:
349
350
@ordered = sorted_array( @records, ['name','last'],
351
['name','first'] );
352
353
=item -sortkey => I
354
355
Another way of specifying a sort key is by preceding it with the "-sortkey" flag.
356
357
@ordered = sorted_array( @numbers, -sortkey => sub { $_[0] % 8 } );
358
@ordered = sorted_array( @records, -sortkey => ['name','last'],
359
-sortkey => ['name','first'] );
360
361
=item { sortkey => I, I => I, ... }
362
363
Additional options can be specified by passing a reference to a hash containing a sortkey and values for any number of options described in the list below.
364
365
@ordered = sorted_array( @numbers, { sortkey => sub { abs(shift) },
366
compare => 'numeric', } );
367
368
=item -I => I
369
370
Sets a default option for any subsequent sortkeys in the argument list.
371
372
@ordered = sorted_array( @records, -compare => 'numeric',
373
-sortkey => sub { abs(shift) });
374
375
@ordered = sorted_array( @records, -compare => 'textual',
376
-sortkey => ['name','last'],
377
-sortkey => ['name','first'] );
378
379
=back
380
381
The possible I values are:
382
383
=over 4
384
385
=item extract
386
387
Determines the function which will be used to retrieve the sort key value from each item in the input list.
388
389
=item compare
390
391
Determines the function which will be used to order the extracted values.
392
393
=item order
394
395
Can be set to "reverse" or "descending" to invert the sort order. Defaults to "ascending".
396
397
=item engine
398
399
Determines the underlying sorting algorithm which will be used to implement the sort. Generally left blank, enabling the module to select the best one available.
400
401
=back
402
403
Each of these options is discussed at further length below.
404
405
=cut
406
407
my @DefaultState = ( order=>'ascending', compare=>'cmp', extract=>'any' );
408
my %SupportedOptions = ( map { $_=>1 } qw( engine order compare extract ) );
409
my %FunctionCache;
410
411
sub _parse_sort_args {
412
51
51
123
my @arguments = ( @_ );
413
414
51
73
my %state;
415
my @rules;
416
51
159
while ( scalar @arguments ) {
417
71
106
my $token = shift @arguments;
418
419
71
402
my ( $flagname ) = ( $token =~ /^\-(\w+)$/ );
420
71
100
66
444
if ( $flagname and $SupportedOptions{$flagname} ) {
50
100
421
36
138
$state{ $flagname } = shift @arguments;
422
} elsif ( $flagname eq 'sortkey' ) {
423
0
0
push @rules, { @DefaultState, %state, 'sortkey' => shift @arguments };
424
} elsif ( ref($token) eq 'HASH' ) {
425
1
10
push @rules, { @DefaultState, %state, %$token };
426
} else {
427
34
263
push @rules, { @DefaultState, %state, 'sortkey' => $token };
428
}
429
}
430
51
100
142
if ( ! scalar @rules ) {
431
24
181
push @rules, { @DefaultState, 'extract' => 'self', %state, sortkey => [] };
432
}
433
434
7
7
48
no strict 'refs';
7
18
7
15805
435
436
51
107
foreach my $rule ( @rules ) {
437
# Select the appropriate comparison function
438
59
139
my $compare = $rule->{compare};
439
59
50
140
croak("Missing compare option for sorting") unless ( $compare );
440
$rule->{compare_func} = ref($compare) eq 'CODE' ? $compare :
441
59
50
33
296
$FunctionCache{"_cmp_$compare"} ||= \&{"_cmp_$compare"}
66
442
|| croak("Can't find a function named '_cmp_$compare'");
443
444
# Optional parameter for "reverse" or "descending" sorts
445
59
100
339
$rule->{order_sign} = ( $rule->{order} =~ /^desc|^rev/i ) ? -1 : 1;
446
447
# Select the appropriate value extraction function
448
59
103
my $extract = $rule->{extract};
449
59
50
149
croak("Missing extract option for sorting") unless ( length $extract );
450
59
100
100
244
$extract = 'code' if ($extract eq 'any' && ref($rule->{sortkey}) eq 'CODE');
451
$rule->{extract_func} = ref($extract) eq 'CODE' ? $extract :
452
59
50
33
1114
$FunctionCache{"_ext_$extract"} ||= \&{"_ext_$extract"} ||
66
453
croak("Can't find a function named '_ext_$extract'");
454
455
# Optional array of arguments to the extraction function
456
59
237
my $skey = $rule->{sortkey};
457
59
100
325
$rule->{extract_args} = ( ! defined $skey ) ? [] :
50
458
(ref($skey) eq 'ARRAY') ? $skey :
459
[ $skey ];
460
461
59
100
262
if ( $extract eq 'compound' ) {
462
1
2
foreach ( 0 .. $#{ $rule->{extract_args} } / 2 ) {
1
6
463
2
6
my $xa = $rule->{extract_args}->[ $_ * 2 ];
464
2
50
6
if ( ! ref $xa ) {
465
$rule->{extract_args}->[$_ * 2] = $FunctionCache{"_ext_$xa"} ||=
466
2
0
29
\&{"_ext_$xa"} || croak("Can't find a function named '_ext_$xa'");
33
467
}
468
}
469
}
470
}
471
472
# If $PreCalculate is set, do our lookups ahead of time for all of the items
473
58
100
392
my $engine = defined($PreCalculate) ? 'precalc' :
474
$rules[0]->{engine} ? $rules[0]->{engine} :
475
( @rules == 1 and $rules[0]->{order_sign} > 0
476
and $rules[0]->{compare} eq 'cmp'
477
and $rules[0]->{extract} eq 'self' ) ? 'trivial' :
478
51
100
100
612
(! grep {$_->{compare} ne 'cmp' or $_->{order_sign} < 0} @rules) ? 'packed' :
100
100
50
50
479
( scalar @rules == 1 ) ? 'precalc' :
480
'orcish' ;
481
# warn "Sorting using '$engine' engine\n";
482
483
my $sorter = ref($engine) eq 'CODE' ? $engine :
484
51
50
33
250
$FunctionCache{"_sorted_$engine"} ||= \&{"_sorted_$engine"} ||
66
485
croak("No such sort mode '$engine'; can't find function '_sorted_$engine'");
486
487
51
209
return $sorter, @rules;
488
}
489
490
########################################################################
491
492
=head2 Extraction Functions
493
494
For the extract option, you may specify one of the following Is:
495
496
=over 4
497
498
=item any
499
500
The default. Based on the I may behave as the 'self', 'key', or 'method' options described below.
501
502
=item self
503
504
Uses the input value as the sort key, unaltered. Typically used when sorting strings or other scalar values.
505
506
=item key
507
508
Allows for indexing in to hash or array references, allowing you to sort a list of arrayrefs based on the Ith value in each, or to sort a list of hashrefs based on a given key.
509
510
If the sortkey is an array reference, then the keys are looked up sequentially, allowing you to sort on the contents of a nested hash or array structure.
511
512
=item method
513
514
Uses the sortkey as a method name to be called on each list value, enabling you to sort objects by some calculated value.
515
516
If the sortkey is an array reference, then the first value is used as the method name and the remaining values as arguments to that method.
517
518
=item I
519
520
You may pass in a reference to a custom extraction function that will be used to retrieve the sort key values for this rule. The function will be called separately for each value in the input list, receiving that current value as an argument.
521
522
If the sortkey is an array reference, then the first value is used as the function reference and the remaining values as arguments to be passed after the item value.
523
524
=back
525
526
extract => self | method | key | code | CODEREF | ...
527
sortkey => - | m.name | key/idx | CODEREF | args
528
529
=cut
530
531
# $value = _extract_value( $item, $rule );
532
sub _extract_value {
533
352
352
4533
my ( $item, $rule ) = @_;
534
352
356
my $value = &{ $rule->{extract_func} }( $item, @{ $rule->{extract_args} } );
352
948
352
882
535
352
50
1121
return defined($value) ? $value : '';
536
}
537
538
# $value = _extract_values_for_item( $item, @rules );
539
sub _extract_values_for_item {
540
0
0
0
my $item = shift;
541
0
0
0
map { defined($_) ? $_ : '' }
0
0
542
0
0
map { &{ $_->{extract_func} }( $item, @{ $_->{extract_args} } ) } @_;
0
0
0
0
543
}
544
545
# $value = _extract_values_for_rule( $rule, @item );
546
sub _extract_values_for_rule {
547
113
113
138
my $rule = shift;
548
113
100
905
return @_ if ( $rule->{extract} eq 'self' );
549
231
50
613
map { defined($_) ? $_ : '' }
231
417
550
22
47
map { &{ $rule->{extract_func} }( $_, @{ $rule->{extract_args} } ) } @_;
231
900
231
631
551
}
552
553
sub _ext_self {
554
0
0
0
my ( $item, @sortkey ) = @_;
555
0
0
return $item;
556
}
557
558
sub _ext_split {
559
264
264
485
my ( $item, $delim, @indexes ) = @_;
560
# warn "Split '$item' with '$delim'\n";
561
264
1001
my @values = split /$delim/, $item;
562
264
1336
join $delim, @values[ @indexes ];
563
}
564
565
sub _ext_substr {
566
88
88
146
my ( $item, @sortkey ) = @_;
567
88
50
393
$#sortkey ? substr($item, $sortkey[0], $sortkey[1] ) : substr($item, $sortkey[0] );
568
}
569
570
sub _ext_self_code {
571
88
88
107
my ( $item, @sortkey ) = @_;
572
88
171
&$item( @sortkey );
573
}
574
575
sub _ext_code {
576
275
275
461
my ( $item, $code, @sortkey ) = @_;
577
275
535
&$code( $item, @sortkey );
578
}
579
580
sub _ext_method {
581
132
132
201
my ( $item, $method, @sortkey ) = @_;
582
132
321
$item->$method( @sortkey );
583
}
584
585
sub _ext_index {
586
968
968
1418
my ( $item, @sortkey ) = @_;
587
968
2695
while ( scalar @sortkey ) {
588
1188
1311
my $index = shift @sortkey;
589
590
1188
50
4235
if ( ! ref $item ) {
100
50
591
0
0
return;
592
} elsif ( UNIVERSAL::isa($item, 'HASH') ) {
593
924
3066
$item = $item->{$index};
594
} elsif ( UNIVERSAL::isa($item, 'ARRAY') ) {
595
264
50
66
794
carp "Use of non-numeric key '$index'"
596
unless ( $index eq '0' or $index != 0 );
597
264
761
$item = $item->[$index];
598
} else {
599
0
0
carp "Can't _ext_index from '$item' ($index)";
600
}
601
602
}
603
968
3162
return $item;
604
}
605
606
sub _ext_any {
607
1100
1100
1681
my ( $item, @sortkey ) = @_;
608
609
1100
50
5454
if ( ref($item) eq 'CODE' ) {
50
50
100
50
610
# &_ext_self_code;
611
0
0
&$item( @sortkey );
612
} elsif ( ! scalar @sortkey ) {
613
0
0
return $item;
614
} elsif ( ref($sortkey[0]) eq 'CODE' ) {
615
0
0
&_ext_code;
616
} elsif ( UNIVERSAL::can( $item, $sortkey[0] ) ) {
617
132
188
&_ext_method;
618
} elsif ( ! ref $sortkey[0] ) {
619
968
1446
&_ext_index;
620
} else {
621
0
0
confess "Unsure how to extract value for sorting purposes";
622
}
623
}
624
625
sub _ext_compound {
626
44
44
196
my $item = shift;
627
44
218
while ( scalar @_ ) {
628
88
125
my ($extr_sub, $sortkey) = ( shift, shift );
629
88
50
185
$item = &$extr_sub( $item, $sortkey ? @$sortkey : () );
630
}
631
44
163
return $item;
632
}
633
634
########################################################################
635
636
=head2 Comparison Functions
637
638
For the compare option, you may specify one of the following Is:
639
640
=over 4
641
642
=item cmp
643
644
The default comparison, using Perl's default cmp operator.
645
646
=item numeric
647
648
A numeric comparison using Perl's <=> operator.
649
650
=item textual
651
652
A text-oriented comparison that ignores whitespace and capitalization.
653
654
=item natural
655
656
A multi-type comparison that places empty values first, then numeric values in numeric order, then non-textual values like punctuation, followed by textual values in text order. The natural ordering also includes moving subsidiary words to the end, eg "The Book of Verse" is sorted as "Book of Verse, The"
657
658
=item locale : $three_way_cmp
659
660
Comparator functions which use the POSIX strcoll function for ordering.
661
662
=item lc_locale : $three_way_cmp
663
664
A case-insensitive version of the POSIX strcoll ordering.
665
666
=item num_lc_locale
667
668
Like the 'natural' style, this comparison distinguishes between empty and numeric values, but uses the lc_locale function to sort the textual values.
669
670
=item I
671
672
You may pass in a reference to a custom comparison function that will be used to order the sort key values for this rule.
673
674
=back
675
676
Each of these functions may return a postive, zero, or negative value based on the relationship of the values in the $a and $b positions of the current @ValueSet array. An undefined return indicates that the comparator is unable to provide an ordering for this pair, in which case the choice will fall through to the next comparator in the list; if no comparator specifies an order, they are left in their original order.
677
678
=cut
679
680
# $three_way_cmp = _cmp_cmp;
681
sub _cmp_cmp {
682
342
342
931
$ValueSet[$a] cmp $ValueSet[$b]
683
}
684
685
# $three_way_cmp = _cmp_bytewise;
686
sub _cmp_bytewise {
687
1358
1358
2981
$ValueSet[$a] cmp $ValueSet[$b]
688
}
689
690
# $three_way_cmp = _cmp_numeric;
691
sub _cmp_numeric {
692
2110
2110
5804
$ValueSet[$a] <=> $ValueSet[$b]
693
}
694
695
# $three_way_cmp = _cmp_empty_first;
696
sub _cmp_empty_first {
697
# If neither is empty, we have no opinion.
698
# If only one is empty, place it first
699
# If they're both empty, they're equivalent
700
0
0
0
0
( ! length($ValueSet[$a]) )
0
0
701
? ( ( ! length($ValueSet[$b]) ) ? 0 : -1 )
702
: ( ( ! length($ValueSet[$b]) ) ? 1 : undef );
703
}
704
705
# $three_way_cmp = _cmp_numbers_first;
706
sub _cmp_numbers_first {
707
# Use an extra array to cache our converted value
708
0
0
0
0
$Rule->{'ext_numeric'} ||= [];
709
0
0
my $is_numeric = $Rule->{'ext_numeric'};
710
711
# If we haven't already, check to see if the values are purely numeric
712
0
0
0
defined $is_numeric->[$a] or
713
$is_numeric->[$a] = ( $ValueSet[$a] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
714
0
0
0
defined $is_numeric->[$b] or
715
$is_numeric->[$b] = ( $ValueSet[$b] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
716
717
# If they're both numeric, use numeric comparison,
718
# If one's numeric and the other isn't, put the number first
719
# If neither is numeric, we have no opinion
720
0
0
0
( $is_numeric->[$a] )
0
0
721
? ( ( $is_numeric->[$b] ) ? ( $ValueSet[$a] <=> $ValueSet[$b] ) : -1 )
722
: ( ( $is_numeric->[$b] ) ? 1 : undef );
723
}
724
725
# $three_way_cmp = _cmp_textual;
726
sub _cmp_textual {
727
# Use an extra array to cache our converted value
728
0
0
0
0
$Rule->{'ext_textual'} ||= [];
729
0
0
my $mangled = $Rule->{'ext_textual'};
730
731
# If we haven't already, generate a lower-case, alphanumeric-only value
732
0
0
foreach my $idx ( $a, $b ) {
733
0
0
0
next if defined $mangled->[$idx];
734
0
0
local $_ = lc( $ValueSet[$idx] );
735
0
0
tr/0-9a-z/ /cs;
736
0
0
s/\A\s+//;
737
0
0
s/\s+\Z//;
738
0
0
$mangled->[$idx] = $_
739
}
740
741
# If both items have an alphanumeric value, compare them on that basis
742
# If one is alphanumeric and the other is punctuation/empty, put alpha last.
743
0
0
0
( length($mangled->[$a]) )
0
0
744
? ( length($mangled->[$b]) ? ( $mangled->[$a] cmp $mangled->[$b] ) : -1 )
745
: ( length($mangled->[$b]) ? 1 : undef );
746
}
747
748
# $three_way_cmp = _cmp_locale
749
sub _cmp_locale {
750
0
0
0
require POSIX;
751
0
0
POSIX::strcoll( $ValueSet[$a], $ValueSet[$b] );
752
}
753
754
# $three_way_cmp = _cmp_lc_locale
755
sub _cmp_lc_locale {
756
0
0
0
require POSIX;
757
0
0
POSIX::strcoll( lc($ValueSet[$a]), lc($ValueSet[$b]) );
758
}
759
760
sub _cmp_make_compound {
761
7
7
18
my @comparators = @_;
762
sub {
763
0
0
0
foreach my $comparator ( @comparators ) {
764
# Call each comparison function in an attempt to establish an ordering
765
0
0
my $rc = &$comparator;
766
# If the comparator returns undef, it has no opinion; call the next one
767
0
0
0
return($rc) if defined($rc);
768
}
769
}
770
7
39
}
771
772
{
773
7
7
82
no strict 'refs';
7
35
7
8504
774
*{'_cmp_num_lc_locale'} = _cmp_make_compound( \&_cmp_empty_first, \&_cmp_numbers_first, \&_cmp_lc_locale );
775
}
776
777
# $three_way_cmp = _cmp_natural;
778
sub _cmp_natural {
779
780
# If neither is empty, we have no opinion.
781
# If only one is empty, place it first
782
# If they're both empty, they're equivalent
783
2304
50
2304
5523
( ! length($ValueSet[$a]) )
100
100
784
? ( ( ! length($ValueSet[$b]) ) ? return 0 : return -1 )
785
: ( ( ! length($ValueSet[$b]) ) ? return 1 : undef );
786
787
# Use an extra array to cache our converted value
788
2258
100
4270
$Rule->{'ext_numeric'} ||= [];
789
2258
2839
my $is_numeric = $Rule->{'ext_numeric'};
790
791
# If we haven't already, check to see if the values are purely numeric
792
2258
100
5256
defined $is_numeric->[$a] or
793
$is_numeric->[$a] = ( $ValueSet[$a] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
794
2258
100
4948
defined $is_numeric->[$b] or
795
$is_numeric->[$b] = ( $ValueSet[$b] =~ /\A(?:\d*\.)?\d+\Z/ );
796
797
# If they're both numeric, use numeric comparison,
798
# If one's numeric and the other isn't, put the number first
799
# If neither is numeric, we have no opinion
800
2258
100
7350
( $is_numeric->[$a] )
100
100
801
? return( ( $is_numeric->[$b] ) ? ( $ValueSet[$a] <=> $ValueSet[$b] ) : -1 )
802
: ( ( $is_numeric->[$b] ) ? return 1 : undef );
803
804
# Use an extra array to cache our converted value
805
487
100
938
$Rule->{'ext_textual'} ||= [];
806
487
685
my $mangled = $Rule->{'ext_textual'};
807
808
# If we haven't already, generate a lower-case, alphanumeric-only value
809
487
624
foreach my $idx ( $a, $b ) {
810
974
100
2059
next if defined $mangled->[$idx];
811
220
419
local $_ = lc( $ValueSet[$idx] );
812
220
290
tr/0-9a-z/ /cs;
813
220
369
s/\A\s+//;
814
220
527
s/\s+\Z//;
815
220
266
s/\A(the)\s(.*)/$2 $1/;
816
220
528
$mangled->[$idx] = $_
817
}
818
819
# If both items have an alphanumeric value, compare them on that basis
820
# If one is alphanumeric and the other is punctuation/empty, put alpha last.
821
487
50
2121
( length($mangled->[$a]) )
0
50
822
? ( length($mangled->[$b]) ? ( $mangled->[$a] cmp $mangled->[$b] ) : -1 )
823
: ( length($mangled->[$b]) ? 1 : undef );
824
}
825
826
########################################################################
827
828
=head2 Ascending or Descending Order
829
830
For the order option, you may specify one of the following Is:
831
832
=over 4
833
834
=item forward I ascending
835
836
The default order, from lower values to higher ones.
837
838
=item reverse I descending
839
840
Reverses the ordering dictated by a sort rule.
841
842
=back
843
844
845
=head2 Sorting Engines
846
847
Depending on the specific sorting rules used in a given call, this module automatically selects an internal function that provides an appropriate approach to implementing the sort, called the sort "engine".
848
849
You can override this selection by setting an "engine" option on the first sort key, which can either contain either the name of one of the engines, described below, or a CODEREF with equivalent behavior.
850
851
=over 4
852
853
=item trivial
854
855
In the common case of sorting raw values with a cmp comparison, the fast-but-simple "trivial" engine is used, which simply applies Perl's default sorting.
856
857
=item orcish
858
859
For a complex multi-key sort the "orcish" engine is typically selected.
860
861
=item precalc
862
863
Used when there's only one sorting key.
864
865
You may also set the $PreCalculate package variable to true to force this engine to be selected. Because the sort key values for the list are calculated before entering Perl's sort operation, there's less of a chance of possible re-entry problems due to nested uses of the sort operator, which causes a fatal error in at least some versions of Perl.
866
867
=item packed
868
869
Some sorts are handled with the Guttman-Rosler technique, extracting packed keys and using Perl's default sort function, which is substantially faster, but currently only a limited set of simple comparisons can be handled this way. (For more information on packed-default sorting, see http://www.sysarch.com/perl/sort_paper.html or search for "Guttman-Rosler".)
870
871
=back
872
873
=cut
874
875
sub _sorted_trivial {
876
11
11
46
sort @Array
877
}
878
879
sub _sorted_precalc {
880
113
113
182
foreach my $rule (@Rules) {
881
113
402
$rule->{ext_value} = [ _extract_values_for_rule( $rule, @Array ) ]
882
}
883
113
572
return @Array[ sort _sorted_indexes_precalc 0 .. $#Array ];
884
}
885
886
# Compare indexes $a and $b acording to each of the specified rules
887
# $three_way_cmp = _sorted_indexes_precalc;
888
sub _sorted_indexes_precalc {
889
# implicit: $a, $b
890
891
5822
5822
8259
RULE: foreach $Rule (@Rules) {
892
5822
50
12070
local *ValueSet = ( $Rule->{ext_value} ||= [] );
893
894
# If the function returns zero or undef, the values are equivalent
895
5822
100
5856
my $rc = &{ $Rule->{compare_func} }
5822
10767
896
or next RULE;
897
898
# Else return the comparison results, reversing them first if necessary
899
5745
11527
return $rc * $Rule->{order_sign};
900
}
901
# If the items are equivalent for all of the rules, don't change their order
902
# warn "Comparing $a and $b: '$ValueSet[$a]' " . ('=') . " '$ValueSet[$b]'\n";
903
77
153
return $a <=> $b;
904
}
905
906
sub _sorted_orcish {
907
44
44
144
return @Array[ sort _sorted_indexes_orcish 0 .. $#Array ];
908
}
909
910
sub _sorted_indexes_orcish {
911
# implicit: $a, $b
912
913
204
204
314
RULE: foreach $Rule (@Rules) {
914
# If we haven't already, calculate the value of each item for this rule
915
292
100
1277
local *ValueSet = ( $Rule->{ext_value} ||= [] );
916
292
100
721
defined $ValueSet[$a] or $ValueSet[$a] = _extract_value($Array[$a], $Rule);
917
292
100
843
defined $ValueSet[$b] or $ValueSet[$b] = _extract_value($Array[$b], $Rule);
918
919
# If the function returns zero or undef, the values are equivalent
920
292
100
290
my $rc = &{ $Rule->{compare_func} }
292
515
921
or next RULE;
922
923
# Else return the comparison results, reversing them first if necessary
924
204
635
return $rc * $Rule->{order_sign};
925
}
926
# If the items are equivalent for all of the rules, don't change their order
927
# warn "Comparing $a and $b: '$ValueSet[$a]' " . ('=') . " '$ValueSet[$b]'\n";
928
0
0
return $a <=> $b;
929
}
930
931
sub _sorted_packed {
932
253
253
260
my @packed;
933
253
100
615
if ( @Rules == 1 ) {
934
836
1727
@packed = map
935
209
428
&{ $Rules[0]->{extract_func} }( $Array[$_], @{ $Rules[0]->{extract_args} } )
836
2776
936
. "\0" . $_,
937
( 0 .. $#Array );
938
} else {
939
176
246
@packed = map {
940
44
79
my $item = $Array[$_];
941
352
631
join( "\0",
942
176
240
map(&{ $_->{extract_func} }( $item, @{ $_->{extract_args} } ), @Rules),
352
711
943
$_
944
)
945
} ( 0 .. $#Array );
946
}
947
948
# warn "Packed: " . join(', ', map "'$_'", @packed ) . "\n";
949
950
253
3257
return @Array[ map substr($_, 1 + rindex $_, "\0"), sort @packed ];
951
}
952
953
########################################################################
954
955
=head1 STATUS AND SUPPORT
956
957
This release of Data::Sorting is intended for public review and feedback.
958
959
Name DSLIP Description
960
-------------- ----- ---------------------------------------------
961
Data::
962
::Sorting bdpfp Multi-key sort using function results
963
964
Further information and support for this module is available at www.evoscript.org.
965
966
Please report bugs or other problems to Ebugs@evoscript.comE.
967
968
=head1 BUGS AND TO DO
969
970
The following issues have been noted for future improvements:
971
972
Convert more types of comparisons to packed-default sorts for speed.
973
974
Further investigate the current status of the Sort::Records module.
975
976
Add a comparator function for an alpha-numeric-spans sorting model
977
like Sort::Naturally.
978
979
Interface to Sort::PolySort for alternate comparator styles, like
980
"name" and "usdate".
981
982
For non-scalar values, compare referents along the lines of
983
Ref::cmpref().
984
985
Provide better handling for nested sorts; perhaps throw an exception
986
from the inner instance to the outer, catch and set $PreCalculate,
987
then go back into the loop?
988
989
Replace dynamic scoping with object instances for thread safety.
990
May not be necessary given changes in threading models.
991
992
=head1 CREDITS AND COPYRIGHT
993
994
=head2 Developed By
995
996
M. Simon Cavalletto, simonm@cavalletto.org
997
Evolution Softworks, www.evoscript.org
998
999
=head2 Copyright
1000
1001
Copyright 2003 Matthew Cavalletto.
1002
1003
Portions copyright 1996, 1997, 1998, 1999 Evolution Online Systems, Inc.
1004
1005
=head2 License
1006
1007
You may use, modify, and distribute this software under the same terms as Perl.
1008
1009
=cut
1010
1011
########################################################################
1012
1013
1;