line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WordLists::Sort;
|
2
|
3
|
|
|
3
|
|
89801
|
use utf8;
|
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
22
|
|
3
|
3
|
|
|
3
|
|
92
|
use strict;
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
99
|
|
4
|
3
|
|
|
3
|
|
18
|
use warnings;
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
109
|
|
5
|
|
|
|
|
|
|
require Exporter;
|
6
|
3
|
|
|
3
|
|
993
|
use WordLists::Base;
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
6911
|
|
7
|
|
|
|
|
|
|
our $VERSION = $WordLists::Base::VERSION;
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @ISA = qw(Exporter);
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(
|
11
|
|
|
|
|
|
|
complex_compare
|
12
|
|
|
|
|
|
|
atomic_compare
|
13
|
|
|
|
|
|
|
sorted_collate
|
14
|
|
|
|
|
|
|
schwartzian_collate
|
15
|
|
|
|
|
|
|
);
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub complex_compare
|
18
|
|
|
|
|
|
|
{
|
19
|
12
|
|
|
12
|
1
|
20
|
my $args = $_[2];
|
20
|
12
|
50
|
33
|
|
|
74
|
if ( (defined $args) && (ref $args eq ref {}) )
|
21
|
|
|
|
|
|
|
{
|
22
|
12
|
0
|
33
|
|
|
58
|
return 0 if (!$args->{'override_undef'} and !defined($_[0]) and !defined ($_[1])); # avoid excessive execution of code if possible
|
|
|
|
33
|
|
|
|
|
23
|
12
|
50
|
33
|
|
|
64
|
return (defined($_[0]) <=> defined ($_[1])) if (!$args->{'override_undef'} and (!defined($_[0]) or !defined ($_[1])) ); # avoid excessive execution of code if possible
|
|
|
|
33
|
|
|
|
|
24
|
12
|
100
|
66
|
|
|
71
|
return 0 if (!$args->{'override_eq'} and ($_[0] eq $_[1])); # avoid excessive execution of code if possible
|
25
|
11
|
50
|
33
|
|
|
66
|
if ( (defined $args->{'functions'}) && (ref $args->{'functions'} eq ref []) )
|
26
|
|
|
|
|
|
|
{
|
27
|
11
|
|
|
|
|
14
|
my @functions = @{$args->{'functions'}};
|
|
11
|
|
|
|
|
31
|
|
28
|
11
|
|
|
|
|
21
|
foreach (@functions)
|
29
|
|
|
|
|
|
|
{
|
30
|
25
|
|
|
|
|
56
|
my $r = atomic_compare($_[0], $_[1], $_);
|
31
|
25
|
100
|
|
|
|
127
|
return $r unless $r == 0;
|
32
|
|
|
|
|
|
|
}
|
33
|
0
|
|
|
|
|
0
|
return 0;
|
34
|
|
|
|
|
|
|
}
|
35
|
|
|
|
|
|
|
else
|
36
|
|
|
|
|
|
|
{
|
37
|
0
|
|
|
|
|
0
|
warn 'Expected: $a, $b, { functions => [...]}';
|
38
|
0
|
0
|
|
|
|
0
|
return $_[0] cmp $_[1] unless defined $args;
|
39
|
|
|
|
|
|
|
}
|
40
|
|
|
|
|
|
|
}
|
41
|
|
|
|
|
|
|
else
|
42
|
|
|
|
|
|
|
{
|
43
|
0
|
0
|
|
|
|
0
|
warn 'Expected: $a, $b, {...}' if defined $args;
|
44
|
0
|
0
|
0
|
|
|
0
|
return 0 if (!defined($_[0]) and !defined ($_[1])); # avoid excessive execution of code if possible
|
45
|
0
|
0
|
0
|
|
|
0
|
return (defined($_[0]) <=> defined ($_[1])) if ((!defined($_[0]) or !defined ($_[1])) ); # avoid excessive execution of code if possible
|
46
|
0
|
|
|
|
|
0
|
return $_[0] cmp $_[1];
|
47
|
|
|
|
|
|
|
}
|
48
|
0
|
|
|
|
|
0
|
return 0;
|
49
|
|
|
|
|
|
|
}
|
50
|
|
|
|
|
|
|
sub debug_compare
|
51
|
|
|
|
|
|
|
{
|
52
|
260
|
50
|
66
|
260
|
0
|
2178
|
if (defined $_[2] and $_[2])
|
53
|
|
|
|
|
|
|
{
|
54
|
0
|
0
|
|
|
|
0
|
print "\n". (' ' x $_[2]) . 'Comparing `' . (defined $_[0]? $_[0]:'') . '` and `' . (defined $_[1]? $_[1]:'') .'`';
|
|
|
0
|
|
|
|
|
|
55
|
|
|
|
|
|
|
}
|
56
|
260
|
50
|
|
|
|
646
|
if (defined $_[3])
|
57
|
|
|
|
|
|
|
{
|
58
|
0
|
|
|
|
|
0
|
print "-- Result = $_[3]";
|
59
|
|
|
|
|
|
|
}
|
60
|
|
|
|
|
|
|
}
|
61
|
|
|
|
|
|
|
sub atomic_compare
|
62
|
|
|
|
|
|
|
{
|
63
|
90
|
|
|
90
|
1
|
1949
|
my @s = ($_[0], $_[1]);
|
64
|
90
|
|
|
|
|
108
|
my $args = $_[2];
|
65
|
90
|
100
|
66
|
|
|
418
|
if ( (defined $args) && (ref $args eq ref {}) )
|
66
|
|
|
|
|
|
|
{
|
67
|
80
|
0
|
33
|
|
|
292
|
return 0 if (!$args->{'override_undef'} and !defined($_[0]) and !defined ($_[1])); # avoid excessive execution of code if possible
|
|
|
|
33
|
|
|
|
|
68
|
80
|
50
|
33
|
|
|
353
|
return (defined($_[0]) <=> defined ($_[1])) if (!$args->{'override_undef'} and (!defined($_[0]) or !defined ($_[1])) ); # avoid excessive execution of code if possible
|
|
|
|
33
|
|
|
|
|
69
|
80
|
100
|
66
|
|
|
2143
|
return 0 if (!$args->{'override_eq'} and ($_[0] eq $_[1])); # avoid excessive execution of code if possible if ( (defined $args) && (ref $args eq ref {}) )
|
70
|
|
|
|
|
|
|
my %arg = (
|
71
|
54
|
|
|
54
|
|
87
|
'c' => sub { $_[0] cmp $_[1]; },
|
72
|
75
|
|
|
|
|
366
|
't' => [],
|
73
|
|
|
|
|
|
|
'n' => [],
|
74
|
|
|
|
|
|
|
'd' => 0,
|
75
|
75
|
|
|
|
|
240
|
%{$args},
|
76
|
|
|
|
|
|
|
);
|
77
|
75
|
|
|
|
|
291
|
debug_compare ($s[0], $s[1], $args->{'d'});
|
78
|
75
|
100
|
|
|
|
260
|
if (ref $args->{'n'} ne ref [])
|
79
|
|
|
|
|
|
|
{
|
80
|
74
|
|
|
|
|
137
|
$arg{'n'} = [];
|
81
|
74
|
100
|
|
8
|
|
178
|
$args->{'n'} = sub {$_[0];} unless defined $args->{'n'};
|
|
8
|
|
|
|
|
18
|
|
82
|
74
|
|
|
|
|
138
|
$arg{'n'}[0] = $args->{'n'};
|
83
|
74
|
|
|
|
|
125
|
$arg{'n'}[1] = $args->{'n'};
|
84
|
|
|
|
|
|
|
}
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
#push (@{$arg{'t'}}, {re=> qr/.+/, c=> $arg{'c'} }) unless defined${$arg{'t'}}[0];
|
87
|
75
|
|
|
|
|
98
|
my @t = (@{$arg{'t'}}, {re=> qr/./, c=> $arg{'c'} });
|
|
75
|
|
|
|
|
482
|
|
88
|
75
|
|
|
|
|
110
|
my @sToken;
|
89
|
|
|
|
|
|
|
my @sTokenType;
|
90
|
75
|
|
|
|
|
139
|
foreach my $i (0..1)
|
91
|
|
|
|
|
|
|
{
|
92
|
150
|
|
|
|
|
173
|
$s[$i] = &{ $arg{'n'}[$i] }($s[$i]);
|
|
150
|
|
|
|
|
499
|
|
93
|
|
|
|
|
|
|
do
|
94
|
150
|
|
|
|
|
254
|
{
|
95
|
386
|
|
|
|
|
1234
|
foreach (0..$#t)
|
96
|
|
|
|
|
|
|
{
|
97
|
563
|
|
|
|
|
818
|
my $re = $t[$_]{'re'};
|
98
|
|
|
|
|
|
|
|
99
|
563
|
100
|
|
|
|
7058
|
if ($s[$i] =~ s/^($re)//)
|
100
|
|
|
|
|
|
|
{
|
101
|
|
|
|
|
|
|
#print "\n($1)$s[$i] matches $re";
|
102
|
386
|
|
|
|
|
497
|
push @{$sToken[$i]}, $1;
|
|
386
|
|
|
|
|
1033
|
|
103
|
386
|
|
|
|
|
427
|
push @{$sTokenType[$i]}, $_;
|
|
386
|
|
|
|
|
629
|
|
104
|
386
|
|
|
|
|
1433
|
last;
|
105
|
|
|
|
|
|
|
}
|
106
|
|
|
|
|
|
|
else
|
107
|
|
|
|
|
|
|
{
|
108
|
|
|
|
|
|
|
#print "\n$s[$i] doesn't match $re";
|
109
|
|
|
|
|
|
|
}
|
110
|
|
|
|
|
|
|
}
|
111
|
|
|
|
|
|
|
} until $s[$i] eq '';
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
}
|
114
|
75
|
|
|
|
|
176
|
$arg{'d'}=$arg{'d'} * 2;
|
115
|
75
|
100
|
|
|
|
84
|
foreach ($#{$sTokenType[0]} >= $#{$sTokenType[1]} ? 0..$#{$sTokenType[0]} : 0..$#{$sTokenType[1]})
|
|
75
|
|
|
|
|
124
|
|
|
75
|
|
|
|
|
169
|
|
|
62
|
|
|
|
|
127
|
|
|
13
|
|
|
|
|
34
|
|
116
|
|
|
|
|
|
|
{
|
117
|
185
|
|
|
|
|
432
|
debug_compare ($sToken[0][$_], $sToken[1][$_], $arg{'d'});
|
118
|
185
|
100
|
66
|
|
|
739
|
if (defined $sTokenType[0][$_] and defined $sTokenType[1][$_])
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
119
|
|
|
|
|
|
|
{
|
120
|
174
|
100
|
|
|
|
330
|
if ($sTokenType[0][$_] == $sTokenType[1][$_])
|
121
|
|
|
|
|
|
|
{
|
122
|
172
|
|
|
|
|
264
|
my $c = $t[$sTokenType[0][$_]]{'c'};
|
123
|
172
|
100
|
|
|
|
309
|
if ((ref $c eq ref ''))
|
124
|
|
|
|
|
|
|
{
|
125
|
|
|
|
|
|
|
# todo: dwimmery code - dp - what dwimmery?
|
126
|
17
|
50
|
|
|
|
45
|
return $c unless $c ==0;
|
127
|
|
|
|
|
|
|
# return undef;
|
128
|
|
|
|
|
|
|
}
|
129
|
|
|
|
|
|
|
else
|
130
|
|
|
|
|
|
|
{
|
131
|
155
|
|
|
|
|
245
|
my $r = &{$c}($sToken[0][$_], $sToken[1][$_]);
|
|
155
|
|
|
|
|
364
|
|
132
|
155
|
100
|
|
|
|
920
|
return $r unless $r ==0;
|
133
|
|
|
|
|
|
|
}
|
134
|
|
|
|
|
|
|
}
|
135
|
|
|
|
|
|
|
else
|
136
|
|
|
|
|
|
|
{
|
137
|
2
|
|
|
|
|
24
|
return ($sTokenType[1][$_] <=> $sTokenType[0][$_]);
|
138
|
|
|
|
|
|
|
}
|
139
|
|
|
|
|
|
|
}
|
140
|
|
|
|
|
|
|
elsif (defined $sTokenType[0][$_])
|
141
|
|
|
|
|
|
|
{
|
142
|
0
|
|
|
|
|
0
|
return 1;
|
143
|
|
|
|
|
|
|
}
|
144
|
|
|
|
|
|
|
elsif (defined $sTokenType[1][$_])
|
145
|
|
|
|
|
|
|
{
|
146
|
11
|
|
|
|
|
131
|
return -1;
|
147
|
|
|
|
|
|
|
}
|
148
|
|
|
|
|
|
|
}
|
149
|
|
|
|
|
|
|
}
|
150
|
|
|
|
|
|
|
else
|
151
|
|
|
|
|
|
|
{
|
152
|
10
|
50
|
|
|
|
20
|
warn 'Expected: $a, $b, {...}' if defined $args;
|
153
|
10
|
100
|
66
|
|
|
38
|
return 0 if (!defined($_[0]) and !defined ($_[1])); # avoid excessive execution of code if possible
|
154
|
9
|
100
|
66
|
|
|
43
|
return (defined($_[0]) <=> defined ($_[1])) if ((!defined($_[0]) or !defined ($_[1])) ); # avoid excessive execution of code if possible
|
155
|
8
|
|
|
|
|
47
|
return $_[0] cmp $_[1];
|
156
|
|
|
|
|
|
|
}
|
157
|
41
|
|
|
|
|
175
|
return 0;
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub sorted_collate # Sorted Collation - hopefully O n log (n)
|
161
|
|
|
|
|
|
|
{
|
162
|
4
|
|
|
4
|
1
|
15
|
my ( $aIn, $cmp, $merge) = @_;
|
163
|
|
|
|
|
|
|
# ^ + $self
|
164
|
4
|
|
|
|
|
12
|
my $iEnum=0;
|
165
|
4
|
|
|
|
|
12
|
my $aEnum = [map {[ $iEnum++ , $_]; } @$aIn];
|
|
8
|
|
|
|
|
35
|
|
166
|
4
|
|
|
|
|
29
|
my $aSorted = [sort {&{$cmp}($a->[1],$b->[1])} @$aEnum];
|
|
5
|
|
|
|
|
20
|
|
|
5
|
|
|
|
|
16
|
|
167
|
|
|
|
|
|
|
|
168
|
4
|
|
|
|
|
32
|
for (my $i = 0; $i<$#{$aSorted}; $i++)
|
|
8
|
|
|
|
|
32
|
|
169
|
|
|
|
|
|
|
{
|
170
|
4
|
100
|
|
|
|
19
|
next unless defined $aSorted->[$i][1] ;
|
171
|
3
|
|
|
|
|
7
|
for (my $j = 1; $j<=$#{$aSorted}-$i; $j++)
|
|
6
|
|
|
|
|
22
|
|
172
|
|
|
|
|
|
|
{
|
173
|
4
|
50
|
|
|
|
18
|
if (defined $aSorted->[$i+$j][1])
|
174
|
|
|
|
|
|
|
{
|
175
|
4
|
100
|
|
|
|
89
|
if (0 == &{$cmp}($aSorted->[$i][1], $aSorted->[$i+$j][1]))
|
|
4
|
|
|
|
|
13
|
|
176
|
|
|
|
|
|
|
{
|
177
|
3
|
|
|
|
|
22
|
&{$merge}($aSorted->[$i][1], $aSorted->[$i+$j][1]);
|
|
3
|
|
|
|
|
11
|
|
178
|
3
|
|
|
|
|
20
|
$aSorted->[$i+$j][1] = undef;
|
179
|
|
|
|
|
|
|
}
|
180
|
|
|
|
|
|
|
else
|
181
|
1
|
|
|
|
|
9
|
{ $i += $j - 1;
|
182
|
1
|
|
|
|
|
3
|
last; # last j === next i
|
183
|
|
|
|
|
|
|
}
|
184
|
|
|
|
|
|
|
}
|
185
|
|
|
|
|
|
|
}
|
186
|
|
|
|
|
|
|
}
|
187
|
4
|
|
|
|
|
10
|
return [map {$_->[1]} sort { $a->[0] <=> $b->[0] } grep { defined $_->[1] } @$aSorted];
|
|
5
|
|
|
|
|
39
|
|
|
1
|
|
|
|
|
5
|
|
|
8
|
|
|
|
|
25
|
|
188
|
|
|
|
|
|
|
}
|
189
|
|
|
|
|
|
|
sub schwartzian_collate # Schwartzian Collation - hopefully O n log (n), but less than sorted collation, if $norm is slow
|
190
|
|
|
|
|
|
|
{
|
191
|
4
|
|
|
4
|
1
|
16
|
my ( $aIn, $cmp, $norm, $merge) = @_;
|
192
|
|
|
|
|
|
|
# ^ + $self
|
193
|
4
|
|
|
|
|
13
|
my $iEnum=0;
|
194
|
4
|
|
|
|
|
13
|
my $aEnum;
|
195
|
|
|
|
|
|
|
my $aSorted;
|
196
|
4
|
50
|
|
|
|
19
|
if (defined $norm)
|
197
|
|
|
|
|
|
|
{
|
198
|
4
|
|
|
|
|
12
|
$aEnum = [map {[ $iEnum++ , $_, &{$norm}($_)]; } @$aIn];
|
|
8
|
|
|
|
|
36
|
|
|
8
|
|
|
|
|
28
|
|
199
|
4
|
|
|
|
|
43
|
$aSorted = [sort {&{$cmp}($a->[2],$b->[2])} @$aEnum];
|
|
5
|
|
|
|
|
17
|
|
|
5
|
|
|
|
|
15
|
|
200
|
|
|
|
|
|
|
}
|
201
|
|
|
|
|
|
|
else
|
202
|
|
|
|
|
|
|
{
|
203
|
0
|
|
|
|
|
0
|
$aEnum = [map {[ $iEnum++ , $_]; } @$aIn];
|
|
0
|
|
|
|
|
0
|
|
204
|
0
|
|
|
|
|
0
|
$aSorted = [sort {&{$cmp}($a->[1],$b->[1])} @$aEnum];
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
205
|
|
|
|
|
|
|
}
|
206
|
4
|
|
|
|
|
22
|
for (my $i = 0; $i<$#{$aSorted}; $i++)
|
|
8
|
|
|
|
|
31
|
|
207
|
|
|
|
|
|
|
{
|
208
|
4
|
100
|
|
|
|
15
|
next unless defined $aSorted->[$i][1] ;
|
209
|
3
|
|
|
|
|
7
|
for (my $j = 1; $j<=$#{$aSorted}-$i; $j++)
|
|
6
|
|
|
|
|
24
|
|
210
|
|
|
|
|
|
|
{
|
211
|
4
|
50
|
|
|
|
14
|
if (defined $aSorted->[$i+$j][1])
|
212
|
|
|
|
|
|
|
{
|
213
|
4
|
50
|
|
|
|
16
|
if (
|
|
|
100
|
|
|
|
|
|
214
|
4
|
|
|
|
|
171
|
(defined $norm) ?
|
215
|
0
|
|
|
|
|
0
|
( 0 == &{$cmp}($aSorted->[$i][2], $aSorted->[$i+$j][2]) ) :
|
216
|
|
|
|
|
|
|
( 0 == &{$cmp}($aSorted->[$i][1], $aSorted->[$i+$j][1]) )
|
217
|
|
|
|
|
|
|
)
|
218
|
|
|
|
|
|
|
{
|
219
|
3
|
|
|
|
|
18
|
&{$merge}($aSorted->[$i][1], $aSorted->[$i+$j][1]);
|
|
3
|
|
|
|
|
10
|
|
220
|
3
|
|
|
|
|
19
|
$aSorted->[$i+$j][1] = undef;
|
221
|
|
|
|
|
|
|
}
|
222
|
|
|
|
|
|
|
else
|
223
|
1
|
|
|
|
|
6
|
{ $i += $j - 1;
|
224
|
1
|
|
|
|
|
42
|
last; # last j === next i
|
225
|
|
|
|
|
|
|
}
|
226
|
|
|
|
|
|
|
}
|
227
|
|
|
|
|
|
|
}
|
228
|
|
|
|
|
|
|
}
|
229
|
4
|
|
|
|
|
12
|
return [map {$_->[1]} sort { $a->[0] <=> $b->[0] } grep { defined $_->[1] } @$aSorted];
|
|
5
|
|
|
|
|
43
|
|
|
1
|
|
|
|
|
4
|
|
|
8
|
|
|
|
|
22
|
|
230
|
|
|
|
|
|
|
}
|
231
|
|
|
|
|
|
|
sub naive_collate # Naive Collation - probably O n**2
|
232
|
|
|
|
|
|
|
{
|
233
|
4
|
|
|
4
|
1
|
10
|
my ( $aIn, $cmp, $merge) = @_;
|
234
|
|
|
|
|
|
|
# ^ + $self
|
235
|
4
|
|
|
|
|
7
|
my $iEnum = 0;
|
236
|
4
|
|
|
|
|
13
|
my $aEnum = [map {[$iEnum++,$_]} @$aIn];
|
|
8
|
|
|
|
|
28
|
|
237
|
4
|
|
|
|
|
12
|
for (my $i = 0; $i<$#{$aEnum}; $i++)
|
|
8
|
|
|
|
|
32
|
|
238
|
|
|
|
|
|
|
{
|
239
|
4
|
100
|
|
|
|
17
|
next unless defined $aEnum->[$i][1] ;
|
240
|
3
|
|
|
|
|
6
|
for (my $j = 1; $j<=$#{$aEnum}-$i; $j++)
|
|
7
|
|
|
|
|
32
|
|
241
|
|
|
|
|
|
|
{
|
242
|
4
|
50
|
|
|
|
15
|
if (defined $aEnum->[$i+$j][1])
|
243
|
|
|
|
|
|
|
{
|
244
|
4
|
100
|
|
|
|
11
|
if (0 == &{$cmp}($aEnum->[$i][1], $aEnum->[$i+$j][1]))
|
|
4
|
|
|
|
|
16
|
|
245
|
|
|
|
|
|
|
{
|
246
|
3
|
|
|
|
|
26
|
&{$merge}($aEnum->[$i][1], $aEnum->[$i+$j][1]);
|
|
3
|
|
|
|
|
9
|
|
247
|
3
|
|
|
|
|
21
|
$aEnum->[$i+$j][1] = undef;
|
248
|
|
|
|
|
|
|
}
|
249
|
|
|
|
|
|
|
}
|
250
|
|
|
|
|
|
|
}
|
251
|
|
|
|
|
|
|
}
|
252
|
4
|
|
|
|
|
9
|
return [map {$_->[1]} grep { defined $_->[1] } @$aEnum];
|
|
5
|
|
|
|
|
116
|
|
|
8
|
|
|
|
|
23
|
|
253
|
|
|
|
|
|
|
}
|
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
return 1;
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=pod
|
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head1 NAME
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
WordLists::Sort
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Provides a structure for comparison functions, generally for complex sort.
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# The following sorts "No6" "No.7" "no 8" in that order - ignoring punctuation.
|
270
|
|
|
|
|
|
|
@sorted = sort { atomic_compare (
|
271
|
|
|
|
|
|
|
$a,$b,{ n => sub{ $_[0]=~s/[^[:alnum:]]//g; lc $_[0]; } }
|
272
|
|
|
|
|
|
|
) } @unsorted;
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# The following sorts A9 before A10.
|
275
|
|
|
|
|
|
|
@sorted = sort { atomic_compare (
|
276
|
|
|
|
|
|
|
$a,$b,{ t => [ { re => qr/[0-9]+/, c => sub { $_[0] <=> $_[1]; } }, ], } }
|
277
|
|
|
|
|
|
|
) } @unsorted;
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
This is by far and away the most evil member of the L family (it's also pretty much unrelated to all the others). It is basically a terse way of writing complex comparison/sort functions as one liners (if you want to).
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
The intention is to be able to sort by several different criteria, e.g. so "the UN" sorts after "un-" and before "unabashed", and/or so that "F\x{E9}" sorts after "Fe" but before "FE".
|
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
Once you've written/cribbed a sort algorithm, it's easy to use - just put it in a subroutine and call it. (Actually, what you're writing is a comparison algrithm, which perl's C then calls).
|
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
Writing it is a bit harder, though: the framework involves (potentially) anonymous coderefs sprinkled amidst the hashrefs - it's much easier with indentation.
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 FUNCTIONS
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head2 atomic_compare
|
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
C: This provides most of the functionality in the module. It allows normalisation of the arguments, tokenisation so that different sections can be compared with different criteria, and, if so desired, flipping of the result.
|
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head3 Function arguments
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
C: Normalise. This should be a coderef. If present, runs the code on each argument before comparison.
|
301
|
|
|
|
|
|
|
Note that this only happens locally to the function, so lowercasing in functions[1]{n} will not prevent functions[2] putting VAT before vat.
|
302
|
|
|
|
|
|
|
(If you want to keep them, nest the original function in the c).
|
303
|
|
|
|
|
|
|
If C is an arrayref, it runs the first code on C<$a>, the second on C<$b>.
|
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
C: Tokenize. An arrayref containing hashrefs, each of which is attempted in order. In each hashref should be a regex keyed to C which will match in case you want do different comparisons on different types of data. Permitted values, other than coderefs, are 0 (e.g. C<< {re=>qr/\d/, 'c'=>0} >> means 1 and 9 are equivalent), -1 or 1 (meaning that if this token is discovered at the same location, $a or $b always wins - NB that this is to be avoided in sort functions).
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
C: Flip. if set to 1, then the result is reversed (-1 becomes 1 and vice versa but 0 stays the same).
|
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
C: Comparison to use for text which doesn't match a token. Default behaviour is to use a wrapper for C.
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Below is an C function for sorting names of units in a workbook. You want them to appear in the order Welcome, Unit 1, Unit 2, ... Unit 11, but perl's C would put "Welcome" at the end and sorts "Unit 11" before "Unit 2". The normalisation is a hack to pretend 'Welcome' is equivalent to "Unit 0", and the tokenisation instructs that series of digits should be compared as numbers and not as strings, so 10 and 11 now sort after 9.
|
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
atomic_compare (
|
314
|
|
|
|
|
|
|
$a, $b,
|
315
|
|
|
|
|
|
|
{
|
316
|
|
|
|
|
|
|
n => sub{
|
317
|
|
|
|
|
|
|
$_[0] =~ s/^Welcome$/Unit 0/i;
|
318
|
|
|
|
|
|
|
lc $_[0];
|
319
|
|
|
|
|
|
|
},
|
320
|
|
|
|
|
|
|
t =>
|
321
|
|
|
|
|
|
|
[
|
322
|
|
|
|
|
|
|
{
|
323
|
|
|
|
|
|
|
re => qr/[0-9]+/,
|
324
|
|
|
|
|
|
|
c => sub { $_[0] <=> $_[1]; }
|
325
|
|
|
|
|
|
|
},
|
326
|
|
|
|
|
|
|
],
|
327
|
|
|
|
|
|
|
}
|
328
|
|
|
|
|
|
|
);
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 complex_compare
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
C allows a user to perform several successive comparisons, returning a value as soon as but only when a nonzero result is achieved. This is useful for situations such as:
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=over
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item *
|
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
For user-facing sorting, such as dictionary headwords where "the Internet" should normally sort after "internet" and not after "theft".
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item *
|
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
For sorting which requires heavy processing only in some cases, e.g. an identifier C always sorts before C, whatever the numerical values, but to compare C and C an external resource (lookup table, AJAX data, etc.) must be consulted.
|
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item *
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Where certain values have high priority, e.g. if you'd like to see the string 'undef' appear before the sting '0'.
|
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=back
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
C is pretty much equivalent to C, but is potentially less confusing than using the C<||> or C operators, and may be easier to code than repeating the C, C<$a>, C<$b>.
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head3 Function arguments
|
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
C: Prevents the function returning 0 immediately when the strings are identical. (The arguments are ordinarily tested for string equality at the beginning, in order to prevent unnecessary processing.) Setting this flag is only necessary if you have a condition which forces C<$a> or C<$b> to win for certain strings which are equal.
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
C: In , an arrayref containing a hashref equivalent to the third value in C. Each function performs a comparison which executes and returns 0, 1, or -1. If the result of any function is nonzero, that is the return value. If it is zero, the next function is tried.
|
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
complex_compare ($a, $b, {
|
360
|
|
|
|
|
|
|
functions =>
|
361
|
|
|
|
|
|
|
[
|
362
|
|
|
|
|
|
|
{
|
363
|
|
|
|
|
|
|
n => sub{lc $_[0];}, #is \&lc possible?
|
364
|
|
|
|
|
|
|
},
|
365
|
|
|
|
|
|
|
{
|
366
|
|
|
|
|
|
|
n => sub{$_[0] =~ s/^the\s//; $_[0];},
|
367
|
|
|
|
|
|
|
t =>
|
368
|
|
|
|
|
|
|
[
|
369
|
|
|
|
|
|
|
{ qr/[^[:alpha:]]/ => 0 },
|
370
|
|
|
|
|
|
|
],
|
371
|
|
|
|
|
|
|
f => 1,
|
372
|
|
|
|
|
|
|
c => sub { $_[0] cmp $ [1] },
|
373
|
|
|
|
|
|
|
},
|
374
|
|
|
|
|
|
|
]
|
375
|
|
|
|
|
|
|
})
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head2 naive_collate
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Performs a collation on an arrayref. Provide a) the data, b) a comparison function which returns 0 if the two comparands are duplicates, and c) a merge function which takes the first and later duplicate.
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
NB: This is slow, use either C or C unless you have a good reason for using this function (e.g. your comparison function is unstable and doesn't function like C). To discourage casual use, it is not exported.
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 sorted_collate
|
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Like C, but faster.
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
It is faster because rather than comparing every element against every other element that hasn't already been collated, it sorts once first, then compares against following elements until it finds one which doesn't match, then stops. The list is then returned to its original order (except without the duplicates).
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head2 schwartzian_collate
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Like C, but uses a Schwartzian transform: after the comparison function, provide a normalisation function.
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
It is about as fast as the C, but can be several times faster when the normalisation function is complex.
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=head1 TODO
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=over
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item *
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Add lots more dwimmery so that C expressions can be used wherever they're likely to be useful and coderefs can be substituted in for regexes where their function is to test.
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item *
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Make priority lists a bit easier, e.g. by allowing regexes - or even strings - amongst the hashrefs in C.
|
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item *
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Write some good, sensible examples for C.
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item *
|
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Gather useful common comparison functions which can be imported, studied, borrowed, etc. and offer them as a module.
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item *
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Write more test cases.
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=item *
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Possibly remove assumptions about the comparanda, i.e. permit comparison of objects, references, etc. (But then: how does tokenisation work? Maybe it only works if C<< n=>sub{$_[0]->toString} >>? Wouldn't we want to compare hashrefs and arrayrefs more intelligently?)
|
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=item *
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
Figure out how to get C and C to work with Schwartzian transforms.
|
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=back
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head1 BUGS
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Please use the Github issues tracker.
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 LICENSE
|
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=cut |