line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package File::Sort; |
2
|
1
|
|
|
1
|
|
264799
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
56
|
|
3
|
1
|
|
|
1
|
|
5
|
use Fcntl qw(O_RDONLY O_WRONLY O_CREAT O_TRUNC); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
44
|
|
4
|
1
|
|
|
1
|
|
826
|
use Symbol qw(gensym); |
|
1
|
|
|
|
|
860
|
|
|
1
|
|
|
|
|
64
|
|
5
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
6
|
1
|
|
|
1
|
|
719
|
use locale; |
|
1
|
|
|
|
|
205
|
|
|
1
|
|
|
|
|
5
|
|
7
|
1
|
|
|
1
|
|
26
|
use vars qw($VERSION *sortsub *sort1 *sort2 *map1 *map2 %fh); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
84
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
10
|
1
|
|
|
1
|
|
4
|
use vars qw(@ISA @EXPORT_OK); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3586
|
|
11
|
|
|
|
|
|
|
@ISA = 'Exporter'; |
12
|
|
|
|
|
|
|
@EXPORT_OK = 'sort_file'; |
13
|
|
|
|
|
|
|
$VERSION = '1.01'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub sort_file { |
16
|
7
|
|
|
7
|
0
|
11821
|
my @args = @_; |
17
|
7
|
50
|
|
|
|
34
|
if (ref $args[0]) { |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# fix pos to look like k |
20
|
7
|
50
|
|
|
|
39
|
if (exists $args[0]{'pos'}) { |
21
|
0
|
|
|
|
|
0
|
my @argv; |
22
|
0
|
|
|
|
|
0
|
my $pos = $args[0]{'pos'}; |
23
|
|
|
|
|
|
|
|
24
|
0
|
0
|
|
|
|
0
|
if (!ref $pos) { |
25
|
0
|
|
|
|
|
0
|
$pos = [$pos]; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
0
|
0
|
|
|
|
0
|
if (!exists $args[0]{'k'}) { |
|
|
0
|
|
|
|
|
|
29
|
0
|
|
|
|
|
0
|
$args[0]{'k'} = []; |
30
|
|
|
|
|
|
|
} elsif (!ref $args[0]{'k'}) { |
31
|
0
|
|
|
|
|
0
|
$args[0]{'k'} = [$args[0]{'k'}]; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
0
|
|
|
|
|
0
|
for (@$pos) { |
35
|
0
|
|
|
|
|
0
|
my $n; |
36
|
0
|
0
|
|
|
|
0
|
if ( /^\+(\d+)(?:\.(\d+))?([bdfinr]+)? |
37
|
|
|
|
|
|
|
(?:\s+\-(\d+)(?:\.(\d+))?([bdfinr]+)?)?$/x) { |
38
|
0
|
|
|
|
|
0
|
$n = $1 + 1; |
39
|
0
|
0
|
|
|
|
0
|
$n .= '.' . ($2 + 1) if defined $2; |
40
|
0
|
0
|
|
|
|
0
|
$n .= $3 if $3; |
41
|
|
|
|
|
|
|
|
42
|
0
|
0
|
|
|
|
0
|
if (defined $4) { |
43
|
0
|
0
|
|
|
|
0
|
$n .= "," . (defined $5 ? ($4 + 1) . ".$5" : $4); |
44
|
0
|
0
|
|
|
|
0
|
$n .= $6 if $6; |
45
|
|
|
|
|
|
|
} |
46
|
0
|
|
|
|
|
0
|
push @{$args[0]{'k'}}, $n; |
|
0
|
|
|
|
|
0
|
|
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
} |
51
|
7
|
|
|
|
|
26
|
_sort_file(@args); |
52
|
|
|
|
|
|
|
} else { |
53
|
0
|
|
|
|
|
0
|
_sort_file({I => $args[0], o => $args[1]}); |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub _sort_file { |
58
|
7
|
|
|
7
|
|
35
|
local $\; # don't mess up our prints |
59
|
7
|
|
|
|
|
20
|
my($opts, @fh, @recs) = shift; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# record separator, default to \n |
62
|
7
|
50
|
|
|
|
43
|
local $/ = $opts->{R} ? $opts->{R} : "\n"; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# get input files into anon array if not already |
65
|
7
|
100
|
|
|
|
52
|
$opts->{I} = [$opts->{I}] unless ref $opts->{I}; |
66
|
|
|
|
|
|
|
|
67
|
7
|
50
|
|
|
|
16
|
usage() unless @{$opts->{I}}; |
|
7
|
|
|
|
|
26
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# "K" == "no k", for later |
70
|
7
|
100
|
|
|
|
28
|
$opts->{K} = $opts->{k} ? 0 : 1; |
71
|
7
|
100
|
|
|
|
38
|
$opts->{k} = $opts->{k} ? [$opts->{k}] : [] if !ref $opts->{k}; |
|
|
50
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# set output and other defaults |
74
|
7
|
50
|
|
|
|
32
|
$opts->{o} = !$opts->{o} ? '' : $opts->{o}; |
75
|
7
|
|
50
|
|
|
87
|
$opts->{'y'} ||= $ENV{MAX_SORT_RECORDS} || 200000; # default max records |
|
|
|
66
|
|
|
|
|
76
|
7
|
|
50
|
|
|
71
|
$opts->{F} ||= $ENV{MAX_SORT_FILES} || 40; # default max files |
|
|
|
33
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# see big ol' mess below |
80
|
7
|
|
|
|
|
38
|
_make_sort_sub($opts); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# only check to see if file is sorted |
83
|
7
|
50
|
|
|
|
46
|
if ($opts->{c}) { |
|
|
50
|
|
|
|
|
|
84
|
0
|
|
|
|
|
0
|
local *F; |
85
|
0
|
|
|
|
|
0
|
my $last; |
86
|
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
0
|
if ($opts->{I}[0] eq '-') { |
88
|
0
|
0
|
|
|
|
0
|
open(F, $opts->{I}[0]) |
89
|
|
|
|
|
|
|
or die "Can't open `$opts->{I}[0]' for reading: $!"; |
90
|
|
|
|
|
|
|
} else { |
91
|
0
|
0
|
|
|
|
0
|
sysopen(F, $opts->{I}[0], O_RDONLY) |
92
|
|
|
|
|
|
|
or die "Can't open `$opts->{I}[0]' for reading: $!"; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
0
|
while (defined(my $rec = )) { |
96
|
|
|
|
|
|
|
# fail if -u and keys are not unique (assume sorted) |
97
|
0
|
0
|
0
|
|
|
0
|
if ($opts->{u} && $last) { |
98
|
0
|
0
|
|
|
|
0
|
return 0 unless _are_uniq($opts->{K}, $last, $rec); |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# fail if records not in proper sort order |
102
|
0
|
0
|
|
|
|
0
|
if ($last) { |
103
|
0
|
|
|
|
|
0
|
my @foo; |
104
|
0
|
0
|
|
|
|
0
|
if ($opts->{K}) { |
105
|
0
|
|
|
|
|
0
|
local $^W; |
106
|
0
|
|
|
|
|
0
|
@foo = sort sort1 ($rec, $last); |
107
|
|
|
|
|
|
|
} else { |
108
|
0
|
|
|
|
|
0
|
local $^W; |
109
|
0
|
|
|
|
|
0
|
@foo = map {$_->[0]} sort sortsub |
|
0
|
|
|
|
|
0
|
|
110
|
|
|
|
|
|
|
map &map1, ($rec, $last); |
111
|
|
|
|
|
|
|
} |
112
|
0
|
0
|
0
|
|
|
0
|
return 0 if $foo[0] ne $last || $foo[1] ne $rec; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# save value of last record |
116
|
0
|
|
|
|
|
0
|
$last = $rec; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# success, yay |
120
|
0
|
|
|
|
|
0
|
return 1; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# if merging sorted files |
123
|
|
|
|
|
|
|
} elsif ($opts->{'m'}) { |
124
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
0
|
foreach my $filein (@{$opts->{I}}) { |
|
0
|
|
|
|
|
0
|
|
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# just open files and get array of handles |
128
|
0
|
|
|
|
|
0
|
my $sym = gensym(); |
129
|
|
|
|
|
|
|
|
130
|
0
|
0
|
|
|
|
0
|
sysopen($sym, $filein, O_RDONLY) |
131
|
|
|
|
|
|
|
or die "Can't open `$filein' for reading: $!"; |
132
|
|
|
|
|
|
|
|
133
|
0
|
|
|
|
|
0
|
push @fh, $sym; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# ooo, get ready, get ready |
137
|
|
|
|
|
|
|
} else { |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# once for each input file |
140
|
7
|
|
|
|
|
10
|
foreach my $filein (@{$opts->{I}}) { |
|
7
|
|
|
|
|
23
|
|
141
|
8
|
|
|
|
|
20
|
local *F; |
142
|
8
|
|
|
|
|
16
|
my $count = 0; |
143
|
|
|
|
|
|
|
|
144
|
8
|
50
|
|
|
|
27
|
_debug("Sorting file $filein ...\n") if $opts->{D}; |
145
|
|
|
|
|
|
|
|
146
|
8
|
50
|
|
|
|
20
|
if ($filein eq '-') { |
147
|
0
|
0
|
|
|
|
0
|
open(F, $filein) |
148
|
|
|
|
|
|
|
or die "Can't open `$filein' for reading: $!"; |
149
|
|
|
|
|
|
|
} else { |
150
|
8
|
50
|
|
|
|
430
|
sysopen(F, $filein, O_RDONLY) |
151
|
|
|
|
|
|
|
or die "Can't open `$filein' for reading: $!"; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
8
|
|
|
|
|
163
|
while (defined(my $rec = )) { |
155
|
4724
|
|
|
|
|
6758
|
push @recs, $rec; |
156
|
4724
|
|
|
|
|
4464
|
$count++; # keep track of number of records |
157
|
|
|
|
|
|
|
|
158
|
4724
|
100
|
|
|
|
14556
|
if ($count >= $opts->{'y'}) { # don't go over record limit |
159
|
|
|
|
|
|
|
|
160
|
100
|
50
|
|
|
|
188
|
_debug("$count records reached in `$filein'\n") |
161
|
|
|
|
|
|
|
if $opts->{D}; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# save to temp file, add new fh to array |
164
|
100
|
|
|
|
|
224
|
push @fh, _write_temp(\@recs, $opts); |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# reset record count and record array |
167
|
100
|
|
|
|
|
229
|
($count, @recs) = (0); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# do a merge now if at file limit |
170
|
100
|
100
|
|
|
|
493
|
if (@fh >= $opts->{F}) { |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# get filehandle and restart array with it |
173
|
2
|
|
|
|
|
11
|
@fh = (_merge_files($opts, \@fh, [], _get_temp())); |
174
|
|
|
|
|
|
|
|
175
|
2
|
50
|
|
|
|
34
|
_debug("\nCreating temp files ...\n") if $opts->{D}; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
8
|
|
|
|
|
150
|
close F; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# records leftover, didn't reach record limit |
184
|
7
|
100
|
|
|
|
30
|
if (@recs) { |
185
|
5
|
50
|
|
|
|
21
|
_debug("\nSorting leftover records ...\n") if $opts->{D}; |
186
|
5
|
|
|
|
|
25
|
_check_last(\@recs); |
187
|
5
|
100
|
|
|
|
19
|
if ($opts->{K}) { |
188
|
3
|
|
|
|
|
17
|
local $^W; |
189
|
3
|
|
|
|
|
249
|
@recs = sort sort1 @recs; |
190
|
|
|
|
|
|
|
} else { |
191
|
2
|
|
|
|
|
6
|
local $^W; |
192
|
2
|
|
|
|
|
54
|
@recs = map {$_->[0]} sort sortsub map &map1, @recs; |
|
200
|
|
|
|
|
398
|
|
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# do the merge thang, uh huh, do the merge thang |
198
|
7
|
|
|
|
|
116
|
my $close = _merge_files($opts, \@fh, \@recs, $opts->{o}); |
199
|
7
|
50
|
|
|
|
157
|
close $close unless fileno($close) == fileno('STDOUT'); # don't close STDOUT |
200
|
|
|
|
|
|
|
|
201
|
7
|
50
|
|
|
|
26
|
_debug("\nDone!\n\n") if $opts->{D}; |
202
|
7
|
|
|
|
|
1178
|
return 1; # yay |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# take optional arrayref of handles of sorted files, |
206
|
|
|
|
|
|
|
# plus optional arrayref of sorted scalars |
207
|
|
|
|
|
|
|
sub _merge_files { |
208
|
|
|
|
|
|
|
# we need the options, filehandles, and output file |
209
|
9
|
|
|
9
|
|
27
|
my($opts, $fh, $recs, $file) = @_; |
210
|
9
|
|
|
|
|
13
|
my($uniq, $first, $o, %oth); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# arbitrarily named keys, store handles as values |
213
|
9
|
|
|
|
|
27
|
%oth = map {($o++ => $_)} @$fh; |
|
102
|
|
|
|
|
232
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# match handle key in %oth to next record of the handle |
216
|
102
|
|
|
|
|
114
|
%fh = map { |
217
|
9
|
|
|
|
|
47
|
my $fh = $oth{$_}; |
218
|
102
|
|
|
|
|
753
|
($_ => scalar <$fh>); |
219
|
|
|
|
|
|
|
} keys %oth; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# extra records, special X "handle" |
222
|
9
|
100
|
|
|
|
56
|
$fh{X} = shift @$recs if @$recs; |
223
|
|
|
|
|
|
|
|
224
|
9
|
50
|
|
|
|
31
|
_debug("\nCreating sorted $file ...\n") if $opts->{D}; |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# output to STDOUT if no output file provided |
227
|
9
|
50
|
|
|
|
55
|
if ($file eq '') { |
|
|
100
|
|
|
|
|
|
228
|
0
|
|
|
|
|
0
|
$file = \*STDOUT; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# if output file is a path, not a reference to a file, open |
231
|
|
|
|
|
|
|
# file and get a reference to it |
232
|
|
|
|
|
|
|
} elsif (!ref $file) { |
233
|
7
|
|
|
|
|
43
|
my $tfh = gensym(); |
234
|
7
|
50
|
|
|
|
857
|
sysopen($tfh, $file, O_WRONLY|O_CREAT|O_TRUNC) |
235
|
|
|
|
|
|
|
or die "Can't open `$file' for writing: $!"; |
236
|
7
|
|
|
|
|
18
|
$file = $tfh; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
9
|
|
|
|
|
35
|
my $oldfh = select $file; |
240
|
9
|
|
|
|
|
27
|
$| = 0; # just in case, use the buffer, you knob |
241
|
|
|
|
|
|
|
|
242
|
9
|
|
|
|
|
28
|
while (keys %fh) { |
243
|
|
|
|
|
|
|
# don't bother sorting keys if only one key remains! |
244
|
316
|
100
|
66
|
|
|
1404
|
if (!$opts->{u} && keys %fh == 1) { |
245
|
9
|
|
|
|
|
31
|
($first) = keys %fh; |
246
|
9
|
|
|
|
|
18
|
my $curr = $oth{$first}; |
247
|
9
|
100
|
|
|
|
733
|
my @left = $first eq 'X' ? @$recs : <$curr>; |
248
|
9
|
|
|
|
|
974
|
print $fh{$first}, @left; |
249
|
9
|
|
|
|
|
22
|
delete $fh{$first}; |
250
|
9
|
|
|
|
|
212
|
last; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
{ |
254
|
|
|
|
|
|
|
# $first is arbitrary number assigned to first fh in sort |
255
|
307
|
50
|
|
|
|
279
|
if ($opts->{K}) { |
|
307
|
|
|
|
|
493
|
|
256
|
307
|
|
|
|
|
632
|
local $^W; |
257
|
307
|
|
|
|
|
7756
|
($first) = (sort sort2 keys %fh); |
258
|
|
|
|
|
|
|
} else { |
259
|
0
|
|
|
|
|
0
|
local $^W; |
260
|
0
|
|
|
|
|
0
|
($first) = (map {$_->[0]} sort sortsub |
|
0
|
|
|
|
|
0
|
|
261
|
|
|
|
|
|
|
map &map2, keys %fh); |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# don't print if -u and not unique |
266
|
307
|
50
|
|
|
|
1029
|
if ($opts->{u}) { |
267
|
0
|
0
|
0
|
|
|
0
|
print $fh{$first} if |
268
|
|
|
|
|
|
|
(!$uniq || _are_uniq($opts->{K}, $uniq, $fh{$first})); |
269
|
0
|
|
|
|
|
0
|
$uniq = $fh{$first}; |
270
|
|
|
|
|
|
|
} else { |
271
|
307
|
|
|
|
|
511
|
print $fh{$first}; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# get current filehandle |
275
|
307
|
|
|
|
|
482
|
my $curr = $oth{$first}; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# use @$recs, not filehandles, if key is X |
278
|
307
|
50
|
|
|
|
1310
|
my $rec = $first eq 'X' ? shift @$recs : scalar <$curr>; |
279
|
|
|
|
|
|
|
|
280
|
307
|
100
|
|
|
|
507
|
if (defined $rec) { # bring up next record for this filehandle |
281
|
209
|
|
|
|
|
595
|
$fh{$first} = $rec; |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
} else { # we don't need you anymore |
284
|
98
|
|
|
|
|
313
|
delete $fh{$first}; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
9
|
|
|
|
|
467
|
seek $file, 0, 0; # might need to read back from it |
289
|
9
|
|
|
|
|
46
|
select $oldfh; |
290
|
9
|
|
|
|
|
3157
|
return $file; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub _check_last { |
294
|
|
|
|
|
|
|
# add new record separator if not one there |
295
|
105
|
100
|
|
105
|
|
117
|
${$_[0]}[-1] .= $/ if (${$_[0]}[-1] !~ m|$/$|); |
|
4
|
|
|
|
|
16
|
|
|
105
|
|
|
|
|
798
|
|
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub _write_temp { |
299
|
100
|
|
|
100
|
|
123
|
my($recs, $opts) = @_; |
300
|
100
|
50
|
|
|
|
162
|
my $temp = _get_temp() or die "Can't get temp file: $!"; |
301
|
|
|
|
|
|
|
|
302
|
100
|
|
|
|
|
431
|
_check_last($recs); |
303
|
|
|
|
|
|
|
|
304
|
100
|
50
|
|
|
|
267
|
_debug("New tempfile: $temp\n") if $opts->{D}; |
305
|
|
|
|
|
|
|
|
306
|
100
|
50
|
|
|
|
179
|
if ($opts->{K}) { |
307
|
100
|
|
|
|
|
237
|
local $^W; |
308
|
100
|
|
|
|
|
127
|
print $temp sort sort1 @{$recs}; |
|
100
|
|
|
|
|
2814
|
|
309
|
|
|
|
|
|
|
} else { |
310
|
0
|
|
|
|
|
0
|
local $^W; |
311
|
0
|
|
|
|
|
0
|
print $temp map {$_->[0]} sort sortsub map &map1, @{$recs}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
100
|
|
|
|
|
3632
|
seek $temp, 0, 0; # might need to read back from it |
315
|
100
|
|
|
|
|
238
|
return $temp; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub _parse_keydef { |
319
|
2
|
|
|
2
|
|
5
|
my($k, $topts) = @_; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# gurgle |
322
|
2
|
|
|
|
|
19
|
$k =~ /^(\d+)(?:\.(\d+))?([bdfinr]+)? |
323
|
|
|
|
|
|
|
(?:,(\d+)(?:\.(\d+))?([bdfinr]+)?)?$/x; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# set defaults at zero or undef |
326
|
2
|
50
|
50
|
|
|
73
|
my %opts = ( |
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
327
|
|
|
|
|
|
|
%$topts, # get other options |
328
|
|
|
|
|
|
|
ksf => $1 || 0, # start field |
329
|
|
|
|
|
|
|
ksc => $2 || 0, # start field char start |
330
|
|
|
|
|
|
|
kst => $3 || '', # start field type |
331
|
|
|
|
|
|
|
kff => (defined $4 ? $4 : undef), # end field |
332
|
|
|
|
|
|
|
kfc => $5 || 0, # end field char end |
333
|
|
|
|
|
|
|
kft => $6 || '', # end field type |
334
|
|
|
|
|
|
|
); |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# their idea of 1 is not ours |
337
|
2
|
|
|
|
|
7
|
for (qw(ksf ksc kff)) { # kfc stays same |
338
|
6
|
100
|
|
|
|
21
|
$opts{$_}-- if $opts{$_}; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# if nothing in kst or kft, use other flags possibly passed |
342
|
2
|
50
|
33
|
|
|
15
|
if (!$opts{kst} && !$opts{kft}) { |
343
|
2
|
|
|
|
|
6
|
foreach (qw(b d f i n r)) { |
344
|
12
|
100
|
|
|
|
22
|
$opts{kst} .= $_ if $topts->{$_}; |
345
|
12
|
100
|
|
|
|
29
|
$opts{kft} .= $_ if $topts->{$_}; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# except for b, flags on one apply to the other |
349
|
|
|
|
|
|
|
} else { |
350
|
0
|
|
|
|
|
0
|
foreach (qw(d f i n r)) { |
351
|
0
|
0
|
0
|
|
|
0
|
$opts{kst} .= $_ if ($opts{kst} =~ /$_/ || $opts{kft} =~ /$_/); |
352
|
0
|
0
|
0
|
|
|
0
|
$opts{kft} .= $_ if ($opts{kst} =~ /$_/ || $opts{kft} =~ /$_/); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
2
|
|
|
|
|
6
|
return \%opts; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub _make_sort_sub { |
360
|
7
|
|
|
7
|
|
23
|
my($topts, @sortsub, @mapsub, @sort1, @sort2) = shift; |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# if no keydefs set |
363
|
7
|
100
|
|
|
|
21
|
if ($topts->{K}) { |
364
|
5
|
|
|
|
|
18
|
$topts->{kst} = ''; |
365
|
5
|
|
|
|
|
13
|
foreach (qw(b d f i n r)) { |
366
|
30
|
100
|
|
|
|
96
|
$topts->{kst} .= $_ if $topts->{$_}; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# more complex stuff, act like we had -k defined |
370
|
5
|
50
|
|
|
|
27
|
if ($topts->{kst} =~ /[bdfi]/) { |
371
|
0
|
|
|
|
|
0
|
$topts->{K} = 0; |
372
|
0
|
|
|
|
|
0
|
$topts->{k} = ['K']; # special K ;-) |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# if no keydefs set |
377
|
7
|
100
|
|
|
|
22
|
if ($topts->{K}) { |
378
|
5
|
50
|
|
|
|
35
|
_debug("No keydef set\n") if $topts->{D}; |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# defaults for main sort sub components |
381
|
5
|
|
|
|
|
19
|
my($cmp, $aa, $bb, $fa, $fb) = qw(cmp $a $b $fh{$a} $fh{$b}); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# reverse sense |
384
|
5
|
100
|
|
|
|
20
|
($bb, $aa, $fb, $fa) = ($aa, $bb, $fa, $fb) if $topts->{r}; |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# do numeric sort |
387
|
5
|
100
|
|
|
|
13
|
$cmp = '<=>' if $topts->{n}; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# add finished expression to array |
390
|
5
|
|
|
|
|
19
|
my $sort1 = "sub { $aa $cmp $bb }\n"; |
391
|
5
|
|
|
|
|
40
|
my $sort2 = "sub { $fa $cmp $fb }\n"; |
392
|
|
|
|
|
|
|
|
393
|
5
|
50
|
|
|
|
11
|
_debug("$sort1\n$sort2\n") if $topts->{D}; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
{ |
396
|
5
|
|
|
|
|
9
|
local $^W; |
|
5
|
|
|
|
|
18
|
|
397
|
5
|
|
|
50
|
|
873
|
*sort1 = eval $sort1; |
|
50
|
|
|
|
|
448
|
|
398
|
5
|
50
|
|
|
|
18
|
die "Can't create sort sub: $@" if $@; |
399
|
5
|
|
|
6420
|
|
416
|
*sort2 = eval $sort2; |
|
6420
|
|
|
|
|
7747
|
|
400
|
5
|
50
|
|
|
|
31
|
die "Can't create sort sub: $@" if $@; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
} else { |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# get text separator or use whitespace |
406
|
2
|
50
|
|
|
|
14
|
$topts->{t} = |
|
|
50
|
|
|
|
|
|
407
|
|
|
|
|
|
|
defined $topts->{X} ? $topts->{X} : |
408
|
|
|
|
|
|
|
defined $topts->{t} ? quotemeta($topts->{t}) : |
409
|
|
|
|
|
|
|
'\s+'; |
410
|
2
|
50
|
|
|
|
7
|
$topts->{t} =~ s|/|\\/|g if defined $topts->{X}; |
411
|
|
|
|
|
|
|
|
412
|
2
|
|
|
|
|
4
|
foreach my $k (@{$topts->{k}}) { |
|
2
|
|
|
|
|
6
|
|
413
|
2
|
|
|
|
|
4
|
my($opts, @fil) = ($topts); |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# defaults for main sort sub components |
416
|
2
|
|
|
|
|
7
|
my($cmp, $ab_, $fab_, $aa, $bb) = qw(cmp $_ $fh{$_} $a $b); |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# skip stuff if special K |
419
|
2
|
50
|
|
|
|
13
|
$opts = $k eq 'K' ? $topts : _parse_keydef($k, $topts); |
420
|
|
|
|
|
|
|
|
421
|
2
|
50
|
|
|
|
8
|
if ($k ne 'K') { |
422
|
2
|
50
|
|
|
|
9
|
my($tmp1, $tmp2) = ("\$tmp[$opts->{ksf}]", |
423
|
|
|
|
|
|
|
($opts->{kff} ? "\$tmp[$opts->{kff}]" : '')); |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# skip leading spaces |
426
|
2
|
50
|
|
|
|
9
|
if ($opts->{kst} =~ /b/) { |
427
|
0
|
|
|
|
|
0
|
$tmp1 = "($tmp1 =~ /(\\S.*)/)[0]"; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
2
|
50
|
|
|
|
7
|
if ($opts->{kft} =~ /b/) { |
431
|
0
|
|
|
|
|
0
|
$tmp2 = "($tmp2 =~ /(\\S.*)/)[0]"; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
# simpler if one field, goody for us |
435
|
2
|
50
|
33
|
|
|
10
|
if (! defined $opts->{kff} || $opts->{ksf} == $opts->{kff}) { |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# simpler if chars are both 0, wicked pissah |
438
|
2
|
50
|
33
|
|
|
13
|
if ($opts->{ksc} == 0 && |
|
|
0
|
33
|
|
|
|
|
439
|
|
|
|
|
|
|
(!$opts->{kfc} || $opts->{kfc} == 0)) { |
440
|
2
|
|
|
|
|
9
|
@fil = "\$tmp[$opts->{ksf}]"; |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# hmmmmm |
443
|
|
|
|
|
|
|
} elsif (!$opts->{kfc}) { |
444
|
0
|
|
|
|
|
0
|
@fil = "substr($tmp1, $opts->{ksc})"; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# getting out of hand now |
447
|
|
|
|
|
|
|
} else { |
448
|
0
|
|
|
|
|
0
|
@fil = "substr($tmp1, $opts->{ksc}, ". |
449
|
|
|
|
|
|
|
($opts->{kfc} - $opts->{ksc}) . ')'; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# try again, shall we? |
453
|
|
|
|
|
|
|
} else { |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# if spans two fields, but chars are both 0 |
456
|
|
|
|
|
|
|
# and neither has -b, alrighty |
457
|
0
|
0
|
0
|
|
|
0
|
if ($opts->{kfc} == 0 && $opts->{ksc} == 0 && |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
458
|
|
|
|
|
|
|
$opts->{kst} !~ /b/ && $opts->{kft} !~ /b/) { |
459
|
0
|
|
|
|
|
0
|
@fil = "join(''," . |
460
|
|
|
|
|
|
|
"\@tmp[$opts->{ksf} .. $opts->{kff}])"; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# if only one field away |
463
|
|
|
|
|
|
|
} elsif (($opts->{kff} - $opts->{ksf}) == 1) { |
464
|
0
|
|
|
|
|
0
|
@fil = "join('', substr($tmp1, $opts->{ksc}), " . |
465
|
|
|
|
|
|
|
"substr($tmp2, 0, $opts->{kfc}))"; |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# fine, have it your way! hurt me! love me! |
468
|
|
|
|
|
|
|
} else { |
469
|
0
|
|
|
|
|
0
|
@fil = "join('', substr($tmp1, $opts->{ksc}), " . |
470
|
|
|
|
|
|
|
"\@tmp[" . ($opts->{ksf} + 1) . " .. " . |
471
|
|
|
|
|
|
|
($opts->{kff} - 1) . "], " . |
472
|
|
|
|
|
|
|
"substr($tmp2, 0, $opts->{kfc}))"; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
} else { |
476
|
0
|
0
|
|
|
|
0
|
@fil = $opts->{kst} =~ /b/ ? |
477
|
|
|
|
|
|
|
"(\$tmp[0] =~ /(\\S.*)/)[0]" : "\$tmp[0]"; |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# fold to upper case |
481
|
2
|
50
|
|
|
|
9
|
if ($opts->{kst} =~ /f/) { |
482
|
0
|
|
|
|
|
0
|
$fil[0] = "uc($fil[0])"; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
# only alphanumerics and whitespace, override -i |
486
|
2
|
50
|
|
|
|
43
|
if ($opts->{kst} =~ /d/) { |
|
|
50
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
$topts->{DD}++; |
488
|
0
|
|
|
|
|
0
|
push @fil, "\$tmp =~ s/[^\\w\\s]+//g", '"$tmp"'; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# only printable characters |
491
|
|
|
|
|
|
|
} elsif ($opts->{kst} =~ /i/) { |
492
|
0
|
|
|
|
|
0
|
require POSIX; |
493
|
0
|
|
|
|
|
0
|
$fil[0] = "join '', grep {POSIX::isprint \$_} " . |
494
|
|
|
|
|
|
|
"split //,\n$fil[0]"; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
2
|
50
|
|
|
|
14
|
$fil[0] = "\$tmp = $fil[0]" if $opts->{kst} =~ /d/; |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
# reverse sense |
501
|
2
|
100
|
|
|
|
10
|
($bb, $aa) = ($aa, $bb) if ($opts->{kst} =~ /r/); |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# do numeric sort |
504
|
2
|
50
|
|
|
|
10
|
$cmp = '<=>' if ($opts->{kst} =~ /n/); |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# add finished expressions to arrays |
507
|
2
|
|
|
|
|
5
|
my $n = @sortsub + 2; |
508
|
2
|
|
|
|
|
11
|
push @sortsub, sprintf "%s->[$n] %s %s->[$n]", |
509
|
|
|
|
|
|
|
$aa, $cmp, $bb; |
510
|
|
|
|
|
|
|
|
511
|
2
|
50
|
|
|
|
6
|
if (@fil > 1) { |
512
|
0
|
|
|
|
|
0
|
push @mapsub, " (\n" . |
513
|
0
|
|
|
|
|
0
|
join(",\n", map {s/^/ /mg; $_} @fil), |
|
0
|
|
|
|
|
0
|
|
514
|
|
|
|
|
|
|
"\n )[-1],\n "; |
515
|
|
|
|
|
|
|
} else { |
516
|
2
|
|
|
|
|
15
|
push @mapsub, " " . $fil[0] . ",\n "; |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# if not -u |
521
|
2
|
50
|
|
|
|
5
|
if (! $topts->{u} ) { |
522
|
|
|
|
|
|
|
# do straight compare if all else is equal |
523
|
2
|
100
|
|
|
|
11
|
push @sortsub, sprintf "%s->[1] %s %s->[1]", |
524
|
|
|
|
|
|
|
$topts->{r} ? qw($b cmp $a) : qw($a cmp $b); |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
2
|
|
|
|
|
7
|
my(%maps, $sortsub, $mapsub) = (map1 => '$_', map2 => '$fh{$_}'); |
528
|
|
|
|
|
|
|
|
529
|
2
|
|
|
|
|
9
|
$sortsub = "sub {\n " . join(" || \n ", @sortsub) . "\n}\n"; |
530
|
|
|
|
|
|
|
|
531
|
2
|
|
|
|
|
6
|
for my $m (keys %maps) { |
532
|
4
|
|
|
|
|
7
|
my $k = $maps{$m}; |
533
|
4
|
50
|
|
|
|
21
|
$maps{$m} = sprintf "sub {\n my \@tmp = %s;\n", |
534
|
|
|
|
|
|
|
$topts->{k}[0] eq 'K' ? $k : "split(/$topts->{t}/, $k)"; |
535
|
|
|
|
|
|
|
|
536
|
4
|
50
|
|
|
|
11
|
$maps{$m} .= " my \$tmp;\n" if $topts->{DD}; |
537
|
4
|
|
|
|
|
8
|
$maps{$m} .= "\n [\$_, $k"; |
538
|
4
|
50
|
|
|
|
15
|
$maps{$m} .= ",\n " . join('', @mapsub) if @mapsub; |
539
|
4
|
|
|
|
|
44
|
$maps{$m} .= "]\n}\n"; |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
|
542
|
2
|
50
|
|
|
|
7
|
_debug("$sortsub\n$maps{map1}\n$maps{map2}\n") if $topts->{D}; |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
{ |
545
|
2
|
|
|
|
|
3
|
local $^W; |
|
2
|
|
|
|
|
7
|
|
546
|
2
|
50
|
|
197
|
|
233
|
*sortsub = eval $sortsub; |
|
197
|
|
|
|
|
508
|
|
547
|
2
|
50
|
|
|
|
7
|
die "Can't create sort sub: $@" if $@; |
548
|
2
|
|
|
100
|
|
392
|
*map1 = eval $maps{map1}; |
|
100
|
|
|
|
|
152
|
|
|
100
|
|
|
|
|
326
|
|
549
|
2
|
50
|
|
|
|
11
|
die "Can't create sort sub: $@" if $@; |
550
|
2
|
|
|
0
|
|
188
|
*map2 = eval $maps{map2}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
551
|
2
|
50
|
|
|
|
15
|
die "Can't create sort sub: $@" if $@; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
sub _get_temp { # nice and simple |
558
|
102
|
|
|
102
|
|
1996
|
require IO::File; |
559
|
102
|
|
|
|
|
23941
|
IO::File->new_tmpfile; |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
sub _are_uniq { |
563
|
0
|
|
|
0
|
|
|
my $nok = shift; |
564
|
0
|
|
|
|
|
|
local $^W; |
565
|
|
|
|
|
|
|
|
566
|
0
|
0
|
|
|
|
|
if ($nok) { |
567
|
0
|
|
|
|
|
|
($a, $b) = @_; |
568
|
0
|
|
|
|
|
|
return &sort1; |
569
|
|
|
|
|
|
|
} else { |
570
|
0
|
|
|
|
|
|
($a, $b) = map &map1, @_; |
571
|
0
|
|
|
|
|
|
return &sortsub; |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
sub _debug { |
576
|
0
|
|
|
0
|
|
|
print STDERR @_; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub usage { |
580
|
0
|
|
|
0
|
0
|
|
local $/ = "\n"; # in case changed |
581
|
0
|
|
|
|
|
|
my $u; |
582
|
|
|
|
|
|
|
|
583
|
0
|
|
|
|
|
|
seek DATA, 0, 0; |
584
|
0
|
|
|
|
|
|
while () { |
585
|
0
|
0
|
|
|
|
|
last if m/^=head1 SYNOPSIS$/; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
0
|
|
|
|
|
|
while () { |
589
|
0
|
0
|
|
|
|
|
last if m/^=/; |
590
|
0
|
|
|
|
|
|
$u .= $_; |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
0
|
|
|
|
|
|
$u =~ s/\n//; |
594
|
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
|
die "Usage:$u"; |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
__END__ |