line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package List::MergeSorted::XS; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
22163
|
use 5.008; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
45
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
35
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
32
|
|
6
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
127
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
9
|
1
|
|
|
1
|
|
1791
|
use AutoLoader; |
|
1
|
|
|
|
|
1759
|
|
|
1
|
|
|
|
|
6
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our @EXPORT_OK = qw(merge); |
14
|
|
|
|
|
|
|
our @EXPORT = qw(); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.06'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
require XSLoader; |
19
|
|
|
|
|
|
|
XSLoader::load('List::MergeSorted::XS', $VERSION); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use constant { |
22
|
1
|
|
|
|
|
1178
|
PRIO_LINEAR => 0, |
23
|
|
|
|
|
|
|
PRIO_FIB => 1, |
24
|
|
|
|
|
|
|
SORT => 2, |
25
|
1
|
|
|
1
|
|
108
|
}; |
|
1
|
|
|
|
|
1
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub merge { |
28
|
129
|
|
|
129
|
1
|
242536
|
my $lists = shift; |
29
|
129
|
|
|
|
|
364
|
my %opts = @_; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# validate inputs |
32
|
129
|
100
|
100
|
|
|
1008
|
unless ($lists && ref $lists && ref $lists eq 'ARRAY') { |
|
|
|
66
|
|
|
|
|
33
|
3
|
|
|
|
|
20
|
die "merge requires an array reference"; |
34
|
|
|
|
|
|
|
} |
35
|
126
|
|
|
|
|
200
|
for my $list (@$lists) { |
36
|
549
|
100
|
66
|
|
|
3156
|
unless ($list && ref $list && ref $list eq 'ARRAY') { |
|
|
|
66
|
|
|
|
|
37
|
1
|
|
|
|
|
14
|
die "lists to merge must be arrayrefs"; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
125
|
|
100
|
|
|
471
|
my $limit = $opts{limit} || 0; |
42
|
125
|
50
|
33
|
|
|
464
|
die "limit must be positive" if defined $limit && $limit < 0; |
43
|
|
|
|
|
|
|
|
44
|
125
|
50
|
66
|
|
|
435
|
die "key_cb option must be a coderef" |
45
|
|
|
|
|
|
|
if defined $opts{key_cb} && ref $opts{key_cb} ne 'CODE'; |
46
|
|
|
|
|
|
|
|
47
|
125
|
50
|
66
|
|
|
337
|
die "uniq_cb option must be a coderef" |
48
|
|
|
|
|
|
|
if defined $opts{uniq_cb} && ref $opts{uniq_cb} ne 'CODE'; |
49
|
|
|
|
|
|
|
|
50
|
125
|
100
|
|
|
|
257
|
return [] unless @$lists; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# pick an algorithm |
53
|
124
|
|
|
|
|
302
|
my @params = ($lists, $limit, $opts{key_cb}, $opts{uniq_cb}); |
54
|
|
|
|
|
|
|
|
55
|
124
|
100
|
|
|
|
256
|
if (defined $opts{method}) { |
56
|
87
|
|
|
|
|
219
|
return _merge($opts{method}, @params); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
37
|
100
|
|
|
|
71
|
if (defined $opts{key_cb}) { |
60
|
|
|
|
|
|
|
# linear priority queue is faster until ~100 lists, relatively |
61
|
|
|
|
|
|
|
# independent of limit %. sort never wins in keyed mode because of |
62
|
|
|
|
|
|
|
# Schwartzian tx overhead |
63
|
|
|
|
|
|
|
|
64
|
18
|
50
|
|
|
|
54
|
return scalar @$lists < 100 |
65
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
66
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params); |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
else { |
69
|
|
|
|
|
|
|
# linear always wins with a small number of lists (<100). with more |
70
|
|
|
|
|
|
|
# lists, fib wins with low limit, giving way to sort around 25% |
71
|
|
|
|
|
|
|
# limit. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# compute what fraction of the merged set will be returned |
74
|
19
|
|
|
|
|
56
|
my $total = _count_elements($lists); |
75
|
19
|
|
100
|
|
|
51
|
$limit ||= $total; |
76
|
|
|
|
|
|
|
|
77
|
19
|
50
|
|
|
|
72
|
if ($limit < 0.05 * $total) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
78
|
0
|
0
|
|
|
|
0
|
return scalar @$lists < 1000 |
79
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
80
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
elsif ($limit < 0.25 * $total) { |
83
|
2
|
50
|
|
|
|
8
|
return scalar @$lists < 500 |
84
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
85
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params) |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
elsif ($limit < 0.75 * $total) { |
88
|
4
|
50
|
|
|
|
13
|
return scalar @$lists < 100 |
89
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
90
|
|
|
|
|
|
|
: _merge(SORT, @params) |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
else { |
93
|
13
|
50
|
|
|
|
39
|
return scalar @$lists < 100 |
94
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
95
|
|
|
|
|
|
|
: _merge(SORT, @params) |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# dispatch to appopriate implementation based on algorithm and options |
101
|
|
|
|
|
|
|
sub _merge { |
102
|
124
|
|
|
124
|
|
206
|
my ($method, $lists, $limit, $key_cb, $uniq_cb) = @_; |
103
|
|
|
|
|
|
|
|
104
|
124
|
100
|
|
|
|
316
|
if ($method == PRIO_LINEAR) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
105
|
66
|
100
|
|
|
|
964
|
return $key_cb ? $uniq_cb ? _merge_linear_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
106
|
|
|
|
|
|
|
: _merge_linear_keyed_dupeok($lists, $limit, $key_cb) |
107
|
|
|
|
|
|
|
: $uniq_cb ? _merge_linear_flat_dedupe($lists, $limit, $uniq_cb) |
108
|
|
|
|
|
|
|
: _merge_linear_flat_dupeok($lists, $limit); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
elsif ($method == PRIO_FIB) { |
111
|
29
|
100
|
|
|
|
2142
|
return $key_cb ? $uniq_cb ? _merge_fib_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
112
|
|
|
|
|
|
|
: _merge_fib_keyed_dupeok($lists, $limit, $key_cb) |
113
|
|
|
|
|
|
|
: $uniq_cb ? _merge_fib_flat_dedupe($lists, $limit, $uniq_cb) |
114
|
|
|
|
|
|
|
: _merge_fib_flat_dupeok($lists, $limit); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
elsif ($method == SORT) { |
117
|
29
|
100
|
|
|
|
129
|
return $key_cb ? $uniq_cb ? _merge_sort_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
118
|
|
|
|
|
|
|
: _merge_sort_keyed_dupeok($lists, $limit, $key_cb) |
119
|
|
|
|
|
|
|
: $uniq_cb ? _merge_sort_flat_dedupe($lists, $limit, $uniq_cb) |
120
|
|
|
|
|
|
|
: _merge_sort_flat_dupeok($lists, $limit); |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
else { |
123
|
0
|
|
|
|
|
0
|
die "unknown sort method $method requested\n"; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# concatenate all lists and sort the whole thing. works well when no limit is |
128
|
|
|
|
|
|
|
# given. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub _merge_sort_flat_dupeok { |
131
|
12
|
|
|
12
|
|
562
|
my ($lists, $limit) = @_; |
132
|
|
|
|
|
|
|
|
133
|
12
|
|
|
|
|
23
|
my @output = sort {$a <=> $b} map {@$_} @$lists; |
|
11580
|
|
|
|
|
9430
|
|
|
63
|
|
|
|
|
322
|
|
134
|
12
|
100
|
66
|
|
|
153
|
splice @output, $limit if $limit && @output > $limit; |
135
|
12
|
|
|
|
|
85
|
return \@output; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub _merge_sort_keyed_dupeok { |
139
|
12
|
|
|
12
|
|
22
|
my ($lists, $limit, $keyer) = @_; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Schwartzian transform is faster than sorting on |
142
|
|
|
|
|
|
|
# {$keyer->($a) <=> # $keyer->($b)}, even for degenerately simple case |
143
|
|
|
|
|
|
|
# of $keyer = sub { $_[0] } |
144
|
|
|
|
|
|
|
|
145
|
3113
|
|
|
|
|
4406
|
my @output = |
146
|
11548
|
|
|
|
|
11446
|
map { $_->[1] } |
147
|
3113
|
|
|
|
|
10946
|
sort { $a->[0] <=> $b->[0] } |
148
|
61
|
|
|
|
|
346
|
map { [$keyer->($_), $_] } |
149
|
12
|
|
|
|
|
39
|
map { @$_ } |
150
|
|
|
|
|
|
|
@$lists; |
151
|
|
|
|
|
|
|
|
152
|
12
|
100
|
66
|
|
|
649
|
splice @output, $limit if $limit && @output > $limit; |
153
|
12
|
|
|
|
|
94
|
return \@output; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub _merge_sort_flat_dedupe { |
157
|
1
|
|
|
1
|
|
2
|
my ($lists, $limit, $uniquer) = @_; |
158
|
|
|
|
|
|
|
|
159
|
1
|
|
|
|
|
2
|
my @merged = sort {$a <=> $b} map {@$_} @$lists; |
|
12
|
|
|
|
|
14
|
|
|
3
|
|
|
|
|
10
|
|
160
|
|
|
|
|
|
|
|
161
|
1
|
|
|
|
|
2
|
my @output; |
162
|
1
|
|
|
|
|
1
|
my $last_unique = undef; |
163
|
1
|
|
|
|
|
3
|
for my $element (@merged) { |
164
|
7
|
|
|
|
|
13
|
my $unique = $uniquer->($element); |
165
|
7
|
100
|
100
|
|
|
34
|
next if defined $last_unique && $unique == $last_unique; |
166
|
5
|
|
|
|
|
5
|
push @output, $element; |
167
|
5
|
|
|
|
|
8
|
$last_unique = $unique; |
168
|
|
|
|
|
|
|
} |
169
|
1
|
50
|
33
|
|
|
6
|
splice @output, $limit if $limit && @output > $limit; |
170
|
1
|
|
|
|
|
6
|
return \@output; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub _merge_sort_keyed_dedupe { |
174
|
4
|
|
|
4
|
|
7
|
my ($lists, $limit, $keyer, $uniquer) = @_; |
175
|
|
|
|
|
|
|
|
176
|
16
|
|
|
|
|
26
|
my @merged = |
177
|
17
|
|
|
|
|
44
|
map { $_->[1] } |
178
|
16
|
|
|
|
|
62
|
sort { $a->[0] <=> $b->[0] } |
179
|
7
|
|
|
|
|
16
|
map { [$keyer->($_), $_] } |
180
|
4
|
|
|
|
|
8
|
map { @$_ } |
181
|
|
|
|
|
|
|
@$lists; |
182
|
|
|
|
|
|
|
|
183
|
4
|
|
|
|
|
13
|
my @output; |
184
|
|
|
|
|
|
|
my %seen; |
185
|
4
|
|
|
|
|
5
|
for my $element (@merged) { |
186
|
16
|
|
|
|
|
45
|
my $unique = $uniquer->($element); |
187
|
16
|
100
|
|
|
|
77
|
next if $seen{$unique}++; |
188
|
11
|
|
|
|
|
22
|
push @output, $element; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
4
|
50
|
33
|
|
|
15
|
splice @output, $limit if $limit && @output > $limit; |
192
|
4
|
|
|
|
|
25
|
return \@output; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
1; |
196
|
|
|
|
|
|
|
__END__ |