line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Range::Compare::Stream::Iterator::File::MergeSortAsc; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# based on http://en.wikipedia.org/wiki/Merge_sort |
4
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
1637
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
51
|
|
6
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
51
|
|
7
|
2
|
|
|
2
|
|
8
|
use Carp qw(croak); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
83
|
|
8
|
2
|
|
|
2
|
|
755
|
use IO::File; |
|
2
|
|
|
|
|
10796
|
|
|
2
|
|
|
|
|
360
|
|
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
11
|
use Data::Range::Compare::Stream::Sort; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
159
|
|
11
|
2
|
|
|
2
|
|
1193
|
use Data::Range::Compare::Stream::Iterator::Stack; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
49
|
|
12
|
2
|
|
|
2
|
|
11
|
use Data::Range::Compare::Stream::Iterator::Array; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
45
|
|
13
|
2
|
|
|
2
|
|
1371
|
use Data::Range::Compare::Stream::Iterator::File::MergeSortAsc::Stack; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
62
|
|
14
|
2
|
|
|
2
|
|
608
|
use Data::Range::Compare::Stream::Iterator::File; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
68
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
2
|
|
|
2
|
|
10
|
use base qw(Data::Range::Compare::Stream::Iterator::Base Data::Range::Compare::Stream::Iterator::File::Temp); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
3163
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Constructor.
#
# Builds the object through the parent class constructor, passing the defaults
# listed below followed by the caller's %args (so caller values come later in
# the argument list).  Every entry of file_list (plus the optional "filename"
# argument) is wrapped in a file iterator and appended to iterator_list.
#
# Croaks when bucket_size is smaller than 1 or when a file iterator reports
# in_error.  When auto_prepare is true the merge sort is run immediately.
sub new {
  my ($class,%args)=@_;

  my @defaults=(
    bucket_size=>4000,
    NEW_ITERATOR_FROM=>'Data::Range::Compare::Stream::Iterator::File',
    NEW_ARRAY_ITERATOR_FROM=>'Data::Range::Compare::Stream::Iterator::Array',
    NEW_FROM=>'Data::Range::Compare::Stream',
    file_list=>[],
    iterator_list=>[],
    auto_prepare=>0,
    unlink_result_file=>1,
    prepared=>0,
  );
  my $self=$class->SUPER::new(@defaults,%args);

  croak 'bucket_size < 1' if $self->{bucket_size} < 1;

  my $iterators=$self->{iterator_list};

  # A single "filename" argument is treated as one more entry of file_list.
  push @{$self->{file_list}},$args{filename} if $args{filename};

  for my $path (@{$self->{file_list}}) {
    my $file_it=$self->create_file_iterator(filename=>$path);
    if($file_it->in_error) {
      croak "Could not open: [$path]";
    }
    push @$iterators,$file_it;
  }

  if($self->{auto_prepare}) {
    $self->prepare;
  }

  return $self;
}
52
|
|
|
|
|
|
|
|
53
|
122
|
|
|
122
|
1
|
1302
|
# Accessor: returns the value stored under the NEW_FROM key of the object.
sub NEW_FROM {
  my ($self)=@_;
  return $self->{NEW_FROM};
}
54
|
|
|
|
|
|
|
|
55
|
97
|
|
|
97
|
1
|
947
|
# Accessor: returns the value stored under the NEW_ARRAY_ITERATOR_FROM key.
sub NEW_ARRAY_ITERATOR_FROM {
  my ($self)=@_;
  return $self->{NEW_ARRAY_ITERATOR_FROM};
}
56
|
|
|
|
|
|
|
|
57
|
112
|
|
|
112
|
1
|
425
|
# Accessor: returns the value stored under the NEW_ITERATOR_FROM key.
sub NEW_ITERATOR_FROM {
  my ($self)=@_;
  return $self->{NEW_ITERATOR_FROM};
}
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Builds a new iterator object of the class returned by NEW_ITERATOR_FROM.
#
# The constructor receives this object's NEW_FROM, parse_line, result_to_line
# and factory_instance settings, followed by the caller supplied %args
# (for example filename=>... or fh=>...).  Returns the new iterator.
sub create_file_iterator {
  my ($self,%args)=@_;

  my $iterator_class=$self->NEW_ITERATOR_FROM;

  # Settings shared by every iterator this object creates.
  my @shared=(
    NEW_FROM=>$self->NEW_FROM,
    parse_line=>$self->{parse_line},
    result_to_line=>$self->{result_to_line},
    factory_instance=>$self->{factory_instance},
  );

  my $iterator=$iterator_class->new(@shared,%args);
  return $iterator;
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Runs the merge sort once.
#
# On the first call: builds the stack of sorted temp files from iterator_list,
# merges the stack down to a single result file, remembers that file name in
# result_file, clears the stack and opens a file iterator on the result.
# Later calls return immediately because the "prepared" flag is already set.
sub prepare {
  my ($self)=@_;

  if($self->{prepared}) {
    return;
  }
  # Flag is raised before the work starts, so the sort is never attempted twice.
  $self->{prepared}=1;

  $self->create_stack($self->{iterator_list});

  my $sorted_file=$self->walk_stack;
  $self->{result_file}=$sorted_file;
  $self->{stack}=undef;

  $self->{iterator}=$self->create_file_iterator(filename=>$sorted_file);
}
86
|
|
|
|
|
|
|
|
87
|
3
|
|
|
3
|
0
|
10
|
# Makes sure the merge sort has run (via prepare), then returns the name of
# the sorted result file stored in result_file.
sub get_result_file {
  my ($self)=@_;
  $self->prepare;
  return $self->{result_file};
}
|
3
|
|
|
|
|
61
|
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Destructor: removes the files this object created.
#
# - When a result file exists, the iterator reading it is dropped first and the
#   file is unlinked unless unlink_result_file is false.
# - When a stack of intermediate files is still present (the merge did not
#   finish), every file name left on the stack is unlinked.
sub DESTROY {
  my ($self)=@_;
  return unless defined($self);

  # unlink() below may change $! (and the stack methods could touch $@ or $?).
  # A destructor can run at any point in the caller's code, so keep these
  # globals intact for whoever is inspecting them (see perlobj, "Destructors").
  local($.,$@,$!,$^E,$?);

  if(defined($self->{result_file})) {
    # Drop the iterator before removing the file it was created for.
    $self->{iterator}=undef;
    unlink $self->{result_file} if $self->{unlink_result_file};
  }

  if(defined(my $stack=$self->{stack})) {
    while($stack->has_next) {
      unlink $stack->get_next;
    }
    $self->{stack}=undef;
  }
}
106
|
|
|
|
|
|
|
|
107
|
8252
|
|
|
8252
|
0
|
33282
|
# Runs the merge sort if it has not run yet (via prepare), then reports
# whatever the iterator over the sorted result returns from has_next.
sub has_next {
  my ($self)=@_;
  $self->prepare;
  return $self->{iterator}->has_next;
}
|
8252
|
|
|
|
|
28915
|
|
108
|
8244
|
50
|
|
8244
|
1
|
27364
|
# Returns the next result from the iterator over the sorted file.
# Returns undef when no iterator has been created yet; note that this method
# does not call prepare itself.
sub get_next {
  my ($self)=@_;
  my $iterator=$self->{iterator};
  unless(defined($iterator)) {
    return undef;
  }
  return $iterator->get_next;
}
|
8244
|
|
|
|
|
26344
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# Merges the temp files held on $self->{stack} until the loop condition fails,
# then returns the next entry taken from the stack (the final result file).
#
# Each pass takes two file names off the stack, merges them into a new temp
# file (see merge), pushes that file name back and deletes the two inputs.
#
# Croaks with "Could not open: [file] reason" when one of the two files cannot
# be opened for reading.
sub walk_stack {
  my ($self)=@_;

  my $stack=$self->{stack};
  # NOTE(review): the loop relies on the stack's has_next returning exactly 2
  # while a pair of files is available - confirm against the stack class.
  while($stack->has_next==2) {
    my $left=$stack->get_next;
    my $right=$stack->get_next;

    my @handles;
    foreach my $file ($left,$right) {
      my $fh=IO::File->new($file,'r');
      unless(defined($fh)) {
        my $reason="$!";
        $_->close foreach @handles;
        # Both names are already off the stack, so DESTROY would no longer
        # find them: remove them here before giving up.
        unlink $left,$right;
        croak "Could not open: [$file] $reason";
      }
      push @handles,$fh;
    }
    my ($left_fh,$right_fh)=@handles;

    my $it_left=$self->create_file_iterator(fh=>$left_fh);
    my $it_right=$self->create_file_iterator(fh=>$right_fh);

    $stack->push($self->merge($it_left,$it_right));

    $right_fh->close;
    $left_fh->close;
    unlink $right,$left;
  }

  my $next=$stack->get_next;
  return $next;
}
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Splits the input into sorted temp files and records them on a new stack.
#
# $list is handed to an Iterator::Stack object, whose results are read one at
# a time into two in-memory buckets.  When both buckets are full they are
# sorted, merged into one temp file (see merge) and the file name is pushed
# onto the stack stored in $self->{stack}.  Whatever is left in the buckets at
# the end is sorted and written to temp files of its own.
sub create_stack {
  my ($self,$list)=@_;

  my $merged=Data::Range::Compare::Stream::Iterator::File::MergeSortAsc::Stack->new(tmpdir=>$self->{tmpdir});
  $self->{stack}=$merged;

  my $stack=Data::Range::Compare::Stream::Iterator::Stack->new(stack=>$list);

  my $load_count=$self->{bucket_size};

  # Sorts a bucket in place and wraps it in an array iterator.
  my $sorted_iterator=sub {
    my ($bucket)=@_;
    @$bucket=sort { $self->sort_method($a,$b) } @$bucket;
    return $self->NEW_ARRAY_ITERATOR_FROM->new(factory_instance=>$self->{factory_instance},sorted=>1,range_list=>$bucket);
  };

  my $left=[];
  my $right=[];

  while($stack->has_next) {
    my $result=$stack->get_next;

    # Compare element counts (not the last index) so that a bucket never holds
    # more than bucket_size results.
    if(@$left < $load_count) {
      push @$left,$result;
      next;
    }
    if(@$right < $load_count) {
      push @$right,$result;
      next;
    }

    # Both buckets are full: flush them to one merged temp file and start the
    # next left bucket with the result that did not fit.
    my $it_left=$sorted_iterator->($left);
    my $it_right=$sorted_iterator->($right);
    $merged->push($self->merge($it_left,$it_right));
    @$right=();
    @$left=($result);
  }

  # Write each non-empty leftover bucket to its own sorted temp file.
  foreach my $bucket ($left,$right) {
    next unless @$bucket;

    my $it=$sorted_iterator->($bucket);
    my $tmp=$self->get_temp;

    while($it->has_next) {
      my $result=$it->get_next;
      $tmp->print($self->result_to_line($result));
    }

    $merged->push($tmp->filename);
    $tmp->close;
  }

  return;
}
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Compares two results for sorting.
#
# When the object carries a sort_func code reference, the comparison is
# delegated to it with the two results as arguments.  Otherwise the common
# ranges of both results (get_common) are compared with
# sort_in_consolidate_order_asc.
sub sort_method {
  my ($self,$left_range,$right_range)=@_;

  my $custom=$self->{sort_func};
  if($custom) {
    return $custom->($left_range,$right_range);
  }

  return scalar(sort_in_consolidate_order_asc($left_range->get_common,$right_range->get_common));
}
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Merges two iterators into a new temp file and returns the file name.
#
# $left and $right are iterators (has_next/get_next) that yield results in
# sorted order.  Results are compared with sort_method; on a tie the result
# from $left is written first.  Every result is converted with result_to_line
# and printed to a temp file obtained from get_temp.
sub merge {
  my ($self,$left,$right)=@_;

  my $tmp_result=$self->get_temp;

  # Returns the next result of an iterator, or undef once it is exhausted.
  my $advance=sub {
    my ($iterator)=@_;
    return $iterator->has_next ? $iterator->get_next : undef;
  };

  my $left_range=$advance->($left);
  my $right_range=$advance->($right);

  while(defined($left_range) and defined($right_range)) {
    # Any positive value means "left sorts after right", exactly as perl's
    # sort treats its comparator; testing for the literal 1 would mis-order
    # the output of a sort_func that returns e.g. the difference of two keys.
    if($self->sort_method($left_range,$right_range) > 0) {
      $tmp_result->print($self->result_to_line($right_range));
      $right_range=$advance->($right);
    } else {
      $tmp_result->print($self->result_to_line($left_range));
      $left_range=$advance->($left);
    }
  }

  # At most one side still has results: copy them over unchanged.
  foreach my $pending ([$left,$left_range],[$right,$right_range]) {
    my ($iterator,$range)=@$pending;
    while(defined($range)) {
      $tmp_result->print($self->result_to_line($range));
      $range=$advance->($iterator);
    }
  }

  my $result_name=$tmp_result->filename;
  $tmp_result->close;

  return $result_name;
}
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
1; |