line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::JBrowse::Store::NCList::LazyNCList; |
2
|
|
|
|
|
|
|
BEGIN { |
3
|
2
|
|
|
2
|
|
785
|
$Bio::JBrowse::Store::NCList::LazyNCList::AUTHORITY = 'cpan:RBUELS'; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
{ |
6
|
|
|
|
|
|
|
$Bio::JBrowse::Store::NCList::LazyNCList::VERSION = '0.1'; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
57
|
|
10
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
49
|
|
11
|
2
|
|
|
2
|
|
10
|
use Carp; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
123
|
|
12
|
2
|
|
|
2
|
|
11
|
use List::Util qw(max); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
210
|
|
13
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
1667
|
use Bio::JBrowse::Store::NCList::NCList; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
3016
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new { |
18
|
1
|
|
|
1
|
1
|
19
|
my ($class, $attrs, $lazyClass, $makeLazy, $loadChunk, |
19
|
|
|
|
|
|
|
$measure, $output, $sizeThresh) = @_; |
20
|
|
|
|
|
|
|
|
21
|
1
|
|
|
|
|
5
|
my $self = { attrs => $attrs, |
22
|
|
|
|
|
|
|
start => $attrs->makeGetter("start"), |
23
|
|
|
|
|
|
|
end => $attrs->makeGetter("end"), |
24
|
|
|
|
|
|
|
setSublist => $attrs->makeSetter("sublist"), |
25
|
|
|
|
|
|
|
lazyClass => $lazyClass, |
26
|
|
|
|
|
|
|
makeLazy => $makeLazy, |
27
|
|
|
|
|
|
|
loadChunk => $loadChunk, |
28
|
|
|
|
|
|
|
measure => $measure, |
29
|
|
|
|
|
|
|
output => $output, |
30
|
|
|
|
|
|
|
sizeThresh => $sizeThresh, |
31
|
|
|
|
|
|
|
count => 0, |
32
|
|
|
|
|
|
|
minStart => undef, |
33
|
|
|
|
|
|
|
maxEnd => undef, |
34
|
|
|
|
|
|
|
chunkNum => 1, |
35
|
|
|
|
|
|
|
chunkSizes => [], |
36
|
|
|
|
|
|
|
partialStack => [] }; |
37
|
1
|
|
|
|
|
3
|
bless $self, $class; |
38
|
|
|
|
|
|
|
|
39
|
1
|
|
|
|
|
3
|
$self->addNewLevel(); |
40
|
|
|
|
|
|
|
|
41
|
1
|
|
|
|
|
3
|
return $self; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub importExisting { |
45
|
0
|
|
|
0
|
0
|
0
|
my ($class, $attrs, $lazyClass, $count, $minStart, |
46
|
|
|
|
|
|
|
$maxEnd, $loadChunk, $topLevelList) = @_; |
47
|
|
|
|
|
|
|
|
48
|
0
|
|
|
|
|
0
|
my $self = { attrs => $attrs, |
49
|
|
|
|
|
|
|
lazyClass => $lazyClass, |
50
|
|
|
|
|
|
|
start => $attrs->makeGetter("start"), |
51
|
|
|
|
|
|
|
end => $attrs->makeGetter("end"), |
52
|
|
|
|
|
|
|
count => $count, |
53
|
|
|
|
|
|
|
minStart => $minStart, |
54
|
|
|
|
|
|
|
maxEnd => $maxEnd, |
55
|
|
|
|
|
|
|
loadChunk => $loadChunk, |
56
|
|
|
|
|
|
|
topLevelList => $topLevelList }; |
57
|
0
|
|
|
|
|
0
|
bless $self, $class; |
58
|
|
|
|
|
|
|
|
59
|
0
|
|
|
|
|
0
|
$self->addNewLevel(); |
60
|
|
|
|
|
|
|
|
61
|
0
|
|
|
|
|
0
|
return $self; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub addSorted { |
66
|
2559
|
|
|
2559
|
1
|
33889
|
my ($self, $feat) = @_; |
67
|
|
|
|
|
|
|
|
68
|
2559
|
|
|
|
|
3657
|
$self->{count} += 1; |
69
|
2559
|
|
|
|
|
3463
|
my $lastAdded = $self->{lastAdded}; |
70
|
2559
|
50
|
|
|
|
6512
|
my $start = $self->{start}->( $feat ) or die; |
71
|
2559
|
|
|
|
|
6890
|
my $end = $self->{end}->( $feat ); |
72
|
|
|
|
|
|
|
|
73
|
2559
|
100
|
|
|
|
4440
|
if (defined($lastAdded)) { |
74
|
2558
|
|
|
|
|
6531
|
my $lastStart = $self->{start}->($lastAdded); |
75
|
2558
|
|
|
|
|
6473
|
my $lastEnd = $self->{end}->($lastAdded); |
76
|
|
|
|
|
|
|
# check that the input is sorted |
77
|
2558
|
50
|
|
|
|
5271
|
$lastStart <= $start |
78
|
|
|
|
|
|
|
or die "input not sorted: got start $lastStart before $start"; |
79
|
|
|
|
|
|
|
|
80
|
2558
|
50
|
33
|
|
|
6296
|
die "input not sorted: got $lastStart..$lastEnd before $start..$end" |
81
|
|
|
|
|
|
|
if $lastStart == $start && $lastEnd < $end; |
82
|
|
|
|
|
|
|
} else { |
83
|
|
|
|
|
|
|
# LazyNCList requires sorted input, so the start of the first feat |
84
|
|
|
|
|
|
|
# is the minStart |
85
|
1
|
|
|
|
|
3
|
$self->{minStart} = $start; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
2559
|
|
|
|
|
4030
|
$self->{lastAdded} = $feat; |
89
|
|
|
|
|
|
|
|
90
|
2559
|
|
|
|
|
3486
|
my $chunkSizes = $self->{chunkSizes}; |
91
|
2559
|
|
|
|
|
3254
|
my $partialStack = $self->{partialStack}; |
92
|
|
|
|
|
|
|
|
93
|
2559
|
|
|
|
|
6662
|
for (my $level = 0; $level <= $#$partialStack; $level++) { |
94
|
|
|
|
|
|
|
# due to NCList nesting, among other things, it's hard to be exactly |
95
|
|
|
|
|
|
|
# precise about the size of the JSON serialization, but this will get |
96
|
|
|
|
|
|
|
# us pretty close. |
97
|
2580
|
|
|
|
|
5697
|
my $featSize = $self->{measure}->($feat); |
98
|
2580
|
|
|
|
|
21260
|
my $proposedChunkSize = $chunkSizes->[$level] + $featSize; |
99
|
|
|
|
|
|
|
#print STDERR "chunksize at $level is now " . $chunkSizes->[$level] . "; (next chunk is " . $self->{chunkNum} . ")\n"; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# If this partial chunk is full, |
102
|
2580
|
100
|
66
|
|
|
7460
|
if ( $proposedChunkSize > $self->{sizeThresh} && @{$partialStack->[$level]} ){ |
|
21
|
|
|
|
|
73
|
|
103
|
|
|
|
|
|
|
# then we're finished with the current "partial" chunk (i.e., |
104
|
|
|
|
|
|
|
# it's now a "complete" chunk rather than a partial one), so |
105
|
|
|
|
|
|
|
# create a new NCList to hold all the features in this chunk. |
106
|
21
|
|
|
|
|
70
|
my $lazyFeat = $self->finishChunk( $partialStack->[$level] ); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# start a new partial chunk with the current feature |
109
|
21
|
|
|
|
|
194
|
$partialStack->[$level] = [$feat]; |
110
|
21
|
|
|
|
|
34
|
$chunkSizes->[$level] = $featSize; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# and propagate $lazyFeat up to the next level |
113
|
21
|
|
|
|
|
25
|
$feat = $lazyFeat; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# if we're already at the highest level, |
116
|
21
|
100
|
|
|
|
64
|
if ($level == $#{$self->{partialStack}}) { |
|
21
|
|
|
|
|
220
|
|
117
|
|
|
|
|
|
|
# then we need to make a new level to have somewhere to put |
118
|
|
|
|
|
|
|
# the new lazy feat |
119
|
1
|
|
|
|
|
3
|
$self->addNewLevel(); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
} else { |
122
|
|
|
|
|
|
|
# add the current feature the partial chunk at this level |
123
|
2559
|
|
|
|
|
2857
|
push @{$partialStack->[$level]}, $feat; |
|
2559
|
|
|
|
|
4677
|
|
124
|
2559
|
|
|
|
|
3551
|
$chunkSizes->[$level] = $proposedChunkSize; |
125
|
2559
|
|
|
|
|
6908
|
last; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub addNewLevel { |
131
|
2
|
|
|
2
|
0
|
2
|
my ($self) = @_; |
132
|
2
|
|
|
|
|
4
|
push @{$self->{partialStack}}, []; |
|
2
|
|
|
|
|
8
|
|
133
|
2
|
|
|
|
|
3
|
push @{$self->{chunkSizes}}, 0; |
|
2
|
|
|
|
|
13
|
|
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub finishChunk { |
137
|
22
|
|
|
22
|
0
|
41
|
my ($self, $featList) = @_; |
138
|
22
|
|
|
|
|
145
|
my $newNcl = Bio::JBrowse::Store::NCList::NCList->new( |
139
|
|
|
|
|
|
|
$self->{start}, |
140
|
|
|
|
|
|
|
$self->{end}, |
141
|
|
|
|
|
|
|
$self->{setSublist}, |
142
|
|
|
|
|
|
|
$featList |
143
|
|
|
|
|
|
|
); |
144
|
22
|
|
|
|
|
44
|
my $chunkId = $self->{chunkNum}; |
145
|
22
|
|
|
|
|
43
|
$self->{chunkNum} += 1; |
146
|
22
|
|
|
|
|
70
|
$self->{output}->($newNcl->nestedList, $chunkId); |
147
|
|
|
|
|
|
|
|
148
|
22
|
100
|
|
|
|
168
|
$self->{maxEnd} = $newNcl->maxEnd unless defined($self->{maxEnd}); |
149
|
22
|
|
|
|
|
82
|
$self->{maxEnd} = max($self->{maxEnd}, $newNcl->maxEnd); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# return the lazy ("fake") feature representing this chunk |
152
|
22
|
|
|
|
|
69
|
return $self->{makeLazy}->($newNcl->minStart, $newNcl->maxEnd, $chunkId); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub finish { |
157
|
1
|
|
|
1
|
1
|
9
|
my ($self) = @_; |
158
|
1
|
|
|
|
|
3
|
my $level; |
159
|
|
|
|
|
|
|
|
160
|
1
|
|
|
|
|
2
|
for ($level = 0; $level < $#{$self->{partialStack}}; $level++) { |
|
2
|
|
|
|
|
9
|
|
161
|
1
|
|
|
|
|
5
|
my $lazyFeat = $self->finishChunk($self->{partialStack}->[$level]); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# pass $lazyFeat up to the next higher level. |
164
|
|
|
|
|
|
|
# (the loop ends one level before the highest level, so there |
165
|
|
|
|
|
|
|
# will always be at least one higher level) |
166
|
1
|
|
|
|
|
9
|
push @{$self->{partialStack}->[$level + 1]}, $lazyFeat; |
|
1
|
|
|
|
|
5
|
|
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# make sure there's a top-level NCL |
170
|
1
|
|
|
|
|
2
|
$level = $#{$self->{partialStack}}; |
|
1
|
|
|
|
|
3
|
|
171
|
1
|
|
|
|
|
7
|
my $newNcl = Bio::JBrowse::Store::NCList::NCList->new( |
172
|
|
|
|
|
|
|
$self->{start}, |
173
|
|
|
|
|
|
|
$self->{end}, |
174
|
|
|
|
|
|
|
$self->{setSublist}, |
175
|
|
|
|
|
|
|
$self->{partialStack}->[$level]); |
176
|
1
|
|
|
|
|
6
|
$self->{maxEnd} = max( grep defined, $self->{maxEnd}, $newNcl->maxEnd ); |
177
|
|
|
|
|
|
|
#print STDERR "top level NCL has " . scalar(@{$self->{partialStack}->[$level]}) . " features\n"; |
178
|
1
|
|
|
|
|
5
|
$self->{topLevelList} = $newNcl->nestedList; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub binarySearch { |
182
|
36
|
|
|
36
|
0
|
95
|
my ($self, $arr, $item, $getter) = @_; |
183
|
|
|
|
|
|
|
|
184
|
36
|
|
|
|
|
46
|
my $low = -1; |
185
|
36
|
|
|
|
|
41
|
my $high = $#{$arr} + 1; |
|
36
|
|
|
|
|
58
|
|
186
|
36
|
|
|
|
|
52
|
my $mid; |
187
|
|
|
|
|
|
|
|
188
|
36
|
|
|
|
|
85
|
while ($high - $low > 1) { |
189
|
151
|
|
|
|
|
240
|
$mid = int(($low + $high) / 2); |
190
|
151
|
50
|
|
|
|
406
|
if ($getter->($arr->[$mid]) > $item) { |
191
|
151
|
|
|
|
|
379
|
$high = $mid; |
192
|
|
|
|
|
|
|
} else { |
193
|
0
|
|
|
|
|
0
|
$low = $mid; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# if we're iterating rightward, return the high index; |
198
|
|
|
|
|
|
|
# if leftward, the low index |
199
|
36
|
50
|
|
|
|
80
|
if ($getter == $self->{end}) { return $high } else { return $low }; |
|
36
|
|
|
|
|
83
|
|
|
0
|
|
|
|
|
0
|
|
200
|
|
|
|
|
|
|
}; |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub iterHelper { |
203
|
36
|
|
|
36
|
0
|
78
|
my ($self, $arr, $from, $to, $fun, $inc, |
204
|
|
|
|
|
|
|
$searchGet, $testGet, $path) = @_; |
205
|
36
|
|
|
|
|
48
|
my $len = $#{$arr} + 1; |
|
36
|
|
|
|
|
69
|
|
206
|
36
|
|
|
|
|
102
|
my $i = $self->binarySearch($arr, $from, $searchGet); |
207
|
36
|
|
|
|
|
125
|
my $getChunk = $self->{attrs}->makeGetter("chunk"); |
208
|
36
|
|
|
|
|
112
|
my $getSublist = $self->{attrs}->makeGetter("sublist"); |
209
|
|
|
|
|
|
|
|
210
|
36
|
|
66
|
|
|
213
|
while (($i < $len) |
|
|
|
66
|
|
|
|
|
211
|
|
|
|
|
|
|
&& ($i >= 0) |
212
|
|
|
|
|
|
|
&& (($inc * $testGet->($arr->[$i])) < ($inc * $to)) ) { |
213
|
|
|
|
|
|
|
|
214
|
2581
|
100
|
|
|
|
6323
|
if ($arr->[$i][0] == $self->{lazyClass}) { |
215
|
22
|
|
|
|
|
61
|
my $chunkNum = $getChunk->($arr->[$i]); |
216
|
22
|
|
|
|
|
110
|
my $chunk = $self->{loadChunk}->($chunkNum); |
217
|
22
|
|
|
|
|
157
|
$self->iterHelper($chunk, $from, $to, $fun, $inc, |
218
|
|
|
|
|
|
|
$searchGet, $testGet, [$chunkNum]); |
219
|
|
|
|
|
|
|
} else { |
220
|
2559
|
|
|
|
|
8533
|
$fun->($arr->[$i], [@$path, $i]); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
2581
|
|
|
|
|
11851
|
my $sublist = $getSublist->($arr->[$i]); |
224
|
2581
|
100
|
|
|
|
5433
|
if (defined($sublist)) { |
225
|
13
|
|
|
|
|
61
|
$self->iterHelper($sublist, $from, $to, $fun, $inc, |
226
|
|
|
|
|
|
|
$searchGet, $testGet, [@$path, $i]); |
227
|
|
|
|
|
|
|
} |
228
|
2581
|
|
|
|
|
15375
|
$i += $inc; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub overlapCallback { |
234
|
1
|
|
|
1
|
1
|
4
|
my ($self, $from, $to, $fun) = @_; |
235
|
|
|
|
|
|
|
|
236
|
1
|
50
|
|
|
|
7
|
croak "LazyNCList not loaded" unless defined($self->{topLevelList}); |
237
|
|
|
|
|
|
|
|
238
|
1
|
50
|
|
|
|
6
|
return unless $self->count; |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# inc: iterate leftward or rightward |
241
|
1
|
50
|
|
|
|
6
|
my $inc = ($from > $to) ? -1 : 1; |
242
|
|
|
|
|
|
|
# searchGet: search on start or end |
243
|
1
|
50
|
|
|
|
5
|
my $searchGet = ($from > $to) ? $self->{start} : $self->{end}; |
244
|
|
|
|
|
|
|
# testGet: test on start or end |
245
|
1
|
50
|
|
|
|
6
|
my $testGet = ($from > $to) ? $self->{end} : $self->{start}; |
246
|
|
|
|
|
|
|
# treats the root chunk as number 0 |
247
|
1
|
|
|
|
|
8
|
$self->iterHelper($self->{topLevelList}, $from, $to, $fun, |
248
|
|
|
|
|
|
|
$inc, $searchGet, $testGet, [0]); |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
1
|
|
|
1
|
0
|
9
|
sub count { return shift->{count}; } |
252
|
|
|
|
|
|
|
|
253
|
1
|
|
|
1
|
0
|
10
|
sub maxEnd { return shift->{maxEnd}; } |
254
|
|
|
|
|
|
|
|
255
|
1
|
|
|
1
|
0
|
1713402
|
sub minStart { return shift->{minStart}; } |
256
|
|
|
|
|
|
|
|
257
|
0
|
|
|
0
|
0
|
|
sub topLevelList { return shift->{topLevelList}; } |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
1; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
__END__ |