line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package IntervalTree; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
30229
|
use 5.006; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
45
|
|
4
|
1
|
|
|
1
|
|
613
|
use POSIX qw(ceil); |
|
1
|
|
|
|
|
9060
|
|
|
1
|
|
|
|
|
11
|
|
5
|
1
|
|
|
1
|
|
1722
|
use List::Util qw(max min); |
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
142
|
|
6
|
1
|
|
|
1
|
|
8
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
53
|
|
7
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
8
|
1
|
|
|
1
|
|
7
|
no warnings 'once'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1069
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
IntervalTree.pm |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 VERSION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Version 0.01 |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 DESCRIPTION |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Data structure for performing intersect queries on a set of intervals which |
23
|
|
|
|
|
|
|
preserves all information about the intervals (unlike bitset projection methods). |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Historical note: |
28
|
|
|
|
|
|
|
# This module original contained an implementation based on sorted endpoints |
29
|
|
|
|
|
|
|
# and a binary search, using an idea from Scott Schwartz and Piotr Berman. |
30
|
|
|
|
|
|
|
# Later an interval tree implementation was implemented by Ian for Galaxy's |
31
|
|
|
|
|
|
|
# join tool (see `bx.intervals.operations.quicksect.py`). This was then |
32
|
|
|
|
|
|
|
# converted to Cython by Brent, who also added support for |
33
|
|
|
|
|
|
|
# upstream/downstream/neighbor queries. This was modified by James to |
34
|
|
|
|
|
|
|
# handle half-open intervals strictly, to maintain sort order, and to |
35
|
|
|
|
|
|
|
# implement the same interface as the original Intersecter. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 SYNOPSIS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Data structure for performing window intersect queries on a set of |
41
|
|
|
|
|
|
|
of possibly overlapping 1d intervals. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Usage |
44
|
|
|
|
|
|
|
===== |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Create an empty IntervalTree |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
>>> use IntervalTree; |
49
|
|
|
|
|
|
|
>>> my $intersecter = IntervalTree->new(); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
An interval is a start and end position and a value (possibly None). |
52
|
|
|
|
|
|
|
You can add any object as an interval: |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
>>> $intersecter->insert( 0, 10, "food" ); |
55
|
|
|
|
|
|
|
>>> $intersecter->insert( 3, 7, {foo=>'bar'} ); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
>>> $intersecter->find( 2, 5 ); |
58
|
|
|
|
|
|
|
['food', {'foo'=>'bar'}] |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
If the object has start and end attributes (like the Interval class) there |
61
|
|
|
|
|
|
|
is are some shortcuts: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
>>> my $intersecter = IntervalTree->new(); |
64
|
|
|
|
|
|
|
>>> $intersecter->insert_interval( Interval->new( 0, 10 ) ); |
65
|
|
|
|
|
|
|
>>> $intersecter->insert_interval( Interval->new( 3, 7 ) ); |
66
|
|
|
|
|
|
|
>>> $intersecter->insert_interval( Interval->new( 3, 40 ) ); |
67
|
|
|
|
|
|
|
>>> $intersecter->insert_interval( Interval->new( 13, 50 ) ); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
>>> $intersecter->find( 30, 50 ); |
70
|
|
|
|
|
|
|
[Interval(3, 40), Interval(13, 50)] |
71
|
|
|
|
|
|
|
>>> $intersecter->find( 100, 200 ); |
72
|
|
|
|
|
|
|
[] |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Before/after for intervals |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
>>> $intersecter->before_interval( Interval->new( 10, 20 ) ); |
77
|
|
|
|
|
|
|
[Interval(3, 7)] |
78
|
|
|
|
|
|
|
>>> $intersecter->before_interval( Interval->new( 5, 20 ) ); |
79
|
|
|
|
|
|
|
[] |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Upstream/downstream |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
>>> $intersecter->upstream_of_interval(Interval->new(11, 12)); |
84
|
|
|
|
|
|
|
[Interval(0, 10)] |
85
|
|
|
|
|
|
|
>>> $intersecter->upstream_of_interval(Interval->new(11, 12, undef, undef, "-")); |
86
|
|
|
|
|
|
|
[Interval(13, 50)] |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
>>> intersecter.upstream_of_interval(Interval(1, 2, undef, undef, "-"), 3); |
89
|
|
|
|
|
|
|
[Interval(3, 7), Interval(3, 40), Interval(13, 50)] |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=cut |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub new { |
94
|
0
|
|
|
0
|
0
|
0
|
my ( $class ) = @_; |
95
|
0
|
|
|
|
|
0
|
my $self = {}; |
96
|
0
|
|
|
|
|
0
|
$self->{root} = undef; |
97
|
0
|
|
|
|
|
0
|
return bless $self, $class; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# ---- Position based interfaces ----------------------------------------- |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 insert |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Insert the interval [start,end) associated with value `value`. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=cut |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub insert { |
109
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $start, $end, $value) = @_; |
110
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
111
|
0
|
|
|
|
|
0
|
$self->{root} = IntervalNode->new( $start, $end, $value ); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
else { |
114
|
0
|
|
|
|
|
0
|
$self->{root} = $self->{root}->insert( $start, $end, $value ); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
*add = \&insert; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 find |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Return a sorted list of all intervals overlapping [start,end). |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub find { |
127
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $start, $end ) = @_; |
128
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
129
|
0
|
|
|
|
|
0
|
return []; |
130
|
|
|
|
|
|
|
} |
131
|
0
|
|
|
|
|
0
|
return $self->{root}->find( $start, $end ); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 before |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie before `position` and are no |
137
|
|
|
|
|
|
|
further than `max_dist` positions away |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub before { |
142
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $position, $num_intervals, $max_dist ) = @_; |
143
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
144
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
145
|
|
|
|
|
|
|
|
146
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
147
|
0
|
|
|
|
|
0
|
return []; |
148
|
|
|
|
|
|
|
} |
149
|
0
|
|
|
|
|
0
|
return $self->{root}->left( $position, $num_intervals, $max_dist ); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head2 after |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie after `position` and are no |
155
|
|
|
|
|
|
|
further than `max_dist` positions away |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub after { |
160
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $position, $num_intervals, $max_dist) = @_; |
161
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
162
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
163
|
|
|
|
|
|
|
|
164
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
165
|
0
|
|
|
|
|
0
|
return []; |
166
|
|
|
|
|
|
|
} |
167
|
0
|
|
|
|
|
0
|
return $self->{root}->right( $position, $num_intervals, $max_dist ); |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# ---- Interval-like object based interfaces ----------------------------- |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 insert_interval |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Insert an "interval" like object (one with at least start and end |
175
|
|
|
|
|
|
|
attributes) |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub insert_interval { |
180
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $interval ) = @_; |
181
|
0
|
|
|
|
|
0
|
$self->insert( $interval->{start}, $interval->{end}, $interval ); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
*add_interval = \&insert_interval; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 before_interval |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie completely before `interval` |
189
|
|
|
|
|
|
|
and are no further than `max_dist` positions away |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=cut |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub before_interval { |
194
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $interval, $num_intervals, $max_dist ) = @_; |
195
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
196
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
197
|
|
|
|
|
|
|
|
198
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
199
|
0
|
|
|
|
|
0
|
return []; |
200
|
|
|
|
|
|
|
} |
201
|
0
|
|
|
|
|
0
|
return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist ); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 after_interval |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie completely after `interval` and |
207
|
|
|
|
|
|
|
are no further than `max_dist` positions away |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub after_interval { |
212
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $interval, $num_intervals, $max_dist ) = @_; |
213
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
214
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
215
|
|
|
|
|
|
|
|
216
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
217
|
0
|
|
|
|
|
0
|
return []; |
218
|
|
|
|
|
|
|
} |
219
|
0
|
|
|
|
|
0
|
return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist ); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 upstream_of_interval |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie completely upstream of |
225
|
|
|
|
|
|
|
`interval` and are no further than `max_dist` positions away |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=cut |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub upstream_of_interval { |
230
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $interval, $num_intervals, $max_dist ) = @_; |
231
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
232
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
233
|
|
|
|
|
|
|
|
234
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
235
|
0
|
|
|
|
|
0
|
return []; |
236
|
|
|
|
|
|
|
} |
237
|
0
|
0
|
0
|
|
|
0
|
if ($interval->{strand} == -1 || $interval->{strand} eq "-") { |
238
|
0
|
|
|
|
|
0
|
return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist ); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
else { |
241
|
0
|
|
|
|
|
0
|
return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist ); |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head2 downstream_of_interval |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Find `num_intervals` intervals that lie completely downstream of |
248
|
|
|
|
|
|
|
`interval` and are no further than `max_dist` positions away |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=cut |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
sub downstream_of_interval { |
254
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $interval, $num_intervals, $max_dist ) = @_; |
255
|
0
|
0
|
|
|
|
0
|
$num_intervals = 1 if !defined $num_intervals; |
256
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
257
|
|
|
|
|
|
|
|
258
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
259
|
0
|
|
|
|
|
0
|
return []; |
260
|
|
|
|
|
|
|
} |
261
|
0
|
0
|
0
|
|
|
0
|
if ($interval->{strand} == -1 || $interval->{strand} eq "-") { |
262
|
0
|
|
|
|
|
0
|
return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist ); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
else { |
265
|
0
|
|
|
|
|
0
|
return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist ); |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 traverse |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
call fn for each element in the tree |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=cut |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub traverse { |
276
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fn) = @_; |
277
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{root}) { |
278
|
0
|
|
|
|
|
0
|
return undef; |
279
|
|
|
|
|
|
|
} |
280
|
0
|
|
|
|
|
0
|
return $self->{root}->traverse($fn); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 IntervalNode |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
A single node of an `IntervalTree`. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
NOTE: Unless you really know what you are doing, you probably should us |
288
|
|
|
|
|
|
|
`IntervalTree` rather than using this directly. |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
package IntervalNode; |
293
|
1
|
|
|
1
|
|
14
|
use List::Util qw(min max); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2596
|
|
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
our $EmptyNode = IntervalNode->new( 0, 0, Interval->new(0, 0)); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub nlog { |
298
|
1
|
|
|
1
|
|
98
|
return -1.0 / log(0.5); |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub left_node { |
302
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
303
|
0
|
0
|
|
|
|
0
|
return $self->{cleft} != $EmptyNode ? $self->{cleft} : undef; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
sub right_node { |
307
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
308
|
0
|
0
|
|
|
|
0
|
return $self->{cright} != $EmptyNode ? $self->{cright} : undef; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub root_node { |
312
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
313
|
0
|
0
|
|
|
|
0
|
return $self->{croot} != $EmptyNode ? $self->{croot} : undef; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub str { |
317
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
318
|
0
|
|
|
|
|
0
|
return "IntervalNode($self->{start}, $self->{end})"; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub new { |
322
|
1
|
|
|
1
|
|
4
|
my ($class, $start, $end, $interval) = @_; |
323
|
|
|
|
|
|
|
# Perl lacks the binomial distribution, so we convert a |
324
|
|
|
|
|
|
|
# uniform into a binomial because it naturally scales with |
325
|
|
|
|
|
|
|
# tree size. Also, perl's uniform is perfect since the |
326
|
|
|
|
|
|
|
# upper limit is not inclusive, which gives us undefined here. |
327
|
1
|
|
|
|
|
3
|
my $self = {}; |
328
|
1
|
|
|
|
|
3
|
$self->{priority} = POSIX::ceil(nlog() * log(-1.0/(1.0 * rand() - 1))); |
329
|
1
|
|
|
|
|
4
|
$self->{start} = $start; |
330
|
1
|
|
|
|
|
16
|
$self->{end} = $end; |
331
|
1
|
|
|
|
|
2
|
$self->{interval} = $interval; |
332
|
1
|
|
|
|
|
3
|
$self->{maxend} = $end; |
333
|
1
|
|
|
|
|
2
|
$self->{minstart} = $start; |
334
|
1
|
|
|
|
|
2
|
$self->{minend} = $end; |
335
|
1
|
|
|
|
|
2
|
$self->{cleft} = $EmptyNode; |
336
|
1
|
|
|
|
|
2
|
$self->{cright} = $EmptyNode; |
337
|
1
|
|
|
|
|
2
|
$self->{croot} = $EmptyNode; |
338
|
1
|
|
|
|
|
4
|
return bless $self, $class; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=head2 insert |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Insert a new IntervalNode into the tree of which this node is |
344
|
|
|
|
|
|
|
currently the root. The return value is the new root of the tree (which |
345
|
|
|
|
|
|
|
may or may not be this node!) |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=cut |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub insert { |
350
|
0
|
|
|
0
|
|
0
|
my ($self, $start, $end, $interval) = @_; |
351
|
0
|
|
|
|
|
0
|
my $croot = $self; |
352
|
|
|
|
|
|
|
# If starts are the same, decide which to add interval to based on |
353
|
|
|
|
|
|
|
# end, thus maintaining sortedness relative to start/end |
354
|
0
|
|
|
|
|
0
|
my $decision_endpoint = $start; |
355
|
0
|
0
|
|
|
|
0
|
if ($start == $self->{start}) { |
356
|
0
|
|
|
|
|
0
|
$decision_endpoint = $end; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
0
|
0
|
|
|
|
0
|
if ($decision_endpoint > $self->{start}) { |
360
|
|
|
|
|
|
|
# insert to cright tree |
361
|
0
|
0
|
|
|
|
0
|
if ($self->{cright} != $EmptyNode) { |
362
|
0
|
|
|
|
|
0
|
$self->{cright} = $self->{cright}->insert( $start, $end, $interval ); |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
else { |
365
|
0
|
|
|
|
|
0
|
$self->{cright} = IntervalNode->new( $start, $end, $interval ); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
# rebalance tree |
368
|
0
|
0
|
|
|
|
0
|
if ($self->{priority} < $self->{cright}{priority}) { |
369
|
0
|
|
|
|
|
0
|
$croot = $self->rotate_left(); |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
else { |
373
|
|
|
|
|
|
|
# insert to cleft tree |
374
|
0
|
0
|
|
|
|
0
|
if ($self->{cleft} != $EmptyNode) { |
375
|
0
|
|
|
|
|
0
|
$self->{cleft} = $self->{cleft}->insert( $start, $end, $interval); |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
else { |
378
|
0
|
|
|
|
|
0
|
$self->{cleft} = IntervalNode->new( $start, $end, $interval); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
# rebalance tree |
381
|
0
|
0
|
|
|
|
0
|
if ($self->{priority} < $self->{cleft}{priority}) { |
382
|
0
|
|
|
|
|
0
|
$croot = $self->rotate_right(); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
$croot->set_ends(); |
387
|
0
|
|
|
|
|
0
|
$self->{cleft}{croot} = $croot; |
388
|
0
|
|
|
|
|
0
|
$self->{cright}{croot} = $croot; |
389
|
0
|
|
|
|
|
0
|
return $croot; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
sub rotate_right { |
393
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
394
|
0
|
|
|
|
|
0
|
my $croot = $self->{cleft}; |
395
|
0
|
|
|
|
|
0
|
$self->{cleft} = $self->{cleft}{cright}; |
396
|
0
|
|
|
|
|
0
|
$croot->{cright} = $self; |
397
|
0
|
|
|
|
|
0
|
$self->set_ends(); |
398
|
0
|
|
|
|
|
0
|
return $croot; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub rotate_left { |
402
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
403
|
0
|
|
|
|
|
0
|
my $croot = $self->{cright}; |
404
|
0
|
|
|
|
|
0
|
$self->{cright} = $self->{cright}{cleft}; |
405
|
0
|
|
|
|
|
0
|
$croot->{cleft} = $self; |
406
|
0
|
|
|
|
|
0
|
$self->set_ends(); |
407
|
0
|
|
|
|
|
0
|
return $croot; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub set_ends { |
411
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
412
|
0
|
0
|
0
|
|
|
0
|
if ($self->{cright} != $EmptyNode && $self->{cleft} != $EmptyNode) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
413
|
0
|
|
|
|
|
0
|
$self->{maxend} = max($self->{end}, $self->{cright}{maxend}, $self->{cleft}{maxend}); |
414
|
0
|
|
|
|
|
0
|
$self->{minend} = min($self->{end}, $self->{cright}{minend}, $self->{cleft}{minend}); |
415
|
0
|
|
|
|
|
0
|
$self->{minstart} = min($self->{start}, $self->{cright}{minstartz}, $self->{cleft}{minstart}); |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
elsif ( $self->{cright} != $EmptyNode) { |
418
|
0
|
|
|
|
|
0
|
$self->{maxend} = max($self->{end}, $self->{cright}{maxend}); |
419
|
0
|
|
|
|
|
0
|
$self->{minend} = min($self->{end}, $self->{cright}{minend}); |
420
|
0
|
|
|
|
|
0
|
$self->{minstart} = min($self->{start}, $self->{cright}{minstart}); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
elsif ( $self->{cleft} != $EmptyNode) { |
423
|
0
|
|
|
|
|
0
|
$self->{maxend} = max($self->{end}, $self->{cleft}{maxend}); |
424
|
0
|
|
|
|
|
0
|
$self->{minend} = min($self->{end}, $self->{cleft}{minend}); |
425
|
0
|
|
|
|
|
0
|
$self->{minstart} = min($self->{start}, $self->{cleft}{minstart}); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 intersect |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
given a start and a end, return a list of features |
433
|
|
|
|
|
|
|
falling within that range |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=cut |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub intersect { |
438
|
0
|
|
|
0
|
|
0
|
my ( $self, $start, $end, $sort ) = @_; |
439
|
0
|
0
|
|
|
|
0
|
$sort = 1 if !defined $sort; |
440
|
0
|
|
|
|
|
0
|
my $results = []; |
441
|
0
|
|
|
|
|
0
|
$self->_intersect( $start, $end, $results ); |
442
|
0
|
|
|
|
|
0
|
return $results; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
*find = \&intersect; |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub _intersect { |
448
|
0
|
|
|
0
|
|
0
|
my ( $self, $start, $end, $results) = @_; |
449
|
|
|
|
|
|
|
# Left subtree |
450
|
0
|
0
|
0
|
|
|
0
|
if ($self->{cleft} != $EmptyNode && $self->{cleft}{maxend} > $start) { |
451
|
0
|
|
|
|
|
0
|
$self->{cleft}->_intersect( $start, $end, $results ); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
# This interval |
454
|
0
|
0
|
0
|
|
|
0
|
if (( $self->{end} > $start ) && ( $self->{start} < $end )) { |
455
|
0
|
|
|
|
|
0
|
push @$results, $self->{interval}; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
# Right subtree |
458
|
0
|
0
|
0
|
|
|
0
|
if ($self->{cright} != $EmptyNode && $self->{start} < $end) { |
459
|
0
|
|
|
|
|
0
|
$self->{cright}->_intersect( $start, $end, $results ); |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
sub _seek_left { |
465
|
0
|
|
|
0
|
|
0
|
my ($self, $position, $results, $n, $max_dist) = @_; |
466
|
|
|
|
|
|
|
# we know we can bail in these 2 cases. |
467
|
0
|
0
|
|
|
|
0
|
if ($self->{maxend} + $max_dist < $position) { |
468
|
0
|
|
|
|
|
0
|
return; |
469
|
|
|
|
|
|
|
} |
470
|
0
|
0
|
|
|
|
0
|
if ($self->{minstart} > $position) { |
471
|
0
|
|
|
|
|
0
|
return; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# the ordering of these 3 blocks makes it so the results are |
475
|
|
|
|
|
|
|
# ordered nearest to farest from the query position |
476
|
0
|
0
|
|
|
|
0
|
if ($self->{cright} != $EmptyNode) { |
477
|
0
|
|
|
|
|
0
|
$self->{cright}->_seek_left($position, $results, $n, $max_dist); |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
|
480
|
0
|
0
|
0
|
|
|
0
|
if (-1 < $position - $self->{end} && $position - $self->{end} < $max_dist) { |
481
|
0
|
|
|
|
|
0
|
push @$results, $self->{interval}; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# TODO: can these conditionals be more stringent? |
485
|
0
|
0
|
|
|
|
0
|
if ($self->{cleft} != $EmptyNode) { |
486
|
0
|
|
|
|
|
0
|
$self->{cleft}->_seek_left($position, $results, $n, $max_dist); |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub _seek_right { |
493
|
0
|
|
|
0
|
|
0
|
my ($self, $position, $results, $n, $max_dist) = @_; |
494
|
|
|
|
|
|
|
# we know we can bail in these 2 cases. |
495
|
0
|
0
|
|
|
|
0
|
return if $self->{maxend} < $position; |
496
|
0
|
0
|
|
|
|
0
|
return if $self->{minstart} - $max_dist > $position; |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
#print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
# the ordering of these 3 blocks makes it so the results are |
501
|
|
|
|
|
|
|
# ordered nearest to farest from the query position |
502
|
0
|
0
|
|
|
|
0
|
if ($self->{cleft} != $EmptyNode) { |
503
|
0
|
|
|
|
|
0
|
$self->{cleft}->_seek_right($position, $results, $n, $max_dist); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
0
|
0
|
0
|
|
|
0
|
if (-1 < $self->{start} - $position && $self->{start} - $position < $max_dist) { |
507
|
0
|
|
|
|
|
0
|
push @$results, $self->{interval}; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
|
510
|
0
|
0
|
|
|
|
0
|
if ($self->{cright} != $EmptyNode) { |
511
|
0
|
|
|
|
|
0
|
$self->{cright}->_seek_right($position, $results, $n, $max_dist); |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head2 left |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
find n features with a start > than `position` |
518
|
|
|
|
|
|
|
f: a Interval object (or anything with an `end` attribute) |
519
|
|
|
|
|
|
|
n: the number of features to return |
520
|
|
|
|
|
|
|
max_dist: the maximum distance to look before giving up. |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=cut |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
sub left { |
525
|
0
|
|
|
0
|
|
0
|
my ($self, $position, $n, $max_dist) = @_; |
526
|
0
|
0
|
|
|
|
0
|
$n = 1 if !defined $n; |
527
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
528
|
|
|
|
|
|
|
|
529
|
0
|
|
|
|
|
0
|
my $results = []; |
530
|
|
|
|
|
|
|
# use start - 1 becuase .left() assumes strictly left-of |
531
|
0
|
|
|
|
|
0
|
$self->_seek_left( $position - 1, $results, $n, $max_dist ); |
532
|
0
|
0
|
|
|
|
0
|
return $results if length(@$results) == $n; |
533
|
|
|
|
|
|
|
|
534
|
0
|
|
|
|
|
0
|
my $r = $results; |
535
|
0
|
|
|
|
|
0
|
@$r = sort {$b->{end} <=> $a->{end}} @$r; |
|
0
|
|
|
|
|
0
|
|
536
|
0
|
|
|
|
|
0
|
return @$r[0..$n-1]; |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head2 right |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
find n features with a end < than position |
542
|
|
|
|
|
|
|
f: a Interval object (or anything with a `start` attribute) |
543
|
|
|
|
|
|
|
n: the number of features to return |
544
|
|
|
|
|
|
|
max_dist: the maximum distance to look before giving up. |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=cut |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
sub right { |
549
|
0
|
|
|
0
|
|
0
|
my ($self, $position, $n, $max_dist) = @_; |
550
|
0
|
0
|
|
|
|
0
|
$n = 1 if !defined $n; |
551
|
0
|
0
|
|
|
|
0
|
$max_dist = 2500 if !defined $max_dist; |
552
|
|
|
|
|
|
|
|
553
|
0
|
|
|
|
|
0
|
my $results = []; |
554
|
|
|
|
|
|
|
# use end + 1 because .right() assumes strictly right-of |
555
|
0
|
|
|
|
|
0
|
$self->_seek_right($position + 1, $results, $n, $max_dist); |
556
|
0
|
0
|
|
|
|
0
|
return $results if length(@$results) == $n; |
557
|
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
0
|
my $r = $results; |
559
|
0
|
|
|
|
|
0
|
@$r = sort {$a->{start} <=> $b->{start}} @$r; |
|
0
|
|
|
|
|
0
|
|
560
|
0
|
|
|
|
|
0
|
return @$r[0..$n-1]; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub traverse { |
564
|
0
|
|
|
0
|
|
0
|
my ($self, $func) = @_; |
565
|
0
|
|
|
|
|
0
|
$self->_traverse($func); |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub _traverse { |
569
|
0
|
|
|
0
|
|
0
|
my ($self, $func) = @_; |
570
|
0
|
0
|
|
|
|
0
|
$self->{cleft}->_traverse($func) if $self->{cleft} != $EmptyNode; |
571
|
0
|
|
|
|
|
0
|
$func->($self); |
572
|
0
|
0
|
|
|
|
0
|
$self->{cright}->_traverse($func) if $self->{cright} != $EmptyNode; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
## ---- Wrappers that retain the old interface ------------------------------- |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=head2 Interval |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
Basic feature, with required integer start and end properties. |
580
|
|
|
|
|
|
|
Also accepts optional strand as +1 or -1 (used for up/downstream queries), |
581
|
|
|
|
|
|
|
a name, and any arbitrary data is sent in on the info keyword argument |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
>>> from bx.intervals.intersection import Interval |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
>>> f1 = Interval->new(23, 36); |
586
|
|
|
|
|
|
|
>>> f2 = Interval->new(34, 48, {'chr':12, 'anno':'transposon'}); |
587
|
|
|
|
|
|
|
>>> f2 |
588
|
|
|
|
|
|
|
Interval(34, 48, {'anno': 'transposon', 'chr': 12}) |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=cut |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
package Interval; |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
sub new { |
595
|
1
|
|
|
1
|
|
5
|
my ($class, $start, $end, $value, $chrom, $strand) = @_; |
596
|
1
|
50
|
|
|
|
6
|
die "start must be less than end" unless $start <= $end; |
597
|
1
|
|
|
|
|
3
|
my $self = {}; |
598
|
1
|
|
|
|
|
3
|
$self->{start} = $start; |
599
|
1
|
|
|
|
|
2
|
$self->{end} = $end; |
600
|
1
|
|
|
|
|
2
|
$self->{value} = $value; |
601
|
1
|
|
|
|
|
2
|
$self->{chrom} = $chrom; |
602
|
1
|
|
|
|
|
1
|
$self->{strand} = $strand; |
603
|
1
|
|
|
|
|
9
|
return bless $self, $class; |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
sub str { |
607
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
608
|
0
|
|
|
|
|
|
my $fstr = "Interval($self->{start}, $self->{end}"; |
609
|
0
|
0
|
|
|
|
|
if (defined($self->{value})) { |
610
|
0
|
|
|
|
|
|
$fstr .= ", value=$self->{value}"; |
611
|
|
|
|
|
|
|
} |
612
|
0
|
|
|
|
|
|
$fstr .= ")"; |
613
|
0
|
|
|
|
|
|
return $fstr; |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=head2 AUTHOR |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Ben Booth, C<< >> |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Original Authors: James Taylor (james@jamestaylor.org), |
621
|
|
|
|
|
|
|
Ian Schenk (ian.schenck@gmail.com), |
622
|
|
|
|
|
|
|
Brent Pedersen (bpederse@gmail.com) |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=head2 BUGS |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
627
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
628
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=head2 SUPPORT |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
perldoc IntervalTree |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
You can also look for information at: |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=over 4 |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
L |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
L |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item * CPAN Ratings |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
L |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=item * Search CPAN |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
L |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=back |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=head2 ACKNOWLEDGEMENTS |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
This code was directly ported from the bx-python project: |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
https://bitbucket.org/james_taylor/bx-python/src/tip/lib/bx/intervals/intersection.pyx |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Original Authors: James Taylor (james@jamestaylor.org), |
666
|
|
|
|
|
|
|
Ian Schenk (ian.schenck@gmail.com), |
667
|
|
|
|
|
|
|
Brent Pedersen (bpederse@gmail.com) |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head2 LICENSE AND COPYRIGHT |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Copyright 2012 Ben Booth. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
674
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
675
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=cut |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
1; # End of IntervalTree |