File Coverage

Bio/RangeI.pm
Criterion Covered Total %
statement 168 207 81.1
branch 79 138 57.2
condition 24 39 61.5
subroutine 23 23 100.0
pod 13 13 100.0
total 307 420 73.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::RangeI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Lehvaslaiho
7             #
8             # Copyright Matthew Pocock
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::RangeI - Range interface
17              
18             =head1 SYNOPSIS
19              
20             #Do not run this module directly
21              
22             =head1 DESCRIPTION
23              
24             This provides a standard BioPerl range interface that should be
25             implemented by any object that wants to be treated as a range. This
26             serves purely as an abstract base class for implementers and can not
27             be instantiated.
28              
29             Ranges are modeled as having (start, end, length, strand). They use
30             Bio-coordinates - all points E= start and E= end are within the
31             range. End is always greater-than or equal-to start, and length is
32             greater than or equal to 1. The behaviour of a range is undefined if
33             ranges with negative numbers or zero are used.
34              
35             So, in summary:
36              
37             length = end - start + 1
38             end >= start
39             strand = (-1 | 0 | +1)
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to one
47             of the Bioperl mailing lists. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Heikki Lehvaslaiho
72              
73             Email: heikki-at-bioperl-dot-org
74              
75             =head1 CONTRIBUTORS
76              
77             Juha Muilu (muilu@ebi.ac.uk)
78             Sendu Bala (bix@sendu.me.uk)
79             Malcolm Cook (mec@stowers-institute.org)
80             Stephen Montgomery (sm8 at sanger.ac.uk)
81              
82             =head1 APPENDIX
83              
84             The rest of the documentation details each of the object
85             methods. Internal methods are usually preceded with a _
86              
87             =cut
88              
89             package Bio::RangeI;
90              
91 217     217   935 use strict;
  217         1039  
  217         4694  
92 217     217   627 use Carp;
  217         285  
  217         8564  
93 217     217   769 use integer;
  217         214  
  217         1083  
94 217     217   3800 use vars qw(%STRAND_OPTIONS);
  217         249  
  217         7168  
95              
96 217     217   717 use base qw(Bio::Root::RootI);
  217         244  
  217         16975  
97              
98             BEGIN {
99             # STRAND_OPTIONS contains the legal values for the strand-testing options
100 217     217   408 %STRAND_OPTIONS = map { $_, '_' . $_ }
  651         324444  
101             (
102             'strong', # ranges must have the same strand
103             'weak', # ranges must have the same strand or no strand
104             'ignore', # ignore strand information
105             );
106             }
107              
108             # utility methods
109             #
110              
111             # returns true if strands are equal and non-zero
112             sub _strong {
113 8     8   13 my ($r1, $r2) = @_;
114 8         11 my ($s1, $s2) = ($r1->strand(), $r2->strand());
115              
116 8 100 100     55 return 1 if $s1 != 0 && $s1 == $s2;
117             }
118              
119             # returns true if strands are equal or either is zero
120             sub _weak {
121 8     8   10 my ($r1, $r2) = @_;
122 8         13 my ($s1, $s2) = ($r1->strand(), $r2->strand());
123 8 100 100     63 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
      100        
124             }
125              
126             # returns true for any strandedness
127             sub _ignore {
128 1     1   297 return 1;
129             }
130              
131             # works out what test to use for the strictness and returns true/false
132             # e.g. $r1->_testStrand($r2, 'strong')
133             sub _testStrand() {
134 12981     12981   12509 my ($r1, $r2, $comp) = @_;
135 12981 100       41403 return 1 unless $comp;
136 10         12 my $func = $STRAND_OPTIONS{$comp};
137 10         31 return $r1->$func($r2);
138             }
139              
140             =head1 Abstract methods
141              
142             These methods must be implemented in all subclasses.
143              
144             =head2 start
145              
146             Title : start
147             Usage : $start = $range->start();
148             Function: get/set the start of this range
149             Returns : the start of this range
150             Args : optionally allows the start to be set
151             using $range->start($start)
152              
153             =cut
154              
155             sub start {
156 1     1 1 629 shift->throw_not_implemented();
157             }
158              
159             =head2 end
160              
161             Title : end
162             Usage : $end = $range->end();
163             Function: get/set the end of this range
164             Returns : the end of this range
165             Args : optionally allows the end to be set
166             using $range->end($end)
167              
168             =cut
169              
170             sub end {
171 1     1 1 517 shift->throw_not_implemented();
172             }
173              
174             =head2 length
175              
176             Title : length
177             Usage : $length = $range->length();
178             Function: get/set the length of this range
179             Returns : the length of this range
180             Args : optionally allows the length to be set
181             using $range->length($length)
182              
183             =cut
184              
185             sub length {
186 1     1 1 523 shift->throw_not_implemented();
187             }
188              
189             =head2 strand
190              
191             Title : strand
192             Usage : $strand = $range->strand();
193             Function: get/set the strand of this range
194             Returns : the strandedness (-1, 0, +1)
195             Args : optionally allows the strand to be set
196             using $range->strand($strand)
197              
198             =cut
199              
200             sub strand {
201 1     1 1 500 shift->throw_not_implemented();
202             }
203              
204             =head1 Boolean Methods
205              
206             These methods return true or false. They throw an error if start and
207             end are not defined.
208              
209             $range->overlaps($otherRange) && print "Ranges overlap\n";
210              
211             =head2 overlaps
212              
213             Title : overlaps
214             Usage : if($r1->overlaps($r2)) { do stuff }
215             Function: tests if $r2 overlaps $r1
216             Args : arg #1 = a range to compare this one to (mandatory)
217             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
218             Returns : true if the ranges overlap, false otherwise
219              
220             =cut
221              
222             sub overlaps {
223 45     45 1 535 my ($self, $other, $so) = @_;
224              
225 45 50       79 $self->throw("start is undefined") unless defined $self->start;
226 44 50       75 $self->throw("end is undefined") unless defined $self->end;
227 44 50 33     204 $self->throw("not a Bio::RangeI object") unless defined $other &&
228             $other->isa('Bio::RangeI');
229 44 50       71 $other->throw("start is undefined") unless defined $other->start;
230 44 50       67 $other->throw("end is undefined") unless defined $other->end;
231              
232             return
233 44   100     89 ($self->_testStrand($other, $so)
234             and not (
235             ($self->start() > $other->end() or
236             $self->end() < $other->start() )
237             ));
238             }
239              
240             =head2 contains
241              
242             Title : contains
243             Usage : if($r1->contains($r2) { do stuff }
244             Function: tests whether $r1 totally contains $r2
245             Args : arg #1 = a range to compare this one to (mandatory)
246             alternatively, integer scalar to test
247             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
248             Returns : true if the argument is totally contained within this range
249              
250             =cut
251              
252             sub contains {
253 8289     8289 1 7310 my ($self, $other, $so) = @_;
254 8289 50       11899 $self->throw("start is undefined") unless defined $self->start;
255 8288 50       12539 $self->throw("end is undefined") unless defined $self->end;
256              
257 8288 50 33     23836 if(defined $other && ref $other) { # a range object?
258 8288 50       18502 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
259 8288 50       11117 $other->throw("start is undefined") unless defined $other->start;
260 8288 50       12310 $other->throw("end is undefined") unless defined $other->end;
261              
262 8288   100     12226 return ($self->_testStrand($other, $so) and
263             $other->start() >= $self->start() and
264             $other->end() <= $self->end());
265             } else { # a scalar?
266 0 0       0 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
267 0   0     0 return ($other >= $self->start() and $other <= $self->end());
268             }
269             }
270              
271             =head2 equals
272              
273             Title : equals
274             Usage : if($r1->equals($r2))
275             Function: test whether $r1 has the same start, end, length as $r2
276             Args : arg #1 = a range to compare this one to (mandatory)
277             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
278             Returns : true if they are describing the same range
279              
280             =cut
281              
282             sub equals {
283 1     1 1 482 my ($self, $other, $so) = @_;
284              
285 1 0       7 $self->throw("start is undefined") unless defined $self->start;
286 0 0       0 $self->throw("end is undefined") unless defined $self->end;
287 0 0       0 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
288 0 0       0 $other->throw("start is undefined") unless defined $other->start;
289 0 0       0 $other->throw("end is undefined") unless defined $other->end;
290              
291 0   0     0 return ($self->_testStrand($other, $so) and
292             $self->start() == $other->start() and
293             $self->end() == $other->end() );
294             }
295              
296             =head1 Geometrical methods
297              
298             These methods do things to the geometry of ranges, and return
299             Bio::RangeI compliant objects or triplets (start, stop, strand) from
300             which new ranges could be built.
301              
302             =head2 intersection
303              
304             Title : intersection
305             Usage : ($start, $end, $strand) = $r1->intersection($r2); OR
306             ($start, $end, $strand) = Bio::Range->intersection(\@ranges); OR
307             my $containing_range = $r1->intersection($r2); OR
308             my $containing_range = Bio::Range->intersection(\@ranges);
309             Function: gives the range that is contained by all ranges
310             Returns : undef if they do not overlap or if @ranges has only a
311             single range, else returns the range that they do
312             overlap. In scalar contex, the return value is an object of
313             the same class as the calling one. In array context the
314             return value is a three element array.
315             Args : arg #1 = [REQUIRED] a Bio::RangeI to compare this one to,
316             or an array ref of ranges
317             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
318              
319             =cut
320              
321             sub intersection {
322 4438     4438 1 4798 my ($self, $given, $so) = @_;
323 4438 100       7017 $self->throw("missing arg: you need to pass in another feature") unless $given;
324              
325 4437         3221 my @ranges;
326 4437 50       8656 if ($self eq "Bio::RangeI") {
327 0         0 $self = "Bio::Range";
328 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
329             }
330 4437 100       6968 if (ref $self) {
331 4436         4151 push(@ranges, $self);
332             }
333 4437 100       6519 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
  3         3  
334             #$self->throw("Need at least 2 ranges") unless @ranges >= 2;
335             # Rather than the above, I think the following is more consistent
336 4437 50       7591 return undef unless @ranges >= 2;
337              
338 4437         3687 my $intersect;
339 4437         7389 while (@ranges > 0) {
340 4440 100       7098 unless ($intersect) {
341 4437         3911 $intersect = shift(@ranges);
342 4437 50       7274 $self->throw("Not an object: $intersect") unless ref($intersect);
343 4437 50       11301 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
344 4437 50       7735 $self->throw("start is undefined") unless defined $intersect->start;
345 4437 50       7078 $self->throw("end is undefined") unless defined $intersect->end;
346             }
347              
348 4440         4782 my $compare = shift(@ranges);
349 4440 50       7479 $self->throw("Not an object: $compare") unless ref($compare);
350 4440 50       10988 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
351 4440 50       7208 $self->throw("start is undefined") unless defined $compare->start;
352 4440 50       7404 $self->throw("end is undefined") unless defined $compare->end;
353 4440 50       6862 return unless $compare->_testStrand($intersect, $so);
354              
355 4440         7192 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
  4440         10680  
356 4440         8305 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
  4440         8999  
357              
358 4440         4668 my $start = pop @starts; # larger of the 2 starts
359 4440         4089 my $end = shift @ends; # smaller of the 2 ends
360              
361 4440         3641 my $intersect_strand; # strand for the intersection
362 4440 100 33     8313 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
      66        
363 4284         5412 $intersect_strand = $compare->strand;
364             }
365             else {
366 156         147 $intersect_strand = 0;
367             }
368              
369 4440 100       7422 if ($start > $end) {
370 109         226 return;
371             }
372             else {
373 4331         12797 $intersect = $self->new(-start => $start,
374             -end => $end,
375             -strand => $intersect_strand);
376             }
377             }
378              
379 4328 100       6452 if (wantarray()) {
380 4         7 return ($intersect->start, $intersect->end, $intersect->strand);
381             }
382             else {
383 4324         6277 return $intersect;
384             }
385             }
386              
387             =head2 union
388              
389             Title : union
390             Usage : ($start, $end, $strand) = $r1->union($r2);
391             : ($start, $end, $strand) = Bio::Range->union(@ranges);
392             my $newrange = Bio::Range->union(@ranges);
393             Function: finds the minimal Range that contains all of the Ranges
394             Args : a Range or list of Range objects
395              
396             Returns : the range containing all of the range. In scalar contex,
397             the return value is an object of the same class as the
398             calling one. In array context the return value is a
399             three element array.
400              
401             =cut
402              
403             sub union {
404 21337     21337 1 17282 my $self = shift;
405 21337         25239 my @ranges = @_;
406 21337 50       45648 if ($self eq "Bio::RangeI") {
407 0         0 $self = "Bio::Range";
408 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
409             }
410 21337 100       35574 if(ref $self) {
411 21336         25253 unshift @ranges, $self;
412             }
413              
414 21341         52004 my @start = sort {$a<=>$b}
415 21337         21122 map( { $_->start() } @ranges);
  42676         55560  
416 21341         40870 my @end = sort {$a<=>$b}
417 21337         23234 map( { $_->end() } @ranges);
  42676         59571  
418              
419 21337         22414 my $start = shift @start;
420 21337         34204 while( !defined $start ) {
421 0         0 $start = shift @start;
422             }
423              
424 21337         18537 my $end = pop @end;
425              
426 21337         17177 my $union_strand; # Strand for the union range object.
427              
428 21337         29734 foreach(@ranges) {
429 42676 100       54780 if(! defined $union_strand) {
430 21354         36638 $union_strand = $_->strand;
431 21354         24434 next;
432             } else {
433 21322 100 100     27529 if(not defined $_->strand or $union_strand ne $_->strand) {
434 467         351 $union_strand = 0;
435 467         446 last;
436             }
437             }
438             }
439 21337 50 33     39832 return unless $start or $end;
440 21337 100       25110 if( wantarray() ) {
441 17083         38240 return ( $start,$end,$union_strand);
442             } else {
443 4254         8638 return $self->new('-start' => $start,
444             '-end' => $end,
445             '-strand' => $union_strand
446             );
447             }
448             }
449              
450             =head2 overlap_extent
451              
452             Title : overlap_extent
453             Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
454             Function: Provides actual amount of overlap between two different
455             ranges
456             Example :
457             Returns : array of values containing the length unique to the calling
458             range, the length common to both, and the length unique to
459             the argument range
460             Args : a range
461              
462             =cut
463              
464             sub overlap_extent{
465 1     1 1 711 my ($a,$b) = @_;
466              
467 1 0       7 $a->throw("start is undefined") unless defined $a->start;
468 0 0       0 $a->throw("end is undefined") unless defined $a->end;
469 0 0       0 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
470 0 0       0 $b->throw("start is undefined") unless defined $b->start;
471 0 0       0 $b->throw("end is undefined") unless defined $b->end;
472              
473 0 0       0 if( ! $a->overlaps($b) ) {
474 0         0 return ($a->length,0,$b->length);
475             }
476              
477 0         0 my ($au,$bu) = (0, 0);
478 0 0       0 if( $a->start < $b->start ) {
479 0         0 $au = $b->start - $a->start;
480             } else {
481 0         0 $bu = $a->start - $b->start;
482             }
483              
484 0 0       0 if( $a->end > $b->end ) {
485 0         0 $au += $a->end - $b->end;
486             } else {
487 0         0 $bu += $b->end - $a->end;
488             }
489              
490 0         0 my $intersect = $a->intersection($b);
491 0 0       0 if( ! $intersect ) {
492 0         0 warn("no intersection\n");
493 0         0 return ($au, 0, $bu);
494             } else {
495 0         0 my $ie = $intersect->end;
496 0         0 my $is = $intersect->start;
497 0         0 return ($au,$ie-$is+1,$bu);
498             }
499             }
500              
501             =head2 disconnected_ranges
502              
503             Title : disconnected_ranges
504             Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
505             Function: finds the minimal set of ranges such that each input range
506             is fully contained by at least one output range, and none of
507             the output ranges overlap
508             Args : a list of ranges
509             Returns : a list of objects of the same type as the input
510             (conforms to RangeI)
511              
512             =cut
513              
514             sub disconnected_ranges {
515 2080     2080 1 1939 my $self = shift;
516 2080 50       3688 if ($self eq "Bio::RangeI") {
517 0         0 $self = "Bio::Range";
518 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
519             }
520 2080         2504 my @inranges = @_;
521 2080 50       3028 if(ref $self) {
522 0         0 unshift @inranges, $self;
523             }
524              
525 2080         1778 my @outranges = (); # disconnected ranges
526              
527             # iterate through all input ranges $inrange,
528             # adding each input range to the set of output ranges @outranges,
529             # provided $inrange does not overlap ANY range in @outranges
530             # - if it does overlap an outrange, then merge it
531 2080         2007 foreach my $inrange (@inranges) {
532 6333         4858 my $intersects = 0;
533 6333         5186 my @outranges_new = ();
534 6333         4824 my @intersecting_ranges = ();
535              
536             # iterate through all @outranges, testing if it intersects
537             # current $inrange; if it does, merge and add to list
538             # of @intersecting_ranges, otherwise add $outrange to
539             # the new list of outranges that do NOT intersect
540 6333         11011 for (my $i=0; $i<@outranges; $i++) {
541 4289         3906 my $outrange = $outranges[$i];
542 4289         7133 my $intersection = $inrange->intersection($outrange);
543 4289 100       5438 if ($intersection) {
544 4250         3730 $intersects = 1;
545 4250         6196 my $union = $inrange->union($outrange);
546 4250         11574 push(@intersecting_ranges, $union);
547             }
548             else {
549 39         108 push(@outranges_new, $outrange);
550             }
551             }
552 6333         8881 @outranges = @outranges_new;
553             # @outranges now contains a list of non-overlapping ranges
554             # that do not intersect the current $inrange
555              
556 6333 100       8467 if (@intersecting_ranges) {
557 4250 50       5721 if (@intersecting_ranges > 1) {
558             # this sf intersected > 1 range, which means that
559             # all the ranges it intersects should be joined
560             # together in a new range
561 0         0 my $merged_range =
562             $self->union(@intersecting_ranges);
563 0         0 push(@outranges, $merged_range);
564              
565             }
566             else {
567             # exactly 1 intersecting range
568 4250         6820 push(@outranges, @intersecting_ranges);
569             }
570             }
571             else {
572             # no intersections found - new range
573 2083         4228 push(@outranges,
574             $self->new('-start'=>$inrange->start,
575             '-end'=>$inrange->end,
576             '-strand'=>$inrange->strand,
577             ));
578             }
579             }
580 2080         4474 return @outranges;
581             }
582              
583             =head2 offsetStranded
584              
585             Title : offsetStranded
586             Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
587             Function : destructively modifies RangeI implementing object to
588             offset its start and stop coordinates by values $fiveprime_offset and
589             $threeprime_offset (positive values being in the strand direction).
590             Args : two integer offsets: $fiveprime_offset and $threeprime_offset
591             Returns : $self, offset accordingly.
592              
593             =cut
594              
595             sub offsetStranded {
596 5     5 1 732 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
597 5 100       15 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
598 4         8 $self->start($self->start + $offset_start);
599 4         7 $self->end($self->end + $offset_end);
600 4         12 return $self;
601             };
602              
603             =head2 subtract
604              
605             Title : subtract
606             Usage : my @subtracted = $r1->subtract($r2)
607             Function: Subtract range r2 from range r1
608             Args : arg #1 = a range to subtract from this one (mandatory)
609             arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
610             Returns : undef if they do not overlap or r2 contains this RangeI,
611             or an arrayref of Range objects (this is an array since some
612             instances where the subtract range is enclosed within this range
613             will result in the creation of two new disjoint ranges)
614              
615             =cut
616              
617             sub subtract() {
618 7     7 1 984 my ($self, $range, $so) = @_;
619 7 100       20 $self->throw("missing arg: you need to pass in another feature")
620             unless $range;
621 6 100       10 return unless $self->_testStrand($range, $so);
622              
623 4 50       11 if ($self eq "Bio::RangeI") {
624 0         0 $self = "Bio::Range";
625 0         0 $self->warn("calling static methods of an interface is
626             deprecated; use $self instead");
627             }
628 4 50       11 $range->throw("Input a Bio::RangeI object") unless
629             $range->isa('Bio::RangeI');
630              
631 4         5 my @sub_locations;
632 4 100       6 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
633 1         2 @sub_locations = $self->location->sub_Location;
634             } else {
635 3         5 @sub_locations = $self;
636             }
637              
638 4         4 my @outranges;
639 4         6 foreach my $sl (@sub_locations) {
640 6 100       13 if (!$sl->overlaps($range)) {
641 1         3 push(@outranges,
642             $self->new('-start' =>$sl->start,
643             '-end' =>$sl->end,
644             '-strand'=>$sl->strand,
645             ));
646 1         2 next;
647             }
648            
649             ##Subtracts everything
650 5 100       11 if ($range->contains($sl)) {
651 1         2 next;
652             }
653            
654 4         12 my ($start, $end, $strand) = $sl->intersection($range, $so);
655             ##Subtract intersection from $self range
656            
657 4 100       8 if ($sl->start < $start) {
658 3         6 push(@outranges,
659             $self->new('-start' =>$sl->start,
660             '-end' =>$start - 1,
661             '-strand'=>$sl->strand,
662             ));
663             }
664 4 100       7 if ($sl->end > $end) {
665 2         5 push(@outranges,
666             $self->new('-start' =>$end + 1,
667             '-end' =>$sl->end,
668             '-strand'=>$sl->strand,
669             ));
670             }
671             }
672              
673 4 100       7 if (@outranges) {
674 3         7 return \@outranges;
675             }
676 1         2 return;
677             }
678              
679             1;