File Coverage

Bio/Location/Fuzzy.pm
Criterion Covered Total %
statement 108 128 84.3
branch 76 100 76.0
condition 60 78 76.9
subroutine 14 14 100.0
pod 11 11 100.0
total 269 331 81.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Location::Fuzzy
3             # Please direct questions and support issues to
4             #
5             # Cared for by Jason Stajich
6             #
7             # Copyright Jason Stajich
8             #
9             # You may distribute this module under the same terms as perl itself
10             # POD documentation - main docs before the code
11              
12             =head1 NAME
13              
14             Bio::Location::Fuzzy - Implementation of a Location on a Sequence
15             which has unclear start and/or end locations
16              
17             =head1 SYNOPSIS
18              
19             use Bio::Location::Fuzzy;
20             my $fuzzylocation = Bio::Location::Fuzzy->new(
21             -start => '<30',
22             -end => 90,
23             -location_type => '..');
24              
25             print "location string is ", $fuzzylocation->to_FTstring(), "\n";
26             print "location is of the type ", $fuzzylocation->location_type, "\n";
27              
28             =head1 DESCRIPTION
29              
30             This module contains the necessary methods for representing a
31             Fuzzy Location, one that does not have clear start and/or end points.
32             This will initially serve to handle features from Genbank/EMBL feature
33             tables that are written as 1^100 meaning between bases 1 and 100 or
34             E100..300 meaning it starts somewhere before 100. Advanced
35             implementations of this interface may be able to handle the necessary
36             logic of overlaps/intersection/contains/union. It was constructed to
37             handle fuzzy locations that can be represented in Genbank/EMBL and
38             Swissprot.
39              
40             =head1 FEEDBACK
41              
42             User feedback is an integral part of the evolution of this and other
43             Bioperl modules. Send your comments and suggestions preferably to one
44             of the Bioperl mailing lists. Your participation is much appreciated.
45              
46             bioperl-l@bioperl.org - General discussion
47             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48              
49             =head2 Support
50              
51             Please direct usage questions or support issues to the mailing list:
52              
53             I
54              
55             rather than to the module maintainer directly. Many experienced and
56             reponsive experts will be able look at the problem and quickly
57             address it. Please include a thorough description of the problem
58             with code and data examples if at all possible.
59              
60             =head2 Reporting Bugs
61              
62             Report bugs to the Bioperl bug tracking system to help us keep track
63             the bugs and their resolution. Bug reports can be submitted via the
64             web:
65              
66             https://github.com/bioperl/bioperl-live/issues
67              
68             =head1 AUTHOR - Jason Stajich
69              
70             Email jason-at-bioperl-dot-org
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object
75             methods. Internal methods are usually preceded with a _
76              
77             =cut
78              
79             # Let the code begin...
80              
81             package Bio::Location::Fuzzy;
82 192     192   2729 use strict;
  192         225  
  192         5007  
83              
84 192     192   590 use base qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI);
  192         196  
  192         60068  
85              
86             our @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN', 'UNCERTAIN',
87             'BEFORE', 'AFTER');
88              
89             our %FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact
90             # Exact position is unknown, but is within the range specified, ((1.2)..100)
91             'WITHIN' => '.',
92             # 1^2
93             'BETWEEN' => '^',
94             'IN-BETWEEN' => '^',
95             'UNCERTAIN' => '?',
96             # <100
97             'BEFORE' => '<',
98             # >10
99             'AFTER' => '>');
100            
101             # The following regular expressions map to fuzzy location types. Every
102             # expression must match the complete encoded point string, and must
103             # contain two groups identifying min and max. Empty matches are automatic.
104             # converted to undef, except for 'EXACT', for which max is set to equal
105             # min.
106            
107             our %FUZZYPOINTENCODE = (
108             '\>(\d+)(.{0})' => 'AFTER',
109             '\<(.{0})(\d+)' => 'BEFORE',
110             '(\d+)' => 'EXACT',
111             '\?(\d*)' => 'UNCERTAIN',
112             '(\d+)(.{0})\>' => 'AFTER',
113             '(.{0})(\d+)\<' => 'BEFORE',
114             '(\d+)\.(\d+)' => 'WITHIN',
115             '(\d+)\^(\d+)' => 'BETWEEN',
116             );
117            
118             our %FUZZYRANGEENCODE = ( '\.' => 'WITHIN',
119             '\.\.' => 'EXACT',
120             '\^' => 'IN-BETWEEN' );
121              
122             =head2 new
123              
124             Title : new
125             Usage : my $fuzzyloc = Bio::Location::Fuzzy->new( @args);
126             Function:
127             Returns :
128             Args : -start => value for start (initialize by superclass)
129             -end => value for end (initialize by superclass)
130             -strand => value for strand (initialize by superclass)
131             -location_type => either ('EXACT','WITHIN','IN-BETWEEN',
132             'UNCERTAIN') OR ( 1,2,3,4)
133             -start_ext=> extension for start - defaults to 0,
134             -start_fuz= fuzzy code for start can be
135             ('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
136             'UNCERTAIN' ) OR
137             a value 1 - 5 corresponding to index+1 above
138             -end_ext=> extension for end - defaults to 0,
139             -end_fuz= fuzzy code for end can be
140             ('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
141             'UNCERTAIN') OR
142             a value 1 - 5 corresponding to index+1 above
143              
144             =cut
145              
146             sub new {
147 554     554 1 2569 my ($class, @args) = @_;
148 554         1620 my $self = $class->SUPER::new(@args);
149 554         1832 my ($location_type, $start_ext, $start_fuz, $end_ext, $end_fuz) =
150             $self->_rearrange([ qw(LOCATION_TYPE START_EXT START_FUZ
151             END_EXT END_FUZ )
152             ], @args);
153              
154 554 100       1863 $location_type && $self->location_type($location_type);
155 553 50       864 $start_ext && $self->max_start($self->min_start + $start_ext);
156 553 50       820 $end_ext && $self->max_end($self->min_end + $end_ext);
157 553 50       753 $start_fuz && $self->start_pos_type($start_fuz);
158 553 50       716 $end_fuz && $self->end_pos_type($end_fuz);
159              
160 553         1204 return $self;
161             }
162              
163             =head2 location_type
164              
165             Title : location_type
166             Usage : my $location_type = $location->location_type();
167             Function: Get location type encoded as text
168             Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN', 'UNCERTAIN')
169             Args : none
170              
171             =cut
172              
173             sub location_type {
174 6477     6477 1 5029 my ($self,$value) = @_;
175 6477 100 100     18727 if( defined $value || ! defined $self->{'_location_type'} ) {
176 1100 100       1734 $value = 'EXACT' unless defined $value;
177 1100 100       2077 if(! defined $FUZZYCODES{$value} ) {
178 480         658 $value = uc($value);
179 480 100 0     1332 if( $value =~ /\.\./ ) {
    100 0        
    100          
    50          
    0          
180 415         469 $value = 'EXACT';
181             } elsif( $value =~ /^\.$/ ) {
182 18         35 $value = 'WITHIN';
183             } elsif( $value =~ /\^/ ) {
184 6         10 $value = 'IN-BETWEEN';
185 6 100 66     14 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
      100        
186             $self->start. "] and [". $self->end. "]")
187             if defined $self->start && defined $self->end &&
188             ($self->end - 1 == $self->start);
189             } elsif( $value =~ /\?/ ) {
190 41         48 $value = 'UNCERTAIN';
191             } elsif( $value ne 'EXACT' && $value ne 'WITHIN' &&
192             $value ne 'IN-BETWEEN' ) {
193 0         0 $self->throw("Did not specify a valid location type");
194             }
195             }
196 1099         1255 $self->{'_location_type'} = $value;
197             }
198 6476         12946 return $self->{'_location_type'};
199             }
200              
201             =head1 LocationI methods
202              
203             =head2 length
204              
205             Title : length
206             Usage : $length = $fuzzy_loc->length();
207             Function: Get the length of this location.
208              
209             Note that the length of a fuzzy location will always depend
210             on the currently active interpretation of start and end. The
211             result will therefore vary for different CoordinatePolicy objects.
212              
213             Returns : an integer
214             Args : none
215              
216             =cut
217              
218             #sub length {
219             # my($self) = @_;
220             # return $self->SUPER::length() if( !$self->start || !$self->end);
221             # $self->warn('Length is not valid for a FuzzyLocation');
222             # return 0;
223             #}
224              
225             =head2 start
226              
227             Title : start
228             Usage : $start = $fuzzy->start();
229             Function: get/set start of this range, handling fuzzy_starts
230             Returns : a positive integer representing the start of the location
231             Args : start location on set (can be fuzzy point string)
232              
233             =cut
234              
235             sub start {
236 2796     2796 1 2669 my($self,$value) = @_;
237 2796 100       3851 if( defined $value ) {
238 555         1152 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
239 555         1696 $self->start_pos_type($encode);
240 555         1009 $self->min_start($min);
241 555         924 $self->max_start($max);
242             }
243              
244 2796 100 100     3472 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations ["
      100        
245             . $self->SUPER::start. "] and [". $self->SUPER::end. "]")
246             if $self->location_type eq 'IN-BETWEEN' && defined $self->SUPER::end &&
247             ($self->SUPER::end - 1 == $self->SUPER::start);
248              
249 2795         4969 return $self->SUPER::start();
250             }
251              
252             =head2 end
253              
254             Title : end
255             Usage : $end = $fuzzy->end();
256             Function: get/set end of this range, handling fuzzy_ends
257             Returns : a positive integer representing the end of the range
258             Args : end location on set (can be fuzzy string)
259              
260             =cut
261              
262             sub end {
263 2753     2753 1 2324 my($self,$value) = @_;
264 2753 100       3800 if( defined $value ) {
265 554         882 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
266 554         1516 $self->end_pos_type($encode);
267 554         924 $self->min_end($min);
268 554         922 $self->max_end($max);
269             }
270              
271 2753 100 100     3297 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
      100        
272             $self->SUPER::start. "] and [". $self->SUPER::end. "]")
273             if $self->location_type eq 'IN-BETWEEN' && defined $self->SUPER::start &&
274             ($self->SUPER::end - 1 == $self->SUPER::start);
275              
276 2752         4537 return $self->SUPER::end();
277             }
278              
279             =head2 min_start
280              
281             Title : min_start
282             Usage : $min_start = $fuzzy->min_start();
283             Function: get/set the minimum starting point
284             Returns : the minimum starting point from the contained sublocations
285             Args : integer or undef on set
286              
287             =cut
288              
289             sub min_start {
290 4420     4420 1 11688 my ($self,@args) = @_;
291              
292 4420 100       6005 if(@args) {
293 555         904 $self->{'_min_start'} = $args[0]; # the value may be undef!
294             }
295 4420         6722 return $self->{'_min_start'};
296             }
297              
298             =head2 max_start
299              
300             Title : max_start
301             Usage : my $maxstart = $location->max_start();
302             Function: Get/set maximum starting location of feature startpoint
303             Returns : integer or undef if no maximum starting point.
304             Args : integer or undef on set
305              
306             =cut
307              
308             sub max_start {
309 3241     3241 1 2973 my ($self,@args) = @_;
310              
311 3241 100       4488 if(@args) {
312 555         820 $self->{'_max_start'} = $args[0]; # the value may be undef!
313             }
314 3241         4786 return $self->{'_max_start'};
315             }
316              
317             =head2 start_pos_type
318              
319             Title : start_pos_type
320             Usage : my $start_pos_type = $location->start_pos_type();
321             Function: Get/set start position type.
322             Returns : type of position coded as text
323             ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
324             Args : a string on set
325              
326             =cut
327              
328             sub start_pos_type {
329 767     767 1 838 my ($self,$value) = @_;
330 767 50 66     3112 if(defined $value && $value =~ /^\d+$/ ) {
331 0 0       0 if( $value == 0 ) { $value = 'EXACT'; }
  0         0  
332             else {
333 0         0 my $v = $LOCATIONCODESBSANE[$value];
334 0 0       0 if( ! defined $v ) {
335 0         0 $self->warn("Provided value $value which I don't understand,".
336             " reverting to 'EXACT'");
337 0         0 $v = 'EXACT';
338             }
339 0         0 $value = $v;
340             }
341             }
342 767 100       1186 if(defined($value)) {
343 609         989 $self->{'_start_pos_type'} = $value;
344             }
345 767         1230 return $self->{'_start_pos_type'};
346             }
347              
348             =head2 min_end
349              
350             Title : min_end
351             Usage : my $minend = $location->min_end();
352             Function: Get/set minimum ending location of feature endpoint
353             Returns : integer or undef if no minimum ending point.
354             Args : integer or undef on set
355              
356             =cut
357              
358             sub min_end {
359 2433     2433 1 2407 my ($self,@args) = @_;
360              
361 2433 100       3513 if(@args) {
362 554         1215 $self->{'_min_end'} = $args[0]; # the value may be undef!
363             }
364 2433         3572 return $self->{'_min_end'};
365             }
366              
367             =head2 max_end
368              
369             Title : max_end
370             Usage : my $maxend = $location->max_end();
371             Function: Get/set maximum ending location of feature endpoint
372             Returns : integer or undef if no maximum ending point.
373             Args : integer or undef on set
374              
375             =cut
376              
377             sub max_end {
378 3974     3974 1 3326 my ($self,@args) = @_;
379              
380 3974 100       5338 if(@args) {
381 554         859 $self->{'_max_end'} = $args[0]; # the value may be undef!
382             }
383 3974         6196 return $self->{'_max_end'};
384             }
385              
386             =head2 end_pos_type
387              
388             Title : end_pos_type
389             Usage : my $end_pos_type = $location->end_pos_type();
390             Function: Get/set end position type.
391             Returns : type of position coded as text
392             ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
393             Args : a string on set
394              
395             =cut
396              
397             sub end_pos_type {
398 767     767 1 788 my ($self,$value) = @_;
399 767 50 66     3045 if( defined $value && $value =~ /^\d+$/ ) {
400 0 0       0 if( $value == 0 ) { $value = 'EXACT'; }
  0         0  
401             else {
402 0         0 my $v = $LOCATIONCODESBSANE[$value];
403 0 0       0 if( ! defined $v ) {
404 0         0 $self->warn("Provided value $value which I don't understand,".
405             " reverting to 'EXACT'");
406 0         0 $v = 'EXACT';
407             }
408 0         0 $value = $v;
409             }
410             }
411              
412 767 100       1241 if(defined($value)) {
413 597         940 $self->{'_end_pos_type'} = $value;
414             }
415 767         1775 return $self->{'_end_pos_type'};
416             }
417              
418             =head2 seq_id
419              
420             Title : seq_id
421             Usage : my $seqid = $location->seq_id();
422             Function: Get/Set seq_id that location refers to
423             Returns : seq_id
424             Args : [optional] seq_id value to set
425              
426             =cut
427              
428             =head2 coordinate_policy
429              
430             Title : coordinate_policy
431              
432             Usage : $policy = $location->coordinate_policy();
433             $location->coordinate_policy($mypolicy); # set may not be possible
434             Function: Get the coordinate computing policy employed by this object.
435              
436             See Bio::Location::CoordinatePolicyI for documentation about
437             the policy object and its use.
438              
439             The interface *does not* require implementing classes to accept
440             setting of a different policy. The implementation provided here
441             does, however, allow to do so.
442              
443             Implementors of this interface are expected to initialize every
444             new instance with a CoordinatePolicyI object. The implementation
445             provided here will return a default policy object if none has
446             been set yet. To change this default policy object call this
447             method as a class method with an appropriate argument. Note that
448             in this case only subsequently created Location objects will be
449             affected.
450              
451             Returns : A Bio::Location::CoordinatePolicyI implementing object.
452             Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
453              
454             See L
455              
456             =cut
457              
458             =head2 to_FTstring
459              
460             Title : to_FTstring
461             Usage : my $locstr = $location->to_FTstring()
462             Function: Get/Set seq_id that location refers to
463             Returns : seq_id
464             Args : [optional] seq_id value to set
465              
466             =cut
467              
468             sub to_FTstring {
469 113     113 1 7403 my ($self) = @_;
470 113         244 my (%vals) = ( 'start' => $self->start,
471             'min_start' => $self->min_start,
472             'max_start' => $self->max_start,
473             'start_code' => $self->start_pos_type,
474             'end' => $self->end,
475             'min_end' => $self->min_end,
476             'max_end' => $self->max_end,
477             'end_code' => $self->end_pos_type );
478              
479 113         420 my (%strs) = ( 'start' => '',
480             'end' => '');
481 113         211 my ($delimiter) = $FUZZYCODES{$self->location_type};
482 113 100       351 $delimiter = $FUZZYCODES{'EXACT'} if ($self->location_type eq 'UNCERTAIN');
483            
484 113         260 my $policy = ref($self->coordinate_policy);
485            
486             # I'm lazy, lets do this in a loop since behaviour will be the same for
487             # start and end
488             # The CoordinatePolicy now dictates start/end data here (bug 992) - cjf
489 113         182 foreach my $point ( qw(start end) ) {
490 226 100 100     1035 if( ($vals{$point."_code"} ne 'EXACT') &&
    100          
491             ($vals{$point."_code"} ne 'UNCERTAIN') ) {
492            
493             # must have max and min defined to use 'WITHIN', 'BETWEEN'
494 114 50 100     1001 if ((!defined $vals{"min_$point"} ||
      33        
      66        
495             !defined $vals{"max_$point"}) &&
496             ( $vals{$point."_code"} eq 'WITHIN' ||
497             $vals{$point."_code"} eq 'BETWEEN'))
498             {
499 0 0       0 $vals{"min_$point"} = '' unless defined $vals{"min_$point"};
500 0 0       0 $vals{"max_$point"} = '' unless defined $vals{"max_$point"};
501            
502             $self->warn("Fuzzy codes for start are in a strange state, (".
503             join(",", ($vals{"min_$point"},
504             $vals{"max_$point"},
505 0         0 $vals{$point."_code"})). ")");
506 0         0 return '';
507             }
508            
509 114 100 100     592 if (defined $vals{$point."_code"} &&
      66        
510             ($vals{$point."_code"} eq 'BEFORE' ||
511             $vals{$point."_code"} eq 'AFTER'))
512             {
513 79         193 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
514 79         136 $strs{$point} .= $vals{"$point"};
515             }
516            
517 114 100 100     604 if( defined $vals{$point."_code"} &&
      66        
518             ($vals{$point."_code"} eq 'WITHIN' ||
519             $vals{$point."_code"} eq 'BETWEEN'))
520             {
521             # Expect odd results with anything but WidestCoordPolicy for now
522             $strs{$point} .= ($point eq 'start') ?
523             $vals{"$point"}.
524             $FUZZYCODES{$vals{$point."_code"}}.
525             $vals{'max_'.$point}
526             :
527             $vals{'min_'.$point}.
528             $FUZZYCODES{$vals{$point."_code"}}.
529 27 100       123 $vals{"$point"};
530 27         56 $strs{$point} = "(".$strs{$point}.")";
531             }
532            
533             } elsif ($vals{$point."_code"} eq 'UNCERTAIN') {
534 17         30 $strs{$point} = $FUZZYCODES{$vals{$point."_code"}};
535 17 100       40 $strs{$point} .= $vals{$point} if defined $vals{$point};
536             } else {
537 95         217 $strs{$point} = $vals{$point};
538             }
539             }
540            
541 113         234 my $str = $strs{'start'} . $delimiter . $strs{'end'};
542 113 100 66     272 if($self->is_remote() && $self->seq_id()) {
543 5         12 $str = $self->seq_id() . ":" . $str;
544             }
545 113 100 100     281 if( defined $self->strand &&
    100 66        
546             $self->strand == -1 &&
547             $self->location_type() ne "UNCERTAIN") {
548 9         18 $str = "complement(" . $str . ")";
549             } elsif($self->location_type() eq "WITHIN") {
550 11         29 $str = "(".$str.")";
551             }
552 113         485 return $str;
553             }
554              
555             =head2 valid_Location
556              
557             Title : valid_Location
558             Usage : if ($location->valid_location) {...};
559             Function: boolean method to determine whether location is considered valid
560             (has minimum requirements for Simple implementation)
561             Returns : Boolean value: true if location is valid, false otherwise
562             Args : none
563              
564             =cut
565              
566             =head2 _fuzzypointdecode
567              
568             Title : _fuzzypointdecode
569             Usage : ($type,$min,$max) = $self->_fuzzypointdecode('<5');
570             Function: Decode a fuzzy string.
571             Returns : A 3-element array consisting of the type of location, the
572             minimum integer, and the maximum integer describing the range
573             of coordinates this start or endpoint refers to. Minimum or
574             maximum coordinate may be undefined.
575             : Returns empty array on fail.
576             Args : fuzzypoint string
577              
578             =cut
579              
580             sub _fuzzypointdecode {
581 1109     1109   1128 my ($self, $string) = @_;
582 1109 50       1641 return () if( !defined $string);
583             # strip off leading and trailing space
584 1109         4490 $string =~ s/^\s*(\S+)\s*/$1/;
585 1109         3117 foreach my $pattern ( keys %FUZZYPOINTENCODE ) {
586 4818 100       55117 if( $string =~ /^$pattern$/ ) {
587 1090 100 100     4521 my ($min,$max) = ($1,$2) unless (($1 eq '') && (!defined $2));
588 1090 100 100     3521 if( ($FUZZYPOINTENCODE{$pattern} eq 'EXACT') ||
589             ($FUZZYPOINTENCODE{$pattern} eq 'UNCERTAIN')
590             ) {
591 587         697 $max = $min;
592             } else {
593 503 100 66     1825 $max = undef if((defined $max) && (length($max) == 0));
594 503 100 66     1806 $min = undef if((defined $min) && (length($min) == 0));
595             }
596 1090         3325 return ($FUZZYPOINTENCODE{$pattern},$min,$max);
597             }
598             }
599 19 50       63 if( $self->verbose >= 1 ) {
600 0         0 $self->warn("could not find a valid fuzzy encoding for $string");
601             }
602 19         31 return ();
603             }
604              
605             1;