File Coverage

Bio/SeqFeature/Generic.pm
Criterion Covered Total %
statement 238 278 85.6
branch 119 148 80.4
condition 51 77 66.2
subroutine 39 44 88.6
pod 29 34 85.2
total 476 581 81.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqFeature::Generic
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqFeature::Generic - Generic SeqFeature
17              
18             =head1 SYNOPSIS
19              
20             $feat = Bio::SeqFeature::Generic->new(
21             -start => 10,
22             -end => 100,
23             -strand => -1,
24             -primary => 'repeat', # -primary_tag is a synonym
25             -source_tag => 'repeatmasker',
26             -display_name => 'alu family',
27             -score => 1000,
28             -tag => { new => 1,
29             author => 'someone',
30             sillytag => 'this is silly!' } );
31              
32             $feat = Bio::SeqFeature::Generic->new( -gff_string => $string );
33             # if you want explicitly GFF1
34             $feat = Bio::SeqFeature::Generic->new( -gff1_string => $string );
35              
36             # add it to an annotated sequence
37              
38             $annseq->add_SeqFeature($feat);
39              
40             =head1 DESCRIPTION
41              
42             Bio::SeqFeature::Generic is a generic implementation for the
43             Bio::SeqFeatureI interface, providing a simple object to provide all
44             the information for a feature on a sequence.
45              
46             For many Features, this is all you will need to use (for example, this
47             is fine for Repeats in DNA sequence or Domains in protein
48             sequence). For other features, which have more structure, this is a
49             good base class to extend using inheritence to have new things: this
50             is what is done in the L,
51             L and L, which provide
52             well coordinated classes to represent genes on DNA sequence (for
53             example, you can get the protein sequence out from a transcript
54             class).
55              
56             For many Features, you want to add some piece of information, for
57             example a common one is that this feature is 'new' whereas other
58             features are 'old'. The tag system, which here is implemented using a
59             hash can be used here. You can use the tag system to extend the
60             L programmatically: that is, you know that you have
61             read in more information into the tag 'mytag' which you can then
62             retrieve. This means you do not need to know how to write inherited
63             Perl to provide more complex information on a feature, and/or, if you
64             do know but you do not want to write a new class every time you need
65             some extra piece of information, you can use the tag system to easily
66             store and then retrieve information.
67              
68             The tag system can be written in/out of GFF format, and also into EMBL
69             format via the L system
70              
71             =head1 Implemented Interfaces
72              
73             This class implements the following interfaces.
74              
75             =over 4
76              
77             =item L
78              
79             Note that this includes implementing Bio::RangeI.
80              
81             =item L
82              
83             =item L
84              
85             Features held by a feature are essentially sub-features.
86              
87             =back
88              
89             =head1 FEEDBACK
90              
91             =head2 Mailing Lists
92              
93             User feedback is an integral part of the evolution of this and other
94             Bioperl modules. Send your comments and suggestions preferably to one
95             of the Bioperl mailing lists. Your participation is much appreciated.
96              
97             bioperl-l@bioperl.org - General discussion
98             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99              
100             =head2 Support
101              
102             Please direct usage questions or support issues to the mailing list:
103              
104             I
105              
106             rather than to the module maintainer directly. Many experienced and
107             reponsive experts will be able look at the problem and quickly
108             address it. Please include a thorough description of the problem
109             with code and data examples if at all possible.
110              
111             =head2 Reporting Bugs
112              
113             Report bugs to the Bioperl bug tracking system to help us keep track
114             the bugs and their resolution. Bug reports can be submitted via
115             the web:
116              
117             https://github.com/bioperl/bioperl-live/issues
118              
119             =head1 AUTHOR - Ewan Birney
120              
121             Ewan Birney Ebirney@sanger.ac.ukE
122              
123             =head1 DEVELOPERS
124              
125             This class has been written with an eye out for inheritance. The fields
126             the actual object hash are:
127              
128             _gsf_tag_hash = reference to a hash for the tags
129             _gsf_sub_array = reference to an array for subfeatures
130              
131             =head1 APPENDIX
132              
133             The rest of the documentation details each of the object
134             methods. Internal methods are usually preceded with a _
135              
136             =cut
137              
138              
139             # Let the code begin...
140              
141              
142             package Bio::SeqFeature::Generic;
143 146     146   6786 use strict;
  146         162  
  146         3388  
144              
145 146     146   28850 use Bio::Annotation::Collection;
  146         206  
  146         2807  
146 146     146   24220 use Bio::Location::Simple;
  146         259  
  146         3026  
147 146     146   45476 use Bio::Location::Split;
  146         234  
  146         3790  
148 146     146   58791 use Bio::Tools::GFF;
  146         267  
  146         4478  
149             #use Tie::IxHash;
150              
151 146     146   791 use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI Bio::AnnotatableI);
  146         160  
  146         64872  
152              
153             sub new {
154 41010     41010 1 56840 my ( $caller, @args) = @_;
155 41010         80823 my ($self) = $caller->SUPER::new(@args);
156 41010         80833 $self->_register_for_cleanup(\&cleanup_generic);
157 41010         46889 $self->{'_parse_h'} = {};
158 41010         44650 $self->{'_gsf_tag_hash'} = {};
159              
160             # bulk-set attributes
161 41010         57707 $self->set_attributes(@args);
162              
163             # done - we hope
164 41010         81417 return $self;
165             }
166              
167             =head2 set_attributes
168              
169             Title : set_attributes
170             Usage :
171             Function: Sets a whole array of parameters at once.
172             Example :
173             Returns : none
174             Args : Named parameters, in the form as they would otherwise be passed
175             to new(). Currently recognized are:
176              
177             -start start position
178             -end end position
179             -strand strand
180             -phase the phase of the feature (0..2)
181             -primary_tag primary tag
182             -primary (synonym for -primary_tag)
183             -source_tag source tag
184             -source (synonym for -source_tag)
185             -frame frame
186             -score score value
187             -tag a reference to a tag/value hash
188             -gff_string GFF v.2 string to initialize from
189             -gff1_string GFF v.1 string to initialize from
190             -seq_id the display name of the sequence
191             -annotation the AnnotationCollectionI object
192             -location the LocationI object
193              
194             =cut
195              
196             sub set_attributes {
197 45940     45940 1 60061 my ($self,@args) = @_;
198 45940         170732 my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
199             $source, $frame, $score, $tag, $gff_string, $gff1_string,
200             $seqname, $seqid, $annot, $location, $display_name, $pid, $phase) =
201             $self->_rearrange([qw(START
202             END
203             STRAND
204             PRIMARY_TAG
205             SOURCE_TAG
206             PRIMARY
207             SOURCE
208             FRAME
209             SCORE
210             TAG
211             GFF_STRING
212             GFF1_STRING
213             SEQNAME
214             SEQ_ID
215             ANNOTATION
216             LOCATION
217             DISPLAY_NAME
218             PRIMARY_ID
219             PHASE
220             )], @args);
221 45940 100       156275 $location && $self->location($location);
222 45940 50       58348 $gff_string && $self->_from_gff_string($gff_string);
223 45940 50       63446 $gff1_string && do {
224 0         0 $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
225 0         0 $self->_from_gff_stream($gff1_string);
226             };
227            
228 45940 50       59457 $pid && $self->primary_id($pid);
229 45940 100       77585 $primary_tag && $self->primary_tag($primary_tag);
230 45940 100       59707 $source_tag && $self->source_tag($source_tag);
231 45940 100       60810 $primary && $self->primary_tag($primary);
232 45940 100       59437 $source && $self->source_tag($source);
233 45940 50       59928 $annot && $self->annotation($annot);
234 45940 100       69854 defined $start && $self->start($start);
235 45940 100       70111 defined $end && $self->end($end);
236 45940 100       68890 defined $strand && $self->strand($strand);
237 45940 100       61676 defined $frame && $self->frame($frame);
238 45940 100       59006 defined $display_name && $self->display_name($display_name);
239 45940 100       61391 defined $score && $self->score($score);
240 45940 100       65798 defined $phase && $self->phase($phase);
241              
242 45940 50       60211 if($seqname) {
243 0         0 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
244 0 0       0 $seqid = $seqname unless $seqid;
245             }
246 45940 100       64311 $self->seq_id($seqid) if (defined($seqid));
247 45940 100       90158 $tag && do {
248 1980         4566 foreach my $t ( keys %$tag ) {
249 4989 100       16243 $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? @{$tag->{$t}} : $tag->{$t});
  198         312  
250             }
251             };
252             }
253              
254              
255             =head2 direct_new
256              
257             Title : direct_new
258             Usage : my $feat = Bio::SeqFeature::Generic->direct_new;
259             Function: create a blessed hash - for performance improvement in
260             object creation
261             Returns : Bio::SeqFeature::Generic object
262             Args : none
263              
264             =cut
265              
266             sub direct_new {
267 9798     9798 1 9968 my ( $class) = @_;
268 9798         10700 my ($self) = {};
269              
270 9798         11366 bless $self,$class;
271              
272 9798         13308 return $self;
273             }
274              
275              
276             =head2 location
277              
278             Title : location
279             Usage : my $location = $feat->location();
280             Function: returns a location object suitable for identifying location
281             of feature on sequence or parent feature
282             Returns : Bio::LocationI object
283             Args : [optional] Bio::LocationI object to set the value to.
284              
285             =cut
286              
287             sub location {
288 611860     611860 1 423866 my($self, $value ) = @_;
289              
290 611860 100       1004942 if (defined($value)) {
    100          
291 30244 50 33     119039 unless (ref($value) and $value->isa('Bio::LocationI')) {
292 0         0 $self->throw("object $value pretends to be a location but ".
293             "does not implement Bio::LocationI");
294             }
295 30244         35672 $self->{'_location'} = $value;
296             }
297             elsif (! $self->{'_location'}) {
298             # guarantees a real location object is returned every time
299 18513         36896 $self->{'_location'} = Bio::Location::Simple->new();
300             }
301 611860         929844 return $self->{'_location'};
302             }
303              
304              
305             =head2 start
306              
307             Title : start
308             Usage : my $start = $feat->start;
309             $feat->start(20);
310             Function: Get/set on the start coordinate of the feature
311             Returns : integer
312             Args : none
313              
314             =cut
315              
316             sub start {
317 195861     195861 1 179787 my ($self, $value) = @_;
318             # Return soon if setting value
319 195861 100       271237 if (defined $value) {
320 37170         48871 return $self->location->start($value);
321             }
322              
323 158691 100       285221 return $self->location->start() if not defined $self->{'_gsf_seq'};
324             # Check circular sequences cut by origin
325 66324         52706 my $start;
326 66324 100 100     126628 if ( $self->{'_gsf_seq'}->is_circular
327             and $self->location->isa('Bio::Location::SplitLocationI')
328             ) {
329 8         18 my $primary_seq_length = $self->{'_gsf_seq'}->length;
330 8         20 my @sublocs = $self->location->sub_Location;
331              
332 8         8 my $cut_by_origin = 0;
333 8         9 my ($a_end, $a_strand) = (0, 0);
334 8         11 my ($b_start, $b_strand) = (0, 0);
335 8         20 for (my $i = 1; $i < scalar @sublocs; $i++) {
336 8         18 $a_end = $sublocs[$i-1]->end;
337 8         17 $a_strand = $sublocs[$i-1]->strand;
338 8         15 $b_start = $sublocs[$i]->start;
339 8         11 $b_strand = $sublocs[$i]->strand;
340             # cut by origin condition
341 8 50 33     51 if ( $a_end == $primary_seq_length
      33        
342             and $b_start == 1
343             and $a_strand == $b_strand
344             ) {
345 8         9 $cut_by_origin = 1;
346 8         12 last;
347             }
348             }
349 8 50       20 $start = ($cut_by_origin == 1) ? ($sublocs[0]->start) : ($self->location->start);
350             }
351             else {
352 66316         77567 $start = $self->location->start;
353             }
354 66324         134128 return $start;
355             }
356              
357              
358             =head2 end
359              
360             Title : end
361             Usage : my $end = $feat->end;
362             $feat->end($end);
363             Function: get/set on the end coordinate of the feature
364             Returns : integer
365             Args : none
366              
367             =cut
368              
369             sub end {
370 195356     195356 1 157806 my ($self, $value) = @_;
371             # Return soon if setting value
372 195356 100       274252 if (defined $value) {
373 37166         43201 return $self->location->end($value);
374             }
375              
376 158190 100       263871 return $self->location->end() if not defined $self->{'_gsf_seq'};
377             # Check circular sequences cut by origin
378 66065         50222 my $end;
379 66065 100 100     106761 if ( $self->{'_gsf_seq'}->is_circular
380             and $self->location->isa('Bio::Location::SplitLocationI')
381             ) {
382 8         15 my $primary_seq_length = $self->{'_gsf_seq'}->length;
383 8         14 my @sublocs = $self->location->sub_Location;
384              
385 8         8 my $cut_by_origin = 0;
386 8         8 my ($a_end, $a_strand) = (0, 0);
387 8         8 my ($b_start, $b_strand) = (0, 0);
388 8         17 for (my $i = 1; $i < scalar @sublocs; $i++) {
389 8         19 $a_end = $sublocs[$i-1]->end;
390 8         20 $a_strand = $sublocs[$i-1]->strand;
391 8         15 $b_start = $sublocs[$i]->start;
392 8         16 $b_strand = $sublocs[$i]->strand;
393             # cut by origin condition
394 8 50 33     51 if ( $a_end == $primary_seq_length
      33        
395             and $b_start == 1
396             and $a_strand == $b_strand
397             ) {
398 8         7 $cut_by_origin = 1;
399 8         12 last;
400             }
401             }
402 8 50       24 $end = ($cut_by_origin == 1) ? ($sublocs[-1]->end) : ($self->location->end);
403             }
404             else {
405 66057         86592 $end = $self->location->end;
406             }
407 66065         131186 return $end;
408             }
409              
410              
411             =head2 length
412              
413             Title : length
414             Usage : my $len = $feat->length;
415             Function: Get the feature length computed as:
416             $feat->end - $feat->start + 1
417             Returns : integer
418             Args : none
419              
420             =cut
421              
422             sub length {
423 2679     2679 1 1968 my $self = shift;
424 2679         3635 my $length = $self->end() - $self->start() + 1;
425              
426             # In circular sequences cut by origin $start > $end,
427             # e.g., join(5075..5386,1..51)), $start = 5075, $end = 51,
428             # then adjust using the primary_seq length (5386)
429 2679 100 66     5270 if ($length < 0 and defined $self->{'_gsf_seq'}) {
430 3         7 $length += $self->{'_gsf_seq'}->length;
431             }
432 2679         6167 return $length;
433             }
434              
435              
436             =head2 strand
437              
438             Title : strand
439             Usage : my $strand = $feat->strand();
440             $feat->strand($strand);
441             Function: get/set on strand information, being 1,-1 or 0
442             Returns : -1,1 or 0
443             Args : none
444              
445             =cut
446              
447             sub strand {
448 145867     145867 1 107067 my $self = shift;
449 145867         141862 return $self->location->strand(@_);
450             }
451              
452              
453             =head2 score
454              
455             Title : score
456             Usage : my $score = $feat->score();
457             $feat->score($score);
458             Function: get/set on score information
459             Returns : float
460             Args : none if get, the new value if set
461              
462             =cut
463              
464             sub score {
465 5975     5975 1 4948 my $self = shift;
466              
467 5975 100       8590 if (@_) {
468 4951         4484 my $value = shift;
469              
470 4951 50 100     41094 if ( defined $value && $value && $value !~ /^[A-Za-z]+$/ &&
      66        
      66        
      33        
471             $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {
472 0         0 $self->throw(-class=>'Bio::Root::BadParameter',
473             -text=>"'$value' is not a valid score",
474             -value=>$value);
475             }
476 4951 50       6904 if ($self->has_tag('score')) {
477 0         0 $self->warn("Removing score value(s)");
478 0         0 $self->remove_tag('score');
479             }
480 4951         7498 $self->add_tag_value('score',$value);
481             }
482 5975 100       7631 my ($score) = $self->has_tag('score') ? $self->get_tag_values('score') : undef;
483 5975         9674 return $score;
484             }
485              
486              
487             =head2 frame
488              
489             Title : frame
490             Usage : my $frame = $feat->frame();
491             $feat->frame($frame);
492             Function: get/set on frame information
493             Returns : 0,1,2, '.'
494             Args : none if get, the new value if set
495              
496             =cut
497              
498             sub frame {
499 2794     2794 1 2344 my $self = shift;
500              
501 2794 100       4120 if ( @_ ) {
502 1975         1646 my $value = shift;
503 1975 50 33     7766 if ( defined $value &&
504             $value !~ /^[0-2.]$/ ) {
505 0         0 $self->throw("'$value' is not a valid frame");
506             }
507 1975 100 66     5897 if( defined $value && $value eq '.' ) { $value = '.' }
  8         6  
508 1975         5831 return $self->{'_gsf_frame'} = $value;
509             }
510 819         2539 return $self->{'_gsf_frame'};
511             }
512              
513              
514             =head2 primary_tag
515              
516             Title : primary_tag
517             Usage : my $tag = $feat->primary_tag();
518             $feat->primary_tag('exon');
519             Function: get/set on the primary tag for a feature,
520             eg 'exon'
521             Returns : a string
522             Args : none
523              
524             =cut
525              
526             sub primary_tag {
527 133115     133115 1 97559 my $self = shift;
528 133115 100       189692 return $self->{'_primary_tag'} = shift if @_;
529 100452   100     264751 return $self->{'_primary_tag'} || '';
530             }
531              
532              
533             =head2 source_tag
534              
535             Title : source_tag
536             Usage : my $tag = $feat->source_tag();
537             $feat->source_tag('genscan');
538             Function: Returns the source tag for a feature,
539             eg, 'genscan'
540             Returns : a string
541             Args : none
542              
543             =cut
544              
545             sub source_tag {
546 69452     69452 1 48643 my $self = shift;
547 69452 100       117597 return $self->{'_source_tag'} = shift if @_;
548 36718   100     83363 return $self->{'_source_tag'} || '';
549             }
550              
551              
552             =head2 has_tag
553              
554             Title : has_tag
555             Usage : my $value = $feat->has_tag('some_tag');
556             Function: Tests wether a feature contaings a tag
557             Returns : TRUE if the SeqFeature has the tag,
558             and FALSE otherwise.
559             Args : The name of a tag
560              
561             =cut
562              
563             sub has_tag {
564 63550     63550 1 49244 my ($self, $tag) = @_;
565 63550         144375 return exists $_[0]->{'_gsf_tag_hash'}->{$tag};
566             }
567              
568              
569             =head2 add_tag_value
570              
571             Title : add_tag_value
572             Usage : $feat->add_tag_value('note',"this is a note");
573             Returns : TRUE on success
574             Args : tag (string) and one or more values (any scalar(s))
575              
576             =cut
577              
578             sub add_tag_value {
579 116066     116066 1 82468 my $self = shift;
580 116066         77499 my $tag = shift;
581 116066   100     294165 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
582 116066         80982 push(@{$self->{'_gsf_tag_hash'}->{$tag}},@_);
  116066         244470  
583             }
584              
585              
586             =head2 get_tag_values
587              
588             Title : get_tag_values
589             Usage : my @values = $feat->get_tag_values('note');
590             Function: Returns a list of all the values stored
591             under a particular tag.
592             Returns : A list of scalars
593             Args : The name of the tag
594              
595             =cut
596              
597             sub get_tag_values {
598 60640     60640 1 71386 my ($self, $tag) = @_;
599              
600 60640 50       82293 if( ! defined $tag ) { return (); }
  0         0  
601 60640 100       75474 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
602 1         10 $self->throw("asking for tag value that does not exist $tag");
603             }
604 60639         36041 return @{$self->{'_gsf_tag_hash'}->{$tag}};
  60639         113596  
605             }
606              
607              
608             =head2 get_all_tags
609              
610             Title : get_all_tags
611             Usage : my @tags = $feat->get_all_tags();
612             Function: Get a list of all the tags in a feature
613             Returns : An array of tag names
614             Args : none
615              
616             # added a sort so that tags will be returned in a predictable order
617             # I still think we should be able to specify a sort function
618             # to the object at some point
619             # -js
620              
621             =cut
622              
623             sub get_all_tags {
624 19046     19046 1 19062 my ($self, @args) = @_;
625 19046         15409 return sort keys %{ $self->{'_gsf_tag_hash'}};
  19046         169793  
626             }
627              
628              
629             =head2 remove_tag
630              
631             Title : remove_tag
632             Usage : $feat->remove_tag('some_tag');
633             Function: removes a tag from this feature
634             Returns : the array of values for this tag before removing it
635             Args : tag (string)
636              
637             =cut
638              
639             sub remove_tag {
640 19     19 1 20 my ($self, $tag) = @_;
641              
642 19 50       48 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
643 0         0 $self->throw("trying to remove a tag that does not exist: $tag");
644             }
645 19         21 my @vals = @{$self->{'_gsf_tag_hash'}->{$tag}};
  19         47  
646 19         39 delete $self->{'_gsf_tag_hash'}->{$tag};
647 19         28 return @vals;
648             }
649              
650              
651             =head2 attach_seq
652              
653             Title : attach_seq
654             Usage : $feat->attach_seq($seq);
655             Function: Attaches a Bio::Seq object to this feature. This
656             Bio::Seq object is for the *entire* sequence: ie
657             from 1 to 10000
658             Example :
659             Returns : TRUE on success
660             Args : a Bio::PrimarySeqI compliant object
661              
662             =cut
663              
664             sub attach_seq {
665 17234     17234 1 11402 my ($self, $seq) = @_;
666              
667 17234 50 33     74319 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
      33        
668 0         0 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures but got '".ref($seq)."'");
669             }
670              
671 17234         18684 $self->{'_gsf_seq'} = $seq;
672              
673             # attach to sub features if they want it
674 17234         16824 foreach ( $self->sub_SeqFeature() ) {
675 4675         4862 $_->attach_seq($seq);
676             }
677 17234         15645 return 1;
678             }
679              
680              
681             =head2 seq
682              
683             Title : seq
684             Usage : my $tseq = $feat->seq();
685             Function: returns the truncated sequence (if there) for this
686             Example :
687             Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
688             bounded by start & end, or undef if there is no sequence attached.
689             If the strand is defined and set to -1, the returned sequence is
690             the reverse-complement of the region
691             Args : none
692              
693             =cut
694              
695             sub seq {
696 239     239 1 232 my ($self, $arg) = @_;
697              
698 239 50       365 if ( defined $arg ) {
699 0         0 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
700             }
701              
702 239 100       376 if ( ! exists $self->{'_gsf_seq'} ) {
703 1         2 return;
704             }
705              
706             # assumming our seq object is sensible, it should not have to yank
707             # the entire sequence out here.
708 238         155 my $seq;
709 238         383 my $start = $self->start;
710 238         371 my $end = $self->end;
711             # Check circular sequences cut by origin (e.g. join(2006035..2007700,1..257))
712 238 100 100     415 if ( $self->{'_gsf_seq'}->is_circular
      66        
713             and $self->location->isa('Bio::Location::SplitLocationI')
714             and $start > $end
715             ) {
716 1         3 my $primary_seq_length = $self->{'_gsf_seq'}->length;
717              
718             # Get duplicate object with the first sequence piece using trunc()
719 1         11 $seq = $self->{'_gsf_seq'}->trunc($start, $primary_seq_length);
720              
721             # Get post-origin sequence and build the complete sequence
722 1         3 my $post_origin = $self->{'_gsf_seq'}->subseq(1, $end);
723 1         2 my $complete_seq = $seq->seq() . $post_origin;
724              
725             # Add complete sequence to object
726 1         2 $seq->seq($complete_seq);
727             }
728             else {
729 237         583 $seq = $self->{'_gsf_seq'}->trunc($start, $end);
730             }
731              
732 238 100 100     440 if ( defined $self->strand && $self->strand == -1 ) {
733 143         280 $seq = $seq->revcom;
734             }
735              
736 238         636 return $seq;
737             }
738              
739              
740             =head2 entire_seq
741              
742             Title : entire_seq
743             Usage : my $whole_seq = $feat->entire_seq();
744             Function: gives the entire sequence that this seqfeature is attached to
745             Example :
746             Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
747             sequence attached
748             Args :
749              
750             =cut
751              
752             sub entire_seq {
753 756     756 1 2662 return shift->{'_gsf_seq'};
754             }
755              
756              
757             =head2 seq_id
758              
759             Title : seq_id
760             Usage : $feat->seq_id($newval)
761             Function: There are many cases when you make a feature that you
762             do know the sequence name, but do not know its actual
763             sequence. This is an attribute such that you can store
764             the ID (e.g., display_id) of the sequence.
765              
766             This attribute should *not* be used in GFF dumping, as
767             that should come from the collection in which the seq
768             feature was found.
769             Returns : value of seq_id
770             Args : newvalue (optional)
771              
772             =cut
773              
774             sub seq_id {
775 79321     79321 1 56863 my $obj = shift;
776 79321 100       119627 return $obj->{'_gsf_seq_id'} = shift if @_;
777 42966         79686 return $obj->{'_gsf_seq_id'};
778             }
779              
780              
781             =head2 display_name
782              
783             Title : display_name
784             Usage : my $featname = $feat->display_name;
785             Function: Implements the display_name() method, which is a human-readable
786             name for the feature.
787             Returns : value of display_name (a string)
788             Args : Optionally, on set the new value or undef
789              
790             =cut
791              
792             sub display_name {
793 322     322 1 247 my $self = shift;
794 322 100       589 return $self->{'display_name'} = shift if @_;
795 182   100     427 return $self->{'display_name'} || '';
796             }
797              
798              
799             =head1 Methods for implementing Bio::AnnotatableI
800              
801             =head2 annotation
802              
803             Title : annotation
804             Usage : $feat->annotation($annot_obj);
805             Function: Get/set the annotation collection object for annotating this
806             feature.
807              
808             Example :
809             Returns : A Bio::AnnotationCollectionI object
810             Args : newvalue (optional)
811              
812             =cut
813              
814             sub annotation {
815 2950     2950 1 2396 my ($obj,$value) = @_;
816              
817             # we are smart if someone references the object and there hasn't been
818             # one set yet
819 2950 100 100     9252 if(defined $value || ! defined $obj->{'annotation'} ) {
820 1807 100       6330 $value = Bio::Annotation::Collection->new() unless ( defined $value );
821 1807         2200 $obj->{'annotation'} = $value;
822             }
823 2950         6308 return $obj->{'annotation'};
824             }
825              
826              
827             =head1 Methods to implement Bio::FeatureHolderI
828              
829             This includes methods for retrieving, adding, and removing
830             features. Since this is already a feature, features held by this
831             feature holder are essentially sub-features.
832              
833             =head2 get_SeqFeatures
834              
835             Title : get_SeqFeatures
836             Usage : my @feats = $feat->get_SeqFeatures();
837             Function: Returns an array of sub Sequence Features
838             Returns : An array
839             Args : none
840              
841             =cut
842              
843             sub get_SeqFeatures {
844 30451 100   30451 1 17429 return @{ shift->{'_gsf_sub_array'} || []};
  30451         87830  
845             }
846              
847              
848             =head2 add_SeqFeature
849              
850             Title : add_SeqFeature
851             Usage : $feat->add_SeqFeature($subfeat);
852             $feat->add_SeqFeature($subfeat,'EXPAND');
853             Function: Adds a SeqFeature into the subSeqFeature array.
854             With no 'EXPAND' qualifer, subfeat will be tested
855             as to whether it lies inside the parent, and throw
856             an exception if not.
857              
858             If EXPAND is used, the parent's start/end/strand will
859             be adjusted so that it grows to accommodate the new
860             subFeature
861              
862             !IMPORTANT! The coordinates of the subfeature should not be relative
863             to the parent feature it is attached to, but relative to the sequence
864             the parent feature is located on.
865              
866             Returns : nothing
867             Args : An object which has the SeqFeatureI interface
868              
869             =cut
870              
871             sub add_SeqFeature {
872 23287     23287 1 21107 my ($self,$feat,$expand) = @_;
873 23287 50       30534 unless( defined $feat ) {
874 0         0 $self->warn("Called add_SeqFeature with no feature, ignoring");
875 0         0 return;
876             }
877 23287 50       54344 if ( !$feat->isa('Bio::SeqFeatureI') ) {
878 0         0 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
879             }
880              
881 23287 100 66     59364 if($expand && ($expand eq 'EXPAND')) {
882 18952         22112 $self->_expand_region($feat);
883             } else {
884 4335 50       6320 if ( !$self->contains($feat) ) {
885 0         0 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
886             }
887             }
888              
889 23287 100       41588 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
890 23287         18358 push(@{$self->{'_gsf_sub_array'}},$feat);
  23287         73794  
891              
892             }
893              
894              
895             =head2 remove_SeqFeatures
896              
897             Title : remove_SeqFeatures
898             Usage : $feat->remove_SeqFeatures;
899             Function: Removes all SeqFeatures
900              
901             If you want to remove only a subset of features then remove that
902             subset from the returned array, and add back the rest.
903             Example :
904             Returns : The array of Bio::SeqFeatureI implementing features that was
905             deleted.
906             Args : none
907              
908             =cut
909              
910             sub remove_SeqFeatures {
911 48     48 1 42 my ($self) = @_;
912 48 100       36 my @subfeats = @{$self->{'_gsf_sub_array'} || []};
  48         119  
913 48         55 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
914 48         60 return @subfeats;
915             }
916              
917              
918             =head1 GFF-related methods
919              
920             =head2 gff_format
921              
922             Title : gff_format
923             Usage : # get:
924             my $gffio = $feat->gff_format();
925             # set (change the default version of GFF2):
926             $feat->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
927             Function: Get/set the GFF format interpreter. This object is supposed to
928             format and parse GFF. See Bio::Tools::GFF for the interface.
929              
930             If this method is called as class method, the default for all
931             newly created instances will be changed. Otherwise only this
932             instance will be affected.
933             Example :
934             Returns : a Bio::Tools::GFF compliant object
935             Args : On set, an instance of Bio::Tools::GFF or a derived object.
936              
937             =cut
938              
939             sub gff_format {
940 1     1 1 2 my ($self, $gffio) = @_;
941 1 50       3 if(defined($gffio)) {
942 0 0       0 if(ref($self)) {
943 0         0 $self->{'_gffio'} = $gffio;
944             } else {
945 0         0 $Bio::SeqFeatureI::static_gff_formatter = $gffio;
946             }
947             }
948             return (ref($self) && exists($self->{'_gffio'}) ?
949 1 50 33     12 $self->{'_gffio'} : $self->_static_gff_formatter);
950             }
951              
952              
953             =head2 gff_string
954              
955             Title : gff_string
956             Usage : my $str = $feat->gff_string;
957             my $str = $feat->gff_string($gff_formatter);
958             Function: Provides the feature information in GFF format.
959              
960             We override this here from Bio::SeqFeatureI in order to use the
961             formatter returned by gff_format().
962              
963             Returns : A string
964             Args : Optionally, an object implementing gff_string().
965              
966             =cut
967              
968             sub gff_string {
969 8     8 1 9 my ($self,$formatter) = @_;
970 8 100       20 $formatter = $self->gff_format() unless $formatter;
971 8         18 return $formatter->gff_string($self);
972             }
973              
974              
975             =head2 slurp_gff_file
976              
977             Title : slurp_file
978             Usage : my @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
979             Function: Sneaky function to load an entire file as in memory objects.
980             Beware of big files.
981              
982             This method is deprecated. Use Bio::Tools::GFF instead, which can
983             also handle large files.
984              
985             Example :
986             Returns :
987             Args :
988              
989             =cut
990              
991             sub slurp_gff_file {
992 0     0 1 0 my ($f) = @_;
993 0         0 my @out;
994 0 0       0 if ( !defined $f ) {
995 0         0 Bio::Root::Root->throw("Must have a filehandle");
996             }
997              
998 0         0 Bio::Root::Root->deprecated( -message => "deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.",
999             -warn_version => '1.005',
1000             -throw_version => '1.007',
1001             );
1002              
1003 0         0 while(<$f>) {
1004 0         0 my $sf = Bio::SeqFeature::Generic->new('-gff_string' => $_);
1005 0         0 push(@out, $sf);
1006             }
1007              
1008 0         0 return @out;
1009             }
1010              
1011              
1012             =head2 _from_gff_string
1013              
1014             Title : _from_gff_string
1015             Usage :
1016             Function: Set feature properties from GFF string.
1017              
1018             This method uses the object returned by gff_format() for the
1019             actual interpretation of the string. Set a different GFF format
1020             interpreter first if you need a specific version, like GFF1. (The
1021             default is GFF2.)
1022             Example :
1023             Returns :
1024             Args : a GFF-formatted string
1025              
1026             =cut
1027              
1028             sub _from_gff_string {
1029 0     0   0 my ($self, $string) = @_;
1030 0         0 $self->gff_format()->from_gff_string($self, $string);
1031             }
1032              
1033              
1034             =head2 _expand_region
1035              
1036             Title : _expand_region
1037             Usage : $feat->_expand_region($feature);
1038             Function: Expand the total region covered by this feature to
1039             accommodate for the given feature.
1040              
1041             May be called whenever any kind of subfeature is added to this
1042             feature. add_SeqFeature() already does this.
1043             Returns :
1044             Args : A Bio::SeqFeatureI implementing object.
1045              
1046             =cut
1047              
1048             sub _expand_region {
1049 19497     19497   14097 my ($self, $feat) = @_;
1050 19497 50       32746 if(! $feat->isa('Bio::SeqFeatureI')) {
1051 0         0 $self->warn("$feat does not implement Bio::SeqFeatureI");
1052             }
1053             # if this doesn't have start set - forget it!
1054             # changed to reflect sanity checks for LocationI
1055 19497 100       21197 if(!$self->location->valid_Location) {
1056 2414         3367 $self->start($feat->start);
1057 2414         4417 $self->end($feat->end);
1058 2414 100       3815 $self->strand($feat->strand) unless $self->strand;
1059             } else {
1060 17083         26808 my ($start,$end,$strand) = $self->union($feat);
1061 17083         21646 $self->start($start);
1062 17083         21966 $self->end($end);
1063 17083         19566 $self->strand($strand);
1064             }
1065             }
1066              
1067              
1068             =head2 _parse
1069              
1070             Title : _parse
1071             Usage :
1072             Function: Parsing hints
1073             Example :
1074             Returns :
1075             Args :
1076              
1077             =cut
1078              
1079             sub _parse {
1080 0     0   0 my ($self) = @_;
1081 0         0 return $self->{'_parse_h'};
1082             }
1083              
1084              
1085             =head2 _tag_value
1086              
1087             Title : _tag_value
1088             Usage :
1089             Function: For internal use only. Convenience method for those tags that
1090             may only have a single value.
1091             Returns : The first value under the given tag as a scalar (string)
1092             Args : The tag as a string. Optionally, the value on set.
1093              
1094             =cut
1095              
1096             sub _tag_value {
1097 11184     11184   7900 my $self = shift;
1098 11184         7652 my $tag = shift;
1099              
1100 11184 100 100     17348 if(@_ || (! $self->has_tag($tag))) {
1101 10907 100       12568 $self->remove_tag($tag) if($self->has_tag($tag));
1102 10907         13604 $self->add_tag_value($tag, @_);
1103             }
1104 11184         14261 return ($self->get_tag_values($tag))[0];
1105             }
1106              
1107              
1108             #######################################################################
1109             # aliases for methods that changed their names in an attempt to make #
1110             # bioperl names more consistent #
1111             #######################################################################
1112              
1113             sub seqname {
1114 0     0 0 0 my $self = shift;
1115 0         0 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
1116 0         0 return $self->seq_id(@_);
1117             }
1118              
1119             sub display_id {
1120 0     0 0 0 my $self = shift;
1121 0         0 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
1122 0         0 return $self->display_name(@_);
1123             }
1124              
1125             # this is towards consistent naming
1126 280     280 0 293 sub each_tag_value { return shift->get_tag_values(@_); }
1127 91     91 0 132 sub all_tags { return shift->get_all_tags(@_); }
1128              
1129             # we revamped the feature containing property to implementing
1130             # Bio::FeatureHolderI
1131             *sub_SeqFeature = \&get_SeqFeatures;
1132             *add_sub_SeqFeature = \&add_SeqFeature;
1133             *flush_sub_SeqFeatures = \&remove_SeqFeatures;
1134             # this one is because of inconsistent naming ...
1135             *flush_sub_SeqFeature = \&remove_SeqFeatures;
1136              
1137             sub cleanup_generic {
1138 41057     41057 0 29480 my $self = shift;
1139 41057 100       29199 foreach my $f ( @{$self->{'_gsf_sub_array'} || []} ) {
  41057         115662  
1140 1228         1542 $f = undef;
1141             }
1142 41057         46425 $self->{'_gsf_seq'} = undef;
1143 41057         28346 foreach my $t ( keys %{$self->{'_gsf_tag_hash'} } ) {
  41057         102358  
1144 55372         42455 $self->{'_gsf_tag_hash'}->{$t} = undef;
1145 55372         124398 delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix
1146             }
1147             }
1148              
1149             1;