File Coverage

Bio/SeqFeature/Tools/FeatureNamer.pm
Criterion Covered Total %
statement 24 33 72.7
branch 9 10 90.0
condition 5 8 62.5
subroutine 5 6 83.3
pod 4 4 100.0
total 47 61 77.0


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::SeqFeature::Tools::FeatureNamer
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Chris Mungall
7             #
8             # Copyright Chris Mungall
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqFeature::Tools::FeatureNamer - generates unique persistent names for features
17              
18             =head1 SYNOPSIS
19              
20             use Bio::SeqIO;
21             use Bio::SeqFeature::Tools::FeatureNamer;
22              
23             # first fetch a genbank SeqI object
24             $seqio =
25             Bio::SeqIO->new(-file=>'AE003644.gbk',
26             -format=>'GenBank');
27             $seq = $seqio->next_seq();
28              
29             $namer = Bio::SeqFeature::Tools::FeatureNamer->new;
30             my @features = $seq->get_SeqFeatures;
31             foreach my $feature (@features) {
32             $namer->name_feature($feature) unless $feature->display_name;
33             }
34              
35             =head1 DESCRIPTION
36              
37             This is a helper class for providing names for SeqFeatures
38              
39             The L class provides a display_name
40             method. Typically the display_name is not set when parsing formats
41             such as genbank - instead properties such as B
42             B are set in a somewhat inconsistent manner.
43              
44             In addition, when generating subfeatures (for example, exons that are
45             subfeatures of a transcript feature), it is often desirable to name
46             these subfeatures before either exporting to another format or
47             reporting to the user.
48              
49             This module is intended to help given uniform display_names to
50             features and their subfeatures.
51              
52             =head1 TODO
53              
54             Currently the naming policy is hardcoded. It may be desirable to allow
55             plugging in variations on naming policies; this could be done either
56             by subclassing, anonymous subroutines (closures) or
57             parameterization. Contact the author if you feel you have need for a
58             different naming policy
59              
60              
61             =head1 FEEDBACK
62              
63             =head2 Mailing Lists
64              
65             User feedback is an integral part of the evolution of this and other
66             Bioperl modules. Send your comments and suggestions preferably to the
67             Bioperl mailing lists Your participation is much appreciated.
68              
69             bioperl-l@bioperl.org - General discussion
70             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
71              
72             =head2 Support
73              
74             Please direct usage questions or support issues to the mailing list:
75              
76             I
77              
78             rather than to the module maintainer directly. Many experienced and
79             reponsive experts will be able look at the problem and quickly
80             address it. Please include a thorough description of the problem
81             with code and data examples if at all possible.
82              
83             =head2 Reporting Bugs
84              
85             report bugs to the Bioperl bug tracking system to help us keep track
86             the bugs and their resolution. Bug reports can be submitted via the
87             web:
88              
89             https://github.com/bioperl/bioperl-live/issues
90              
91             =head1 AUTHOR - Chris Mungall
92              
93             Email: cjm AT fruitfly DOT org
94              
95             =head1 APPENDIX
96              
97             The rest of the documentation details each of the object
98             methods. Internal methods are usually preceded with a _
99              
100             =cut
101              
102              
103             # Let the code begin...
104              
105             package Bio::SeqFeature::Tools::FeatureNamer;
106 2     2   7 use strict;
  2         3  
  2         48  
107              
108             # Object preamble - inherits from Bio::Root::Root
109              
110 2     2   6 use base qw(Bio::Root::Root);
  2         2  
  2         602  
111              
112             =head2 new
113              
114             Title : new
115             Usage : $unflattener = Bio::SeqFeature::Tools::FeatureNamer->new();
116             Function: constructor
117             Example :
118             Returns : a new Bio::SeqFeature::Tools::FeatureNamer
119             Args : see below
120              
121              
122             =cut
123              
124             sub new {
125 0     0 1 0 my($class,@args) = @_;
126 0         0 my $self = $class->SUPER::new(@args);
127              
128             # my($typemap) =
129             # $self->_rearrange([qw(TYPEMAP
130             # )],
131             # @args);#
132              
133             # $typemap && $self->typemap($typemap);
134 0         0 return $self; # success - we hope!
135             }
136              
137             =head2 name_feature
138              
139             Title : name_feature
140             Usage : $namer->name_feature($sf);
141             Function: sets display_name
142             Example :
143             Returns :
144             Args : L
145              
146             This method calls generate_feature_name() and uses the returned value
147             to set the display_name of the feature
148              
149             =cut
150              
151             sub name_feature {
152 57     57 1 68 my ($self, $sf) = @_;
153 57         96 my $name = $self->generate_feature_name($sf);
154 57         85 $sf->display_name($name);
155             }
156              
157             =head2 name_contained_features
158              
159             Title : name_contained_features
160             Usage : $namer->name_contained_features($sf);
161             Function: sets display_name for all features contained by sf
162             Example :
163             Returns :
164             Args : L
165              
166             iterates through all subfeatures of a certain feature (using
167             get_all_SeqFeatures) and names each subfeatures, based on the
168             generated name for the holder feature
169              
170             A subfeature is named by concatenating the generated name of the
171             container feature with the type and a number.
172              
173             For example, if the containing feature is a gene with display name
174             B, subfeatures will be named dpp-mRNA-1 dpp-mRNA2 dpp-exon1
175             dpp-exon2 etc
176              
177             =cut
178              
179             sub name_contained_features{
180 57     57 1 40 my ($self,$sf) = @_;
181 57         79 my $cname = $self->generate_feature_name($sf);
182 57         126 my @subsfs = $sf->get_all_SeqFeatures;
183 57         44 my %num_by_type = ();
184 57         63 foreach my $ssf (@subsfs) {
185 0         0 my $type = $ssf->primary_tag;
186 0   0     0 my $num = $num_by_type{$type} || 0;
187 0         0 $num++;
188 0         0 $num_by_type{$type} = $num;
189 0         0 $ssf->display_name("$cname-$type-$num");
190             }
191 57         57 return;
192             }
193              
194             =head2 generate_feature_name
195              
196             Title : generate_feature_name
197             Usage : $name = $namer->generate_feature_name($sf);
198             Function: derives a sensible human readable name for a $sf
199             Example :
200             Returns : str
201             Args : L
202              
203             returns a generated name (but does not actually set display_name).
204              
205             If display_name is already set, the method will return this
206              
207             Otherwise, the name will depend on the property:
208              
209             =over
210              
211             =item label
212              
213             =item product
214              
215             =item gene
216              
217             =item locus_tag
218              
219             =back
220              
221             (in order of priority)
222              
223             =cut
224              
225             sub generate_feature_name {
226 172     172 1 139 my ($self, $sf) = @_;
227              
228 172         256 my $name = $sf->display_name;
229 172 100       260 if (!$name) {
230 62 50 100     112 if ($sf->has_tag("label")) {
    100 66        
    100          
    100          
231 0         0 ($name) = $sf->get_tag_values("label");
232             }
233             elsif ($sf->has_tag("product")) {
234 37         81 ($name) = $sf->get_tag_values("product");
235             }
236             elsif ($sf->primary_tag eq 'gene' &&
237             $sf->has_tag("gene")) {
238 6         19 ($name) = $sf->get_tag_values("gene");
239             }
240             elsif ($sf->primary_tag eq 'gene' &&
241             $sf->has_tag("locus_tag")) {
242 12         27 ($name) = $sf->get_tag_values("locus_tag");
243             }
244             else {
245 7         9 $name = $sf->display_name;
246             }
247             }
248 172         199 return $name;
249             }
250              
251             1;