line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# bioperl module for Bio::SeqFeature::Tools::TypeMapper |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Chris Mungall |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Chris Mungall |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqFeature::Tools::TypeMapper - maps $seq_feature-E<gt>primary_tag |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::SeqIO; |
21
|
|
|
|
|
|
|
use Bio::SeqFeature::Tools::TypeMapper; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# first fetch a genbank SeqI object |
24
|
|
|
|
|
|
|
$seqio = |
25
|
|
|
|
|
|
|
Bio::SeqIO->new(-file=>'AE003644.gbk', |
26
|
|
|
|
|
|
|
-format=>'GenBank'); |
27
|
|
|
|
|
|
|
$seq = $seqio->next_seq(); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$tm = Bio::SeqFeature::Tools::TypeMapper->new; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# map all the types in the sequence |
32
|
|
|
|
|
|
|
$tm->map_types(-seq=>$seq, |
33
|
|
|
|
|
|
|
-type_map=>{CDS=>'ORF', |
34
|
|
|
|
|
|
|
variation=>sub { |
35
|
|
|
|
|
|
|
my $f = shift; |
36
|
|
|
|
|
|
|
$f->length > 1 ? |
37
|
|
|
|
|
|
|
'variation' : 'SNP' |
38
|
|
|
|
|
|
|
}, |
39
|
|
|
|
|
|
|
}); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# alternatively, use the hardcoded SO mapping |
42
|
|
|
|
|
|
|
$tm->map_types_to_SO(-seq=>$seq); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 DESCRIPTION |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This class implements an object for mapping between types; for |
47
|
|
|
|
|
|
|
example, the types in a genbank feature table, and the types specified |
48
|
|
|
|
|
|
|
in the Sequence Ontology. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
You can specify your own mapping, either as a simple hash index, or by |
51
|
|
|
|
|
|
|
providing your own subroutines. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 FEEDBACK |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 Mailing Lists |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
58
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to the |
59
|
|
|
|
|
|
|
Bioperl mailing lists. Your participation is much appreciated. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
62
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head2 Support |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
71
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
72
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
73
|
|
|
|
|
|
|
with code and data examples if at all possible. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 Reporting Bugs |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
78
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
79
|
|
|
|
|
|
|
web: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 AUTHOR - Chris Mungall |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Email: cjm@fruitfly.org |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 APPENDIX |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
90
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Let the code begin... |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
package Bio::SeqFeature::Tools::TypeMapper; |
98
|
2
|
|
|
2
|
|
8
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
49
|
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
101
|
|
|
|
|
|
|
|
102
|
2
|
|
|
2
|
|
7
|
use base qw(Bio::Root::Root); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
1352
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Title : new |
107
|
|
|
|
|
|
|
Usage : $tm = Bio::SeqFeature::Tools::TypeMapper->new(); |
108
|
|
|
|
|
|
|
Function: constructor |
109
|
|
|
|
|
|
|
Example : |
110
|
|
|
|
|
|
|
Returns : a new Bio::SeqFeature::Tools::TypeMapper |
111
|
|
|
|
|
|
|
Args : see below |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Constructor. Delegates object creation to the parent class, then stores
# the optional -typemap named argument through the typemap() accessor.
# A false -typemap value (including an absent one) is ignored.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # _rearrange picks the named argument out of @args
    my ($typemap) = $self->_rearrange([qw(TYPEMAP)], @args);
    $self->typemap($typemap) if $typemap;

    return $self; # success - we hope!
}
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 typemap |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Title : typemap |
132
|
|
|
|
|
|
|
Usage : $obj->typemap($newval) |
133
|
|
|
|
|
|
|
Function: |
134
|
|
|
|
|
|
|
Example : |
135
|
|
|
|
|
|
|
Returns : value of typemap (a scalar) |
136
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Combined getter/setter for the object's type map.
# Called with an argument (even undef), stores it under the 'typemap' key
# and returns it; called without one, returns the stored value.
sub typemap {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'typemap'} = $new_value[0];
    }

    return $self->{'typemap'};
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 map_types |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Title : map_types |
151
|
|
|
|
|
|
|
Usage : $tm->map_types(-seq=>$seq, -type_map=>{CDS=>'ORF'}, -undefined=>'region'); |
152
|
|
|
|
|
|
|
Function: replaces the primary_tag of each feature with the value found for it in the type map |
153
|
|
|
|
|
|
|
Example : |
154
|
|
|
|
|
|
|
Returns : nothing; the features are modified in place |
155
|
|
|
|
|
|
|
Args : -feature or -seq (one of them is required), -type_map (defaults to $self->typemap), -undefined |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
dgg: added -undefined => "region" option to produce all valid SO mappings. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# map_types
#
# Rewrites the primary_tag of a single feature (-feature) or of every
# feature returned by get_all_SeqFeatures on a sequence (-seq), using a
# type map keyed by the current primary_tag.
#
# Named arguments (resolved through _rearrange):
#   -feature   : one feature object
#   -seq       : a Bio::SeqI; when given, its features replace -feature
#   -type_map  : hash ref; falls back to $self->typemap when false
#   -undefined : replacement used when the mapped name is the literal
#                string 'undefined'
#
# A type map value may be
#   * a plain string - the new primary_tag
#   * an ARRAY ref   - its first element is the new primary_tag; this is
#                      the [name, identifier] shape returned by FT_SO_map
#   * a CODE ref     - called with the feature; its result is used as is
# Any other kind of reference is rejected with throw(). Features whose
# type has no true entry in the map are left untouched, as are ARRAY
# entries whose name is undef or empty.
#
# Throws when neither -feature nor -seq is given, when -seq is not a
# Bio::SeqI, or when a feature is not both a Bio::SeqFeatureI and a
# Bio::FeatureHolderI.
#
# Returns nothing.
sub map_types {
    my ($self, @args) = @_;

    my ($sf, $seq, $type_map, $undefmap) =
        $self->_rearrange([qw(FEATURE SEQ TYPE_MAP UNDEFINED)], @args);

    if (!$sf && !$seq) {
        $self->throw("you need to pass in either -feature or -seq");
    }

    my @sfs = ($sf);
    if ($seq) {
        $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
        @sfs = $seq->get_all_SeqFeatures;
    }

    $type_map = $type_map || $self->typemap; # dgg: was type_map;

    foreach my $feature (@sfs) {
        $feature->isa("Bio::SeqFeatureI")
            || $self->throw("$feature NOT A SeqFeatureI");
        $feature->isa("Bio::FeatureHolderI")
            || $self->throw("$feature NOT A FeatureHolderI");

        my $type  = $feature->primary_tag;
        my $mtype = $type_map->{$type};
        next unless $mtype;

        my $kind = ref($mtype);
        if ($kind eq 'CODE') {
            $mtype = $mtype->($feature);
        }
        elsif ($kind eq 'ARRAY') {
            # [name, identifier] pair: only the name becomes the new tag
            ($mtype) = @$mtype;
            next unless defined $mtype && length $mtype;
        }
        elsif ($kind) {
            $self->throw('type_map values must be scalar, ARRAY ref or CODE ref. You said: '.$mtype.' for type: '.$type);
        }

        # the -undefined substitution applies to names taken from the map,
        # not to values computed by a CODE ref
        if ($kind ne 'CODE' && $undefmap && $mtype eq 'undefined') { # dgg
            $mtype = $undefmap;
        }

        $feature->primary_tag($mtype);
    }
    return;
}
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 map_types_to_SO |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Title : map_types_to_SO |
209
|
|
|
|
|
|
|
Usage : |
210
|
|
|
|
|
|
|
Function: |
211
|
|
|
|
|
|
|
Example : |
212
|
|
|
|
|
|
|
Returns : |
213
|
|
|
|
|
|
|
Args : |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
hardcodes the genbank to SO mapping |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Based on revision 1.22 of SO |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Please see the actual code for the mappings |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
Taken from |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
L |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
dgg: separated out FT_SO_map for caller changes. Update with: |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
open(FTSO,"curl -s http://sequenceontology.org/resources/mapping/FT_SO.txt|"); |
228
|
|
|
|
|
|
|
while(<FTSO>){ |
229
|
|
|
|
|
|
|
chomp; ($ft,$so,$sid,$ftdef,$sodef)= split"\t"; |
230
|
|
|
|
|
|
|
print " '$ft' => '$so',\n" if($ft && $so && $ftdef); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# FT_SO_map
#
# Returns a newly built hash ref holding the hard-coded mapping from
# GenBank feature-table keys to Sequence Ontology terms. Every value is a
# two-element array ref: [ SO term name, SO identifier ].
#
#   * Keys are spelled exactly like the feature keys (CDS, 3'UTR, LTR, ...);
#     hash lookup is case-sensitive, so lower-cased keys would never match
#     a feature's primary_tag.
#   * Feature keys with no SO counterpart carry the name 'undefined' and an
#     empty identifier; map_types can replace that name via -undefined.
#   * misc_feature carries an undef identifier.
#     NOTE(review): the intended meaning of undef vs '' is not visible in
#     this file - confirm before relying on the difference.
#
# Takes no arguments; it ignores the invocant when called as a method.
sub FT_SO_map {
    # note : some of the ft_so mappings are commented out and overridden
    # by the manually added entries at the end of the table
    return {
        "-"                  => ["located_sequence_feature", "SO:0000110"],
        "-10_signal"         => ["minus_10_signal", "SO:0000175"],
        "-35_signal"         => ["minus_35_signal", "SO:0000176"],
        "3'UTR"              => ["three_prime_UTR", "SO:0000205"],
        "3'clip"             => ["three_prime_clip", "SO:0000557"],
        "5'UTR"              => ["five_prime_UTR", "SO:0000204"],
        "5'clip"             => ["five_prime_clip", "SO:0000555"],
        "CAAT_signal"        => ["CAAT_signal", "SO:0000172"],
        "CDS"                => ["CDS", "SO:0000316"],
        "C_region"           => ["undefined", ""],
        "D-loop"             => ["D_loop", "SO:0000297"],
        "D_segment"          => ["D_gene", "SO:0000458"],
        "GC_signal"          => ["GC_rich_region", "SO:0000173"],
        "J_segment"          => ["undefined", ""],
        "LTR"                => ["long_terminal_repeat", "SO:0000286"],
        "N_region"           => ["undefined", ""],
        "RBS"                => ["ribosome_entry_site", "SO:0000139"],
        "STS"                => ["STS", "SO:0000331"],
        "S_region"           => ["undefined", ""],
        "TATA_signal"        => ["TATA_box", "SO:0000174"],
        "V_region"           => ["undefined", ""],
        "V_segment"          => ["undefined", ""],
        "attenuator"         => ["attenuator", "SO:0000140"],
        "conflict"           => ["undefined", ""],
        "enhancer"           => ["enhancer", "SO:0000165"],
        "exon"               => ["exon", "SO:0000147"],
        "gap"                => ["gap", "SO:0000730"],
        "gene"               => ["gene", "SO:0000704"],
        "iDNA"               => ["iDNA", "SO:0000723"],
        "intron"             => ["intron", "SO:0000188"],
        "mRNA"               => ["mRNA", "SO:0000234"],
        "mat_peptide"        => ["mature_protein_region", "SO:0000419"],
        "mature_peptide"     => ["mature_protein_region", "SO:0000419"],
        #"misc_RNA"          => ["transcript", "SO:0000673"],
        "misc_binding"       => ["binding_site", "SO:0000409"],
        "misc_difference"    => ["sequence_difference", "SO:0000413"],
        "misc_feature"       => ["region", undef],
        "misc_recomb"        => ["recombination_feature", "SO:0000298"],
        "misc_signal"        => ["regulatory_region", "SO:0005836"],
        "misc_structure"     => ["sequence_secondary_structure", "SO:0000002"],
        "modified_base"      => ["modified_base_site", "SO:0000305"],
        "old_sequence"       => ["undefined", ""],
        "operon"             => ["operon", "SO:0000178"],
        "oriT"               => ["origin_of_transfer", "SO:0000724"],
        "polyA_signal"       => ["polyA_signal_sequence", "SO:0000551"],
        "polyA_site"         => ["polyA_site", "SO:0000553"],
        "precursor_RNA"      => ["primary_transcript", "SO:0000185"],
        "prim_transcript"    => ["primary_transcript", "SO:0000185"],
        "primer_bind"        => ["primer_binding_site", "SO:0005850"],
        "promoter"           => ["promoter", "SO:0000167"],
        "protein_bind"       => ["protein_binding_site", "SO:0000410"],
        "rRNA"               => ["rRNA", "SO:0000252"],
        "repeat_region"      => ["repeat_region", "SO:0000657"],
        "repeat_unit"        => ["repeat_unit", "SO:0000726"],
        "satellite"          => ["satellite_DNA", "SO:0000005"],
        "scRNA"              => ["scRNA", "SO:0000013"],
        "sig_peptide"        => ["signal_peptide", "SO:0000418"],
        "snRNA"              => ["snRNA", "SO:0000274"],
        "snoRNA"             => ["snoRNA", "SO:0000275"],
        #"source"            => ["databank_entry", "SO:2000061"],
        "stem_loop"          => ["stem_loop", "SO:0000313"],
        "tRNA"               => ["tRNA", "SO:0000253"],
        "terminator"         => ["terminator", "SO:0000141"],
        "transit_peptide"    => ["transit_peptide", "SO:0000725"],
        "unsure"             => ["undefined", ""],
        "variation"          => ["sequence_variant", "SO:0000109"],

        # manually added
        ## has parent = pseudogene ; dgg
        "pseudomRNA"         => ["pseudogenic_transcript", "SO:0000516"],
        ## from Unflattener misc_RNA ; dgg
        "pseudotranscript"   => ["pseudogenic_transcript", "SO:0000516"],
        "pseudoexon"         => ["pseudogenic_exon", "SO:0000507"],
        "pseudoCDS"          => ["pseudogenic_exon", "SO:0000507"],
        "pseudomisc_feature" => ["pseudogenic_region", "SO:0000462"],
        "pseudointron"       => ["pseudogenic_region", "SO:0000462"],

        ## "undefined" => "region",

        # this is the most generic form for RNAs;
        # we always represent the processed form of
        # the transcript
        "misc_RNA"           => ["mature_transcript", "SO:0000233"],

        # not sure about this one...
        "source"             => ["contig", "SO:0000149"],

        "rep_origin"         => ["origin_of_replication", "SO:0000296"],

        "Protein"            => ["polypeptide", "SO:0000104"],
    };
}
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Convenience wrapper around map_types: forwards the caller's named
# arguments unchanged and appends -type_map set to the table returned by
# FT_SO_map. Returns whatever map_types returns.
sub map_types_to_SO {
    my $self  = shift;
    my @named = @_;

    return $self->map_types(@named, -type_map => $self->FT_SO_map());
}
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 get_relationship_type_by_parent_child |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
Title : get_relationship_type_by_parent_child |
433
|
|
|
|
|
|
|
Usage : $type = $tm->get_relationship_type_by_parent_child($parent_sf, $child_sf); |
434
|
|
|
|
|
|
|
Usage : $type = $tm->get_relationship_type_by_parent_child('mRNA', 'protein'); |
435
|
|
|
|
|
|
|
Function: given two features where the parent contains the child, |
436
|
|
|
|
|
|
|
will determine the type of the relationship between them |
437
|
|
|
|
|
|
|
Example : |
438
|
|
|
|
|
|
|
Returns : |
439
|
|
|
|
|
|
|
Args : parent SeqFeature, child SeqFeature OR |
440
|
|
|
|
|
|
|
parent type string, child type string OR |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
bioperl Seq::FeatureHolderI hierarchies are equivalent to unlabeled |
443
|
|
|
|
|
|
|
graphs (where parent nodes are the containers, and child nodes are the |
444
|
|
|
|
|
|
|
features being contained). For example, a feature of type mRNA can |
445
|
|
|
|
|
|
|
contain features of type exon. |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Some external representations (eg chadoxml or chaosxml) require that |
448
|
|
|
|
|
|
|
the edges in the feature relationship graph are labeled. For example, |
449
|
|
|
|
|
|
|
the type between mRNA and exon would be B<part_of>. Although it |
450
|
|
|
|
|
|
|
stretches the bioperl notion of containment, we could have a CDS |
451
|
|
|
|
|
|
|
contained by an mRNA (for example, the |
452
|
|
|
|
|
|
|
L<Bio::SeqFeature::Tools::Unflattener> module takes genbank records |
453
|
|
|
|
|
|
|
and makes these kind of links. The relationship here would be |
454
|
|
|
|
|
|
|
B |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
In chado speak, the child is the B<subject> feature and the parent is |
457
|
|
|
|
|
|
|
the B<object> |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
# Returns the label for the relationship between a parent and a child.
# Either argument may be a type string or an object, in which case its
# primary_tag is used. The answer is 'derives_from' when the child type is
# 'protein' or 'polypeptide' (compared case-insensitively) and 'part_of'
# otherwise; the parent type does not influence the result.
sub get_relationship_type_by_parent_child {
    my ($self, $parent, $child) = @_;

    # resolve objects to their type names
    $parent = $parent->primary_tag if ref($parent);
    $child  = $child->primary_tag  if ref($child);

    # TODO - do this with metadata, or infer via SO itself
    my %is_derived = map { $_ => 1 } qw(protein polypeptide);

    return $is_derived{ lc($child) } ? 'derives_from' : 'part_of';
}
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
1; |