| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# bioperl module for Bio::SeqFeature::Tools::TypeMapper |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Chris Mungall |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Chris Mungall |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqFeature::Tools::TypeMapper - maps $seq_feature-E<gt>primary_tag |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::SeqIO; |
|
21
|
|
|
|
|
|
|
use Bio::SeqFeature::Tools::TypeMapper; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# first fetch a genbank SeqI object |
|
24
|
|
|
|
|
|
|
$seqio = |
|
25
|
|
|
|
|
|
|
Bio::SeqIO->new(-file=>'AE003644.gbk', |
|
26
|
|
|
|
|
|
|
-format=>'GenBank'); |
|
27
|
|
|
|
|
|
|
$seq = $seqio->next_seq(); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$tm = Bio::SeqFeature::Tools::TypeMapper->new; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# map all the types in the sequence |
|
32
|
|
|
|
|
|
|
$tm->map_types(-seq=>$seq, |
|
33
|
|
|
|
|
|
|
-type_map=>{CDS=>'ORF', |
|
34
|
|
|
|
|
|
|
variation=>sub { |
|
35
|
|
|
|
|
|
|
my $f = shift; |
|
36
|
|
|
|
|
|
|
$f->length > 1 ? |
|
37
|
|
|
|
|
|
|
'variation' : 'SNP' |
|
38
|
|
|
|
|
|
|
}, |
|
39
|
|
|
|
|
|
|
}); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# alternatively, use the hardcoded SO mapping |
|
42
|
|
|
|
|
|
|
$tm->map_types_to_SO(-seq=>$seq); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This class implements an object for mapping between types; for |
|
47
|
|
|
|
|
|
|
example, the types in a genbank feature table, and the types specified |
|
48
|
|
|
|
|
|
|
in the Sequence Ontology. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
You can specify your own mapping, either as a simple hash index, or by |
|
51
|
|
|
|
|
|
|
providing your own subroutines. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
58
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to the |
|
59
|
|
|
|
|
|
|
Bioperl mailing lists. Your participation is much appreciated. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
62
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head2 Support |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
71
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
|
72
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
73
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
78
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
|
79
|
|
|
|
|
|
|
web: |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 AUTHOR - Chris Mungall |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Email: cjm@fruitfly.org |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 APPENDIX |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
90
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Let the code begin... |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
package Bio::SeqFeature::Tools::TypeMapper; |
|
98
|
2
|
|
|
2
|
|
8
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
49
|
|
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
|
101
|
|
|
|
|
|
|
|
|
102
|
2
|
|
|
2
|
|
7
|
use base qw(Bio::Root::Root); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
1352
|
|
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Title : new |
|
107
|
|
|
|
|
|
|
Usage : $tm = Bio::SeqFeature::Tools::TypeMapper->new(); |
|
108
|
|
|
|
|
|
|
Function: constructor |
|
109
|
|
|
|
|
|
|
Example : |
|
110
|
|
|
|
|
|
|
Returns : a new Bio::SeqFeature::Tools::TypeMapper |
|
111
|
|
|
|
|
|
|
Args : -typemap => hashref mapping a primary_tag to its replacement (optional) |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub new {
    # Constructor: delegates object creation to the parent class, then
    # stores the optional -typemap argument through the typemap() accessor.
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # _rearrange returns the named arguments in the requested order;
    # only TYPEMAP is of interest here.
    my ($typemap) = $self->_rearrange(['TYPEMAP'], @args);
    $self->typemap($typemap) if $typemap;

    return $self; # success - we hope!
}
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 typemap |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Title : typemap |
|
132
|
|
|
|
|
|
|
Usage : $obj->typemap($newval) |
|
133
|
|
|
|
|
|
|
Function: |
|
134
|
|
|
|
|
|
|
Example : |
|
135
|
|
|
|
|
|
|
Returns : value of typemap (a scalar) |
|
136
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub typemap{
    # Combined getter/setter for the object's 'typemap' slot.
    # With an argument (even undef) the slot is overwritten first;
    # the current slot value is returned in both cases.
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'typemap'} = $new_value[0];
    }
    return $self->{'typemap'};
}
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 map_types |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Title : map_types |
|
151
|
|
|
|
|
|
|
Usage : |
|
152
|
|
|
|
|
|
|
Function: |
|
153
|
|
|
|
|
|
|
Example : |
|
154
|
|
|
|
|
|
|
Returns : |
|
155
|
|
|
|
|
|
|
Args : |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
dgg: added -undefined => "region" option to produce all valid SO mappings. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub map_types{
    # Rewrites the primary_tag of one feature (-feature) or of every feature
    # returned by $seq->get_all_SeqFeatures (-seq) according to a type map.
    #
    # Named arguments:
    #   -feature   : an object that isa Bio::SeqFeatureI and Bio::FeatureHolderI
    #   -seq       : an object that isa Bio::SeqI; when given, its features are
    #                processed instead of -feature
    #   -type_map  : hashref keyed by the current primary_tag; defaults to
    #                $self->typemap. A value may be
    #                  * a plain string - the new primary_tag
    #                  * an ARRAY ref   - [new primary_tag, identifier]; only
    #                                     the first element is used here
    #                  * a CODE ref     - called with the feature, its return
    #                                     value becomes the new primary_tag
    #   -undefined : replacement used when a string or ARRAY value resolves
    #                to the literal 'undefined'
    #
    # Returns nothing. Throws when neither -feature nor -seq is supplied, when
    # an object is of the wrong class, or when a map value is a reference of
    # an unsupported kind.
    my ($self,@args) = @_;

    my ($sf, $seq, $type_map, $undefmap) =
        $self->_rearrange([qw(FEATURE
                              SEQ
                              TYPE_MAP
                              UNDEFINED
                             )],
                          @args);
    if (!$sf && !$seq) {
        $self->throw("you need to pass in either -feature or -seq");
    }

    my @sfs = ($sf);
    if ($seq) {
        $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
        @sfs = $seq->get_all_SeqFeatures;
    }
    $type_map = $type_map || $self->typemap; # dgg: was type_map;

    foreach my $feature (@sfs) {

        $feature->isa("Bio::SeqFeatureI") || $self->throw("$feature NOT A SeqFeatureI");
        $feature->isa("Bio::FeatureHolderI") || $self->throw("$feature NOT A FeatureHolderI");

        my $type  = $feature->primary_tag;
        my $mtype = $type_map->{$type};
        next unless $mtype;          # no (true) mapping: leave the feature alone

        my $kind = ref($mtype);
        if ($kind eq 'CODE') {
            $mtype = $mtype->($feature);
        }
        else {
            if ($kind eq 'ARRAY') {
                # [name, identifier] pairs: take the name; an entry
                # without a usable name leaves the feature unchanged.
                ($mtype) = @{$mtype};
                next unless defined $mtype && length $mtype;
            }
            elsif ($kind) {
                $self->throw('type_map values must be scalar, ARRAY ref or CODE ref. You said: '.$mtype.' for type: '.$type);
            }
            if ($undefmap && $mtype eq 'undefined') { # dgg
                $mtype = $undefmap;
            }
        }
        $feature->primary_tag($mtype);
    }
    return;
}
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 map_types_to_SO |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Title : map_types_to_SO |
|
209
|
|
|
|
|
|
|
Usage : |
|
210
|
|
|
|
|
|
|
Function: |
|
211
|
|
|
|
|
|
|
Example : |
|
212
|
|
|
|
|
|
|
Returns : |
|
213
|
|
|
|
|
|
|
Args : |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
hardcodes the genbank to SO mapping |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Based on revision 1.22 of SO |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Please see the actual code for the mappings |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
Taken from |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
L |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
dgg: separated out FT_SO_map for caller changes. Update with: |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
open(FTSO,"curl -s http://sequenceontology.org/resources/mapping/FT_SO.txt|"); |
|
228
|
|
|
|
|
|
|
while(<FTSO>){ |
|
229
|
|
|
|
|
|
|
chomp; ($ft,$so,$sid,$ftdef,$sodef)= split"\t"; |
|
230
|
|
|
|
|
|
|
print " '$ft' => '$so',\n" if($ft && $so && $ftdef); |
|
231
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub FT_SO_map {
    # Returns a fresh hashref mapping feature-table (GenBank) feature keys to
    # Sequence Ontology terms. Each value is [SO term name, SO accession],
    # except "unsure", which is kept as the bare string "undefined".
    # A name of "undefined" marks a key with no SO equivalent; the accession
    # is then empty.
    #
    # Keys and term names use their original mixed-case spelling (e.g.
    # "CDS", "3'UTR", "polyA_signal") so that they match the primary tags
    # found in feature tables; accessions use the "SO:" prefix throughout.
    #
    # note : some of the ft_so mappings are commented out and overridden
    # by the manually added entries at the end.
    return {
        "-"               => ["located_sequence_feature", "SO:0000110"],
        "-10_signal"      => ["minus_10_signal", "SO:0000175"],
        "-35_signal"      => ["minus_35_signal", "SO:0000176"],
        "3'UTR"           => ["three_prime_UTR", "SO:0000205"],
        "3'clip"          => ["three_prime_clip", "SO:0000557"],
        "5'UTR"           => ["five_prime_UTR", "SO:0000204"],
        "5'clip"          => ["five_prime_clip", "SO:0000555"],
        "CAAT_signal"     => ["CAAT_signal", "SO:0000172"],
        "CDS"             => ["CDS", "SO:0000316"],
        "C_region"        => ["undefined", ""],
        "D-loop"          => ["D_loop", "SO:0000297"],
        "D_segment"       => ["D_gene", "SO:0000458"],
        "GC_signal"       => ["GC_rich_region", "SO:0000173"],
        "J_segment"       => ["undefined", ""],
        "LTR"             => ["long_terminal_repeat", "SO:0000286"],
        "N_region"        => ["undefined", ""],
        "RBS"             => ["ribosome_entry_site", "SO:0000139"],
        "STS"             => ["STS", "SO:0000331"],
        "S_region"        => ["undefined", ""],
        "TATA_signal"     => ["TATA_box", "SO:0000174"],
        "V_region"        => ["undefined", ""],
        "V_segment"       => ["undefined", ""],
        "attenuator"      => ["attenuator", "SO:0000140"],
        "conflict"        => ["undefined", ""],
        "enhancer"        => ["enhancer", "SO:0000165"],
        "exon"            => ["exon", "SO:0000147"],
        "gap"             => ["gap", "SO:0000730"],
        "gene"            => ["gene", "SO:0000704"],
        "iDNA"            => ["iDNA", "SO:0000723"],
        "intron"          => ["intron", "SO:0000188"],
        "mRNA"            => ["mRNA", "SO:0000234"],
        "mat_peptide"     => ["mature_protein_region", "SO:0000419"],
        "mature_peptide"  => ["mature_protein_region", "SO:0000419"],
        #"misc_RNA"       => ["transcript", "SO:0000673"],
        "misc_binding"    => ["binding_site", "SO:0000409"],
        "misc_difference" => ["sequence_difference", "SO:0000413"],
        "misc_feature"    => ["region", undef],
        "misc_recomb"     => ["recombination_feature", "SO:0000298"],
        "misc_signal"     => ["regulatory_region", "SO:0005836"],
        "misc_structure"  => ["sequence_secondary_structure", "SO:0000002"],
        "modified_base"   => ["modified_base_site", "SO:0000305"],
        "old_sequence"    => ["undefined", ""],
        "operon"          => ["operon", "SO:0000178"],
        "oriT"            => ["origin_of_transfer", "SO:0000724"],
        "polyA_signal"    => ["polyA_signal_sequence", "SO:0000551"],
        "polyA_site"      => ["polyA_site", "SO:0000553"],
        "precursor_RNA"   => ["primary_transcript", "SO:0000185"],
        "prim_transcript" => ["primary_transcript", "SO:0000185"],
        "primer_bind"     => ["primer_binding_site", "SO:0005850"],
        "promoter"        => ["promoter", "SO:0000167"],
        "protein_bind"    => ["protein_binding_site", "SO:0000410"],
        "rRNA"            => ["rRNA", "SO:0000252"],
        "repeat_region"   => ["repeat_region", "SO:0000657"],
        "repeat_unit"     => ["repeat_unit", "SO:0000726"],
        "satellite"       => ["satellite_DNA", "SO:0000005"],
        "scRNA"           => ["scRNA", "SO:0000013"],
        "sig_peptide"     => ["signal_peptide", "SO:0000418"],
        "snRNA"           => ["snRNA", "SO:0000274"],
        "snoRNA"          => ["snoRNA", "SO:0000275"],
        #"source"         => ["databank_entry", "SO:2000061"],
        "stem_loop"       => ["stem_loop", "SO:0000313"],
        "tRNA"            => ["tRNA", "SO:0000253"],
        "terminator"      => ["terminator", "SO:0000141"],
        "transit_peptide" => ["transit_peptide", "SO:0000725"],
        # kept as a plain string (not a pair), as in the previous table
        "unsure"          => "undefined",
        "variation"       => ["sequence_variant", "SO:0000109"],

        # manually added
        ## has parent = pseudogene ; dgg
        "pseudomRNA"         => ["pseudogenic_transcript", "SO:0000516"],
        ## from Unflattener misc_RNA ; dgg
        "pseudotranscript"   => ["pseudogenic_transcript", "SO:0000516"],
        "pseudoexon"         => ["pseudogenic_exon", "SO:0000507"],
        "pseudoCDS"          => ["pseudogenic_exon", "SO:0000507"],
        "pseudomisc_feature" => ["pseudogenic_region", "SO:0000462"],
        "pseudointron"       => ["pseudogenic_region", "SO:0000462"],

        ## "undefined" => "region",

        # this is the most generic form for RNAs;
        # we always represent the processed form of
        # the transcript
        misc_RNA => ['mature_transcript', "SO:0000233"],

        # not sure about this one...
        source => ['contig', "SO:0000149"],

        rep_origin => ['origin_of_replication', "SO:0000296"],

        Protein => ['polypeptide', "SO:0000104"],
    };
}
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub map_types_to_SO{
    # Convenience wrapper: forwards the caller's arguments to map_types()
    # with the table from FT_SO_map() appended as -type_map, and returns
    # whatever map_types() returns.
    my $self = shift;
    my @forwarded = @_;

    my $so_map = $self->FT_SO_map();
    return $self->map_types(@forwarded, -type_map => $so_map);
}
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 get_relationship_type_by_parent_child |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
Title : get_relationship_type_by_parent_child |
|
433
|
|
|
|
|
|
|
Usage : $type = $tm->get_relationship_type_by_parent_child($parent_sf, $child_sf); |
|
434
|
|
|
|
|
|
|
Usage : $type = $tm->get_relationship_type_by_parent_child('mRNA', 'protein'); |
|
435
|
|
|
|
|
|
|
Function: given two features where the parent contains the child, |
|
436
|
|
|
|
|
|
|
will determine what the relationship between them is |
|
437
|
|
|
|
|
|
|
Example : |
|
438
|
|
|
|
|
|
|
Returns : |
|
439
|
|
|
|
|
|
|
Args : parent SeqFeature, child SeqFeature OR |
|
440
|
|
|
|
|
|
|
parent type string, child type string |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
bioperl Seq::FeatureHolderI hierarchies are equivalent to unlabeled |
|
443
|
|
|
|
|
|
|
graphs (where parent nodes are the containers, and child nodes are the |
|
444
|
|
|
|
|
|
|
features being contained). For example, a feature of type mRNA can |
|
445
|
|
|
|
|
|
|
contain features of type exon. |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Some external representations (eg chadoxml or chaosxml) require that |
|
448
|
|
|
|
|
|
|
the edges in the feature relationship graph are labeled. For example, |
|
449
|
|
|
|
|
|
|
the type between mRNA and exon would be B<part_of>. Although it |
|
450
|
|
|
|
|
|
|
stretches the bioperl notion of containment, we could have a CDS |
|
451
|
|
|
|
|
|
|
contained by an mRNA (for example, the |
|
452
|
|
|
|
|
|
|
L<Bio::SeqFeature::Tools::Unflattener> module takes genbank records |
|
453
|
|
|
|
|
|
|
and makes these kind of links. The relationship here would be |
|
454
|
|
|
|
|
|
|
B |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
In chado speak, the child is the B feature and the parent is |
|
457
|
|
|
|
|
|
|
the B |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub get_relationship_type_by_parent_child {
    # Returns the label for the edge between a parent and a child feature:
    # 'derives_from' when the child type is protein or polypeptide
    # (compared case-insensitively), 'part_of' otherwise.
    # Either argument may be a type string or an object, in which case its
    # primary_tag is used. The parent type is resolved but does not
    # influence the result.
    my ($self,$parent,$child) = @_;

    $parent = $parent->primary_tag if ref($parent);
    $child  = $child->primary_tag  if ref($child);

    # TODO - do this with metadata, or infer via SO itself
    my %derived_child = map { $_ => 1 } qw(protein polypeptide);

    return $derived_child{ lc($child) } ? 'derives_from' : 'part_of';
}
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
1; |