line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::ToolBox::SeqFeature; |
2
|
|
|
|
|
|
|
our $VERSION = '1.67'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
Bio::ToolBox::SeqFeature - Fast, simple SeqFeature implementation |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 SYNOPSIS |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# create a transcript |
11
|
|
|
|
|
|
|
my $transcript = Bio::ToolBox::SeqFeature->new( |
12
|
|
|
|
|
|
|
-seq_id => chr1, |
13
|
|
|
|
|
|
|
-start => 1001, |
14
|
|
|
|
|
|
|
-stop => 1500, |
15
|
|
|
|
|
|
|
-strand => '+', |
16
|
|
|
|
|
|
|
); |
17
|
|
|
|
|
|
|
$seqf->primary_tag('mRNA'); # set parameters individually |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# create an exon |
20
|
|
|
|
|
|
|
my $exon = Bio::ToolBox::SeqFeature->new( |
21
|
|
|
|
|
|
|
-start => 1001, |
22
|
|
|
|
|
|
|
-end => 1200, |
23
|
|
|
|
|
|
|
-type => 'exon', |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# associate exon with transcript |
27
|
|
|
|
|
|
|
$transcript->add_SeqFeature($exon); |
28
|
|
|
|
|
|
|
my $exon_strand = $exon->strand; # inherits from parent |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# walk through subfeatures |
31
|
|
|
|
|
|
|
foreach my $f ($transcript->get_all_SeqFeatures) { |
32
|
|
|
|
|
|
|
printf "%s is a %s\n", $f->display_name, $f->type; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# add attribute |
36
|
|
|
|
|
|
|
$transcript->add_tag_value('Status', $status); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# get attribute |
39
|
|
|
|
|
|
|
my $value = $transcript->get_tag_values($key); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
SeqFeature objects represent functional elements on a genomic or chromosomal |
44
|
|
|
|
|
|
|
sequence, such as genes, transcripts, exons, etc. In many cases, especially |
45
|
|
|
|
|
|
|
genes, they have a hierarchical structure, typically in this order |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
gene |
48
|
|
|
|
|
|
|
mRNA or transcript |
49
|
|
|
|
|
|
|
exon |
50
|
|
|
|
|
|
|
CDS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
SeqFeature objects have at a minimum coordinate information, including |
53
|
|
|
|
|
|
|
chromosome, start, stop, and strand, and a name or unique identifier. They |
54
|
|
|
|
|
|
|
often also have type or source information, which usually follows the |
55
|
|
|
|
|
|
|
Sequence Ontology key words. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This is a fast, efficient, simplified SeqFeature implementation that mostly |
58
|
|
|
|
|
|
|
implements the L API, and could be substituted for other |
59
|
|
|
|
|
|
|
implementations, such L and L. |
60
|
|
|
|
|
|
|
Unlike the others, however, it inherits no classes or methods and uses an |
61
|
|
|
|
|
|
|
unorthodox blessed array to store feature attributes, decreasing memory |
62
|
|
|
|
|
|
|
requirements and overhead complexity. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 METHODS |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Refer to the L documentation for general implementation and |
67
|
|
|
|
|
|
|
ideas, which this module tries to implement. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 Creating new SeqFeature objects |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=over 4 |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item new |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Generate a new SeqFeature object. Pass an array of key =E value pairs |
76
|
|
|
|
|
|
|
with the feature parameters. At a minimum, location information (C, |
77
|
|
|
|
|
|
|
C, and C) should be provided, with name and ID recommended. |
78
|
|
|
|
|
|
|
Most of the accession methods may be used as key tags to the C method. |
79
|
|
|
|
|
|
|
The following attribute keys are accepted. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=over 4 |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item -seq_id |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item -start |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item -end |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item -stop |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item -strand |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item -name |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item -display_name |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item -id |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item -primary_id |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item -type |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item -source |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item -source_tag |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item -primary_tag |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item -score |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item -phase |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item -attributes |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Provide an anonymous array of key value pairs representing attribute |
116
|
|
|
|
|
|
|
keys and their values. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item -segments |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Provide an anonymous array of SeqFeature objects to add as child |
121
|
|
|
|
|
|
|
objects. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=back |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item duplicate |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
This method will duplicate a SeqFeature object as a new object, with |
128
|
|
|
|
|
|
|
limitations. The C tag, attribute key/values, and subfeatures |
129
|
|
|
|
|
|
|
are B duplicated. This basically limits the object to coordinates, |
130
|
|
|
|
|
|
|
C, C, C, C, and C. |
131
|
|
|
|
|
|
|
Remaining attributes should be explicitly set by the user. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=back |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 Accession methods |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
These are methods to set and/or retrieve attribute values. Pass a |
138
|
|
|
|
|
|
|
single value to set the attribute. The attribute value is always |
139
|
|
|
|
|
|
|
returned. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=over 4 |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item seq_id |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The name of the chromosome or reference sequence. If you are generating |
146
|
|
|
|
|
|
|
a new child object, e.g. exon, then seq_id does not need to be provided. |
147
|
|
|
|
|
|
|
In these cases, the parent's seq_id is inherited. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item start |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
The start coordinate of the feature. SeqFeature objects use the 1-base |
152
|
|
|
|
|
|
|
numbering system, following BioPerl convention. Start coordinates are |
153
|
|
|
|
|
|
|
always less than the stop coordinate. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item end |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item stop |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
The end coordinate of the feature. Stop coordinates are always greater |
160
|
|
|
|
|
|
|
than the start coordinate. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item strand |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
The strand that the feature is on. The default value is always unstranded, |
165
|
|
|
|
|
|
|
or 0. Any of the following may be supplied: "C<1 0 -1 + . ->". Numeric |
166
|
|
|
|
|
|
|
integers 1, 0, and -1 are always returned. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item source |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=item source_tag |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
A text string representing the source of the feature. This corresponds to |
173
|
|
|
|
|
|
|
the second field in a GFF file. The source tag is optional. If not supplied, |
174
|
|
|
|
|
|
|
the source tag can be inherited from a parent feature if present. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item primary_tag |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
The type of feature. These usually follow Sequence Ontology terms, but are |
179
|
|
|
|
|
|
|
not required. The default value is the generic term "region". Examples |
180
|
|
|
|
|
|
|
include gene, mRNA, transcript, exon, CDS, etc. This corresponds to the |
181
|
|
|
|
|
|
|
third field in a GFF file. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item type |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
A shortcut method which can represent either "primary_tag:source_tag" or, |
186
|
|
|
|
|
|
|
if no source_tag is defined, simply "primary_tag". |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item name |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item display_name |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
A text string representing the name of the feature. The name is not |
193
|
|
|
|
|
|
|
required to be a unique value, but generally is. The default name, if |
194
|
|
|
|
|
|
|
none is provided, is the C. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=item id |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item primary_id |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
A text string representing a unique identifier of the feature. If not |
201
|
|
|
|
|
|
|
explicitly defined, a unique ID is automatically generated. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=item score |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
A numeric (integer or floating) value representing the feature. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item phase |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
An integer (0,1,2) representing the coding frame. Only required for |
210
|
|
|
|
|
|
|
CDS features. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=back |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head2 Special Attributes |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Special attributes are key value pairs that do not fall under the above |
217
|
|
|
|
|
|
|
conventions. It is possible to have more than one value assigned to a |
218
|
|
|
|
|
|
|
given key. In a GFF file, this corresponds to the attributes in the 9th |
219
|
|
|
|
|
|
|
field, with the exception of special reserved attributes such as C, |
220
|
|
|
|
|
|
|
C, and C. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=over 4 |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item add_tag_value |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
$seqf->add_tag_value($key, $value); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Sets the special attribute C<$key> to C<$value>. If you have more than one |
229
|
|
|
|
|
|
|
value, C<$value> should be an anonymous array of text values. Following |
230
|
|
|
|
|
|
|
GFF convention, C<$key> should not comprise of special characters, including |
231
|
|
|
|
|
|
|
";,= ". |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item all_tags |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item get_all_tags |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Returns an array of all attribute keys. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=item has_tag($key) |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Boolean method whether the SeqFeature object contains the attribute. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=item get_tag_values($key) |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=item each_tag_value($key) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Returns the value for attribute C<$key>. If multiple values are present, |
248
|
|
|
|
|
|
|
it may return an array or array reference. |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=item attributes |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Returns an array or reference to the key value hash; |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item remove_tag($key) |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
Deletes the indicated attribute. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=back |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 Subfeature Hierarchy |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Following Sequence Ontology and GFF conventions, SeqFeatures can have |
263
|
|
|
|
|
|
|
subfeatures (children) representing a hierarchical structure, for example |
264
|
|
|
|
|
|
|
genes beget transcripts which beget exons. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Child SeqFeature objects may have more than one parent, for example, shared |
267
|
|
|
|
|
|
|
exons between alternate transcripts. In which case, only one exon |
268
|
|
|
|
|
|
|
SeqFeature object is generated, but is added to both transcript objects. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=over 4 |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item add_SeqFeature |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item add_segment |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Pass one or more SeqFeature objects to be associated as children. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item get_SeqFeatures |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item get_all_SeqFeatures |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item segments |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
Returns an array of all sub SeqFeature objects. |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item delete_SeqFeature($id) |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
This method will delete a specific child SeqFeature. Pass the method the |
289
|
|
|
|
|
|
|
C of the SeqFeature. All SeqFeatures will have a C. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=back |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 Range Methods |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
These are range methods for comparing one SeqFeature object to another. |
296
|
|
|
|
|
|
|
They are analogous to L methods. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
They currently do not support strand checks or strand options. |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=over 4 |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item length |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Returns the length of the SeqFeature object. |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=item overlaps |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
my $overlap_check = $seqf1->overlaps($other_seqf); |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Returns a boolean value whether a second SeqFeature object overlaps |
311
|
|
|
|
|
|
|
with the self object. |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=item contains |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
my $check = $seqf1->contains($seqf2); |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Returns a boolean value whether the self object completely |
318
|
|
|
|
|
|
|
contains a second SeqFeature object. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=item equals |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
my $check = $seqf1->equals($seqf2); |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Returns a boolean value whether the self object coordinates are |
325
|
|
|
|
|
|
|
equivalent to a second SeqFeature object. |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item intersection |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
my $check = $seqf1->intersection($seqf2); |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Returns a new SeqFeature object representing the intersection or |
332
|
|
|
|
|
|
|
overlap area between the self object and a second SeqFeature |
333
|
|
|
|
|
|
|
object. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item union($other) |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
my $new_seqf = $seqf1->union($seqf2); |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Returns a new SeqFeature object representing the merged interval |
340
|
|
|
|
|
|
|
between the self and a second SeqFeature object. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=item subtract |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
my $new_seqf = $seqf1->subtract($seqf2); |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Returns a new SeqFeature object representing the interval of the |
347
|
|
|
|
|
|
|
self object after subtracting a second SeqFeature object. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=back |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=head2 Export Strings |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
These methods export the SeqFeature object as a text string in the |
354
|
|
|
|
|
|
|
specified format. New line characters are included. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=over 4 |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=item gff_string($recurse) |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Exports the SeqFeature object as a GFF3 formatted string. Pass a |
361
|
|
|
|
|
|
|
boolean value if you wish to recurse through the hierarchy and |
362
|
|
|
|
|
|
|
print subfeatures as a multi-line string. Child-Parent ID attributes |
363
|
|
|
|
|
|
|
are smartly handled, including multiple parentage. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
See L for methods for printing GTF transcript models. |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=item bed_string |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Exports the SeqFeature object as a simple BED6 formatted string. |
370
|
|
|
|
|
|
|
See L for methods for printing BED12 transcript models. |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=back |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=head1 LIMITATIONS |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
Because of their underlying array structure, Bio::ToolBox::SeqFeature objects |
377
|
|
|
|
|
|
|
should generally not be used as a base class (unless you know the ramifications |
378
|
|
|
|
|
|
|
of doing so). The following Bio classes and Interfaces are similar and their API |
379
|
|
|
|
|
|
|
was used as a model. However, in most cases they are not likely to work with this |
380
|
|
|
|
|
|
|
module because of object structure incompatibility, although this has not been |
381
|
|
|
|
|
|
|
explicitly tested. |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=over 4 |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=item Bio::AnnotationI |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=item Bio::LocationI |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item Bio::RangeI |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item Bio::SeqI |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item Bio::Tools::GFF |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=item Bio::DB::SeqFeature::Store |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item Bio::Graphics |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=back |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head1 SEE ALSO |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
L, L, L, |
404
|
|
|
|
|
|
|
L, L, L |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
407
|
|
|
|
|
|
|
|
408
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
63
|
|
409
|
2
|
|
|
2
|
|
9
|
use Carp qw(carp cluck croak confess); |
|
2
|
|
|
|
|
23
|
|
|
2
|
|
|
|
|
148
|
|
410
|
|
|
|
|
|
|
use constant { |
411
|
2
|
|
|
|
|
5828
|
SEQID => 0, |
412
|
|
|
|
|
|
|
START => 1, |
413
|
|
|
|
|
|
|
STOP => 2, |
414
|
|
|
|
|
|
|
STRND => 3, |
415
|
|
|
|
|
|
|
NAME => 4, |
416
|
|
|
|
|
|
|
ID => 5, |
417
|
|
|
|
|
|
|
TYPE => 6, |
418
|
|
|
|
|
|
|
SRC => 7, |
419
|
|
|
|
|
|
|
SCORE => 8, |
420
|
|
|
|
|
|
|
PHASE => 9, |
421
|
|
|
|
|
|
|
ATTRB => 10, |
422
|
|
|
|
|
|
|
SUBF => 11, |
423
|
2
|
|
|
2
|
|
9
|
}; |
|
2
|
|
|
|
|
4
|
|
424
|
|
|
|
|
|
|
our $IDCOUNT = 0; |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
1; |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
#### Aliases #### |
429
|
|
|
|
|
|
|
# to maintain compatibility with Bio::SeqFeature::Lite and Bio::SeqFeatureI we |
430
|
|
|
|
|
|
|
# put in plenty of aliases to some of the methods |
431
|
|
|
|
|
|
|
*stop = \&end; |
432
|
|
|
|
|
|
|
*name = \&display_name; |
433
|
|
|
|
|
|
|
*id = \&primary_id; |
434
|
|
|
|
|
|
|
*method = \&primary_tag; |
435
|
|
|
|
|
|
|
*source = \&source_tag; |
436
|
|
|
|
|
|
|
*add_segment = \&add_SeqFeature; |
437
|
|
|
|
|
|
|
*get_all_SeqFeatures = *segments = \&get_SeqFeatures; |
438
|
|
|
|
|
|
|
*each_tag_value = \&get_tag_values; |
439
|
|
|
|
|
|
|
*get_all_tags = \&all_tags; |
440
|
|
|
|
|
|
|
*gff3_string = \&gff_string; |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
#### METHODS #### |
444
|
|
|
|
|
|
|
sub new { |
445
|
1658
|
|
|
1658
|
1
|
1816
|
my $class = shift; |
446
|
1658
|
100
|
|
|
|
2373
|
if (ref($class)) { |
447
|
723
|
|
|
|
|
777
|
$class = ref($class); |
448
|
|
|
|
|
|
|
} |
449
|
1658
|
|
|
|
|
4792
|
my %args = @_; |
450
|
1658
|
|
|
|
|
2739
|
my $self = bless [], $class; |
451
|
|
|
|
|
|
|
# bless ourselves early to take advantage of some methods |
452
|
|
|
|
|
|
|
# but otherwise do what we can simply and quickly |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# primary options |
455
|
|
|
|
|
|
|
$self->[SEQID] = $args{-seq_id} || $args{-seqid} || $args{'-ref'} || |
456
|
1658
|
|
0
|
|
|
3185
|
$args{chrom} || undef; |
457
|
1658
|
|
50
|
|
|
2647
|
$self->[START] = $args{-start} || undef; |
458
|
1658
|
|
0
|
|
|
2452
|
$self->[STOP] = $args{-end} || $args{-stop} || undef; |
459
|
1658
|
50
|
|
|
|
3528
|
$self->strand($args{-strand}) if exists $args{-strand}; |
460
|
1658
|
|
100
|
|
|
3928
|
$self->[NAME] = $args{-display_name} || $args{-name} || undef; |
461
|
1658
|
|
100
|
|
|
4008
|
$self->[ID] = $args{-primary_id} || $args{-id} || undef; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# check orientation |
464
|
1658
|
50
|
33
|
|
|
5313
|
if (defined $self->[START] and defined $self->[STOP] and |
|
|
|
33
|
|
|
|
|
465
|
|
|
|
|
|
|
$self->[START] > $self->[STOP] |
466
|
|
|
|
|
|
|
) { |
467
|
|
|
|
|
|
|
# flip the coordinates around |
468
|
0
|
|
|
|
|
0
|
($self->[START], $self->[STOP]) = ($self->[STOP], $self->[START]); |
469
|
0
|
|
|
|
|
0
|
$self->[STRND] *= -1; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# additional options |
473
|
1658
|
|
50
|
|
|
4355
|
$args{-type} ||= $args{-primary_tag} || undef; |
|
|
|
66
|
|
|
|
|
474
|
1658
|
50
|
|
|
|
2182
|
if (defined $args{-type}) { |
475
|
1658
|
|
|
|
|
2362
|
$self->type($args{-type}); |
476
|
|
|
|
|
|
|
} |
477
|
1658
|
|
100
|
|
|
2433
|
$args{-source} ||= $args{-source_tag} || undef; |
|
|
|
100
|
|
|
|
|
478
|
1658
|
100
|
|
|
|
2112
|
if (defined $args{-source}) { |
479
|
1638
|
|
|
|
|
1793
|
$self->[SRC] = $args{-source}; |
480
|
|
|
|
|
|
|
} |
481
|
1658
|
100
|
|
|
|
2101
|
if (exists $args{-score}) { |
482
|
32
|
|
|
|
|
64
|
$self->[SCORE] = $args{-score}; |
483
|
|
|
|
|
|
|
} |
484
|
1658
|
100
|
|
|
|
2159
|
if (exists $args{-phase}) { |
485
|
240
|
|
|
|
|
371
|
$self->phase($args{-phase}); |
486
|
|
|
|
|
|
|
} |
487
|
1658
|
100
|
66
|
|
|
3802
|
if (exists $args{-attributes} or exists $args{-tags}) { |
488
|
1
|
|
33
|
|
|
5
|
$args{-attributes} ||= $args{-tags}; |
489
|
1
|
50
|
|
|
|
4
|
if (ref($args{-attributes}) eq 'HASH') { |
490
|
1
|
|
|
|
|
3
|
$self->[ATTRB] = $args{-attributes}; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
} |
493
|
1658
|
100
|
|
|
|
2082
|
if (exists $args{-segments}) { |
494
|
2
|
|
|
|
|
9
|
$self->[SUBF] = []; |
495
|
2
|
|
|
|
|
4
|
foreach my $s (@{ $args{-segments} }) { |
|
2
|
|
|
|
|
7
|
|
496
|
16
|
50
|
|
|
|
25
|
unless (ref($s) eq $class) { |
497
|
0
|
|
|
|
|
0
|
croak "segments should be passed as $class objects!"; |
498
|
|
|
|
|
|
|
} |
499
|
16
|
|
|
|
|
15
|
push @{$self->[SUBF]}, $s; |
|
16
|
|
|
|
|
21
|
|
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
1658
|
|
|
|
|
3685
|
return $self; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
sub seq_id { |
508
|
1671
|
|
|
1671
|
1
|
6136
|
my $self = shift; |
509
|
1671
|
50
|
|
|
|
2186
|
if (@_) { |
510
|
0
|
|
|
|
|
0
|
$self->[SEQID] = $_[0]; |
511
|
|
|
|
|
|
|
} |
512
|
1671
|
50
|
|
|
|
3504
|
return defined $self->[SEQID] ? $self->[SEQID] : undef; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub start { |
516
|
6485
|
|
|
6485
|
1
|
17576
|
my $self = shift; |
517
|
6485
|
50
|
|
|
|
7762
|
if (@_) { |
518
|
0
|
|
|
|
|
0
|
$self->[START] = $_[0]; |
519
|
|
|
|
|
|
|
} |
520
|
6485
|
50
|
|
|
|
14716
|
return defined $self->[START] ? $self->[START] : undef; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
sub end { |
524
|
2362
|
|
|
2362
|
1
|
2415
|
my $self = shift; |
525
|
2362
|
100
|
|
|
|
3090
|
if (@_) { |
526
|
9
|
|
|
|
|
14
|
$self->[STOP] = $_[0]; |
527
|
|
|
|
|
|
|
} |
528
|
2362
|
50
|
|
|
|
5097
|
return defined $self->[STOP] ? $self->[STOP] : undef; |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
sub strand { |
532
|
4245
|
|
|
4245
|
1
|
4084
|
my $self = shift; |
533
|
4245
|
100
|
|
|
|
5317
|
if (@_) { |
534
|
1699
|
|
|
|
|
1699
|
my $str = $_[0]; |
535
|
1699
|
100
|
|
|
|
5038
|
if ($str eq '+') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
536
|
262
|
|
|
|
|
325
|
$self->[STRND] = 1; |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
elsif ($str eq '-') { |
539
|
116
|
|
|
|
|
159
|
$self->[STRND] = -1; |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
elsif ($str eq '.') { |
542
|
16
|
|
|
|
|
26
|
$self->[STRND] = 0; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
elsif ($str =~ /^[\+\-]?1$/) { |
545
|
1207
|
|
|
|
|
1731
|
$self->[STRND] = $str; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
elsif ($str eq '0') { |
548
|
98
|
|
|
|
|
124
|
$self->[STRND] = 0; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
} |
551
|
4245
|
50
|
|
|
|
6851
|
return defined $self->[STRND] ? $self->[STRND] : 0; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
sub display_name { |
555
|
2189
|
|
|
2189
|
1
|
5981
|
my $self = shift; |
556
|
2189
|
100
|
|
|
|
2706
|
if (@_) { |
557
|
223
|
|
|
|
|
279
|
$self->[NAME] = $_[0]; |
558
|
|
|
|
|
|
|
} |
559
|
2189
|
100
|
|
|
|
4966
|
return defined $self->[NAME] ? $self->[NAME] : $self->primary_id; |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
sub primary_id { |
563
|
2011
|
|
|
2011
|
1
|
2029
|
my $self = shift; |
564
|
2011
|
100
|
|
|
|
3361
|
if (@_) { |
|
|
100
|
|
|
|
|
|
565
|
235
|
|
|
|
|
287
|
$self->[ID] = $_[0]; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
elsif (not defined $self->[ID]) { |
568
|
|
|
|
|
|
|
# automatically assign a new ID |
569
|
205
|
|
|
|
|
332
|
$self->[ID] = sprintf("%s_%09d", $self->primary_tag, $IDCOUNT++); |
570
|
|
|
|
|
|
|
} |
571
|
2011
|
|
|
|
|
4163
|
return $self->[ID]; |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
sub primary_tag { |
575
|
9333
|
|
|
9333
|
1
|
9581
|
my $self = shift; |
576
|
9333
|
100
|
|
|
|
10911
|
if (@_) { |
577
|
16
|
|
|
|
|
18
|
$self->[TYPE] = $_[0]; |
578
|
|
|
|
|
|
|
} |
579
|
9333
|
|
50
|
|
|
11410
|
$self->[TYPE] ||= 'region'; |
580
|
9333
|
|
|
|
|
16416
|
return $self->[TYPE]; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
sub source_tag { |
584
|
1925
|
|
|
1925
|
1
|
1961
|
my $self = shift; |
585
|
1925
|
50
|
|
|
|
2360
|
if (@_) { |
586
|
0
|
|
|
|
|
0
|
$self->[SRC] = $_[0]; |
587
|
|
|
|
|
|
|
} |
588
|
1925
|
50
|
|
|
|
3335
|
return defined $self->[SRC] ? $self->[SRC] : undef; |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub type { |
592
|
1901
|
|
|
1901
|
1
|
3395
|
my $self = shift; |
593
|
1901
|
100
|
|
|
|
2556
|
if (@_) { |
594
|
1658
|
|
|
|
|
1636
|
my $type = $_[0]; |
595
|
1658
|
50
|
|
|
|
2532
|
if ($type =~ /:/) { |
596
|
0
|
|
|
|
|
0
|
my ($t, $s) = split /:/, $type; |
597
|
0
|
|
|
|
|
0
|
$self->[TYPE] = $t; |
598
|
0
|
|
|
|
|
0
|
$self->[SRC] = $s; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
else { |
601
|
1658
|
|
|
|
|
2607
|
$self->[TYPE] = $type; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
} |
604
|
1901
|
|
50
|
|
|
2446
|
$self->[TYPE] ||= undef; |
605
|
1901
|
|
100
|
|
|
4107
|
$self->[SRC] ||= undef; |
606
|
1901
|
100
|
|
|
|
2509
|
if (defined $self->[SRC]) { |
607
|
243
|
|
|
|
|
834
|
return join(':', $self->[TYPE], $self->[SRC]); |
608
|
|
|
|
|
|
|
} |
609
|
1658
|
|
|
|
|
1732
|
return $self->[TYPE]; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
sub score { |
613
|
119
|
|
|
119
|
1
|
126
|
my $self = shift; |
614
|
119
|
100
|
|
|
|
190
|
if (@_) { |
615
|
68
|
|
|
|
|
97
|
$self->[SCORE] = $_[0]; |
616
|
|
|
|
|
|
|
} |
617
|
119
|
100
|
|
|
|
244
|
return defined $self->[SCORE] ? $self->[SCORE] : undef; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
sub phase { |
621
|
420
|
|
|
420
|
1
|
497
|
my $self = shift; |
622
|
420
|
100
|
|
|
|
578
|
if (@_) { |
623
|
356
|
|
|
|
|
353
|
my $p = $_[0]; |
624
|
356
|
50
|
|
|
|
734
|
unless ($p =~ /^[012\.]$/) { |
625
|
0
|
|
|
|
|
0
|
cluck "phase must be 0, 1, 2 or .!"; |
626
|
|
|
|
|
|
|
} |
627
|
356
|
|
|
|
|
617
|
$self->[PHASE] = $p; |
628
|
|
|
|
|
|
|
} |
629
|
420
|
100
|
|
|
|
668
|
return defined $self->[PHASE] ? $self->[PHASE] : undef; |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
sub duplicate { |
633
|
471
|
|
|
471
|
1
|
464
|
my $self = shift; |
634
|
471
|
|
|
|
|
621
|
my $n = $self->new( |
635
|
|
|
|
|
|
|
-seq_id => $self->[SEQID], |
636
|
|
|
|
|
|
|
-start => $self->[START], |
637
|
|
|
|
|
|
|
-end => $self->[STOP], |
638
|
|
|
|
|
|
|
-strand => $self->strand, |
639
|
|
|
|
|
|
|
-type => $self->primary_tag, |
640
|
|
|
|
|
|
|
-source => $self->source_tag, |
641
|
|
|
|
|
|
|
-display_name => $self->name |
642
|
|
|
|
|
|
|
); |
643
|
471
|
50
|
|
|
|
851
|
$n->score( $self->[SCORE] ) if defined $self->[SCORE]; |
644
|
471
|
50
|
|
|
|
546
|
$n->phase( $self->[PHASE] ) if defined $self->[PHASE]; |
645
|
471
|
|
|
|
|
666
|
return $n; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
sub add_SeqFeature { |
649
|
773
|
|
|
773
|
1
|
776
|
my $self = shift; |
650
|
773
|
|
100
|
|
|
1414
|
$self->[SUBF] ||= []; |
651
|
773
|
|
|
|
|
770
|
my $count = 0; |
652
|
773
|
|
|
|
|
937
|
foreach my $s (@_) { |
653
|
773
|
50
|
|
|
|
1130
|
if (ref($s) eq ref($self)) { |
654
|
773
|
50
|
|
|
|
977
|
$s->seq_id($self->seq_id) unless ($s->seq_id); |
655
|
773
|
100
|
|
|
|
1159
|
$s->strand($self->strand) unless ($s->strand); |
656
|
773
|
50
|
|
|
|
1044
|
$s->source_tag($self->source_tag) unless ($s->source_tag); |
657
|
773
|
|
|
|
|
743
|
push @{ $self->[SUBF] }, $s; |
|
773
|
|
|
|
|
1040
|
|
658
|
773
|
|
|
|
|
975
|
$count++; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
else { |
661
|
0
|
|
|
|
|
0
|
cluck "please use SeqFeature objects when adding sub features!"; |
662
|
|
|
|
|
|
|
} |
663
|
|
|
|
|
|
|
} |
664
|
773
|
|
|
|
|
1540
|
return $count; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub get_SeqFeatures { |
668
|
1215
|
|
|
1215
|
1
|
1331
|
my $self = shift; |
669
|
1215
|
100
|
|
|
|
1677
|
return unless $self->[SUBF]; |
670
|
1067
|
|
|
|
|
989
|
my @children; |
671
|
1067
|
|
|
|
|
964
|
foreach (@{ $self->[SUBF] }) { |
|
1067
|
|
|
|
|
1358
|
|
672
|
10867
|
|
|
|
|
10690
|
push @children, $_; |
673
|
|
|
|
|
|
|
} |
674
|
1067
|
50
|
|
|
|
2498
|
return wantarray ? @children : \@children; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
sub delete_SeqFeature { |
678
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
679
|
0
|
0
|
|
|
|
0
|
return unless $self->[SUBF]; |
680
|
0
|
|
|
|
|
0
|
my $id = shift; |
681
|
0
|
|
|
|
|
0
|
my $d; |
682
|
0
|
|
|
|
|
0
|
my $i = 0; |
683
|
0
|
|
|
|
|
0
|
foreach (@{ $self->[SUBF] }) { |
|
0
|
|
|
|
|
0
|
|
684
|
0
|
0
|
|
|
|
0
|
if ($_->[ID] eq $id) { |
685
|
0
|
|
|
|
|
0
|
$d = $i; |
686
|
0
|
|
|
|
|
0
|
last; |
687
|
|
|
|
|
|
|
} |
688
|
0
|
|
|
|
|
0
|
$i++; |
689
|
|
|
|
|
|
|
} |
690
|
0
|
0
|
|
|
|
0
|
if (defined $d) { |
691
|
0
|
|
|
|
|
0
|
splice(@{$self->[SUBF]}, $d, 1); |
|
0
|
|
|
|
|
0
|
|
692
|
0
|
|
|
|
|
0
|
return 1; |
693
|
|
|
|
|
|
|
} |
694
|
0
|
|
|
|
|
0
|
return; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub add_tag_value { |
698
|
518
|
|
|
518
|
1
|
741
|
my ($self, $key, $value) = @_; |
699
|
518
|
100
|
66
|
|
|
1157
|
return unless ($key and $value); |
700
|
450
|
|
100
|
|
|
1119
|
$self->[ATTRB] ||= {}; |
701
|
450
|
50
|
|
|
|
675
|
if (exists $self->[ATTRB]->{$key}) { |
702
|
0
|
0
|
|
|
|
0
|
if (ref($self->[ATTRB]->{$key}) eq 'ARRAY') { |
703
|
0
|
|
|
|
|
0
|
push @{ $self->[ATTRB]->{$key} }, $value; |
|
0
|
|
|
|
|
0
|
|
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
else { |
706
|
0
|
|
|
|
|
0
|
my $current = $self->[ATTRB]->{$key}; |
707
|
0
|
|
|
|
|
0
|
$self->[ATTRB]->{$key} = [$current, $value]; |
708
|
|
|
|
|
|
|
} |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
else { |
711
|
450
|
|
|
|
|
855
|
$self->[ATTRB]->{$key} = $value; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
sub has_tag { |
716
|
516
|
|
|
516
|
1
|
647
|
my ($self, $key) = @_; |
717
|
516
|
100
|
|
|
|
814
|
return unless $self->[ATTRB]; |
718
|
367
|
|
|
|
|
683
|
return exists $self->[ATTRB]->{$key}; |
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
sub get_tag_values { |
722
|
425
|
|
|
425
|
1
|
515
|
my ($self, $key) = @_; |
723
|
425
|
100
|
|
|
|
586
|
return unless $self->[ATTRB]; |
724
|
424
|
100
|
|
|
|
566
|
if (exists $self->[ATTRB]->{$key}) { |
725
|
411
|
50
|
|
|
|
531
|
if (ref($self->[ATTRB]->{$key}) eq 'ARRAY') { |
726
|
0
|
0
|
|
|
|
0
|
return wantarray ? @{ $self->[ATTRB]->{$key} } : $self->[ATTRB]->{$key}; |
|
0
|
|
|
|
|
0
|
|
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
else { |
729
|
411
|
|
|
|
|
757
|
return $self->[ATTRB]->{$key}; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
else { |
733
|
13
|
|
|
|
|
28
|
return; |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub attributes { |
738
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
739
|
1
|
50
|
|
|
|
13
|
return unless $self->[ATTRB]; |
740
|
0
|
0
|
|
|
|
0
|
return wantarray ? %{ $self->[ATTRB] } : $self->[ATTRB]; |
|
0
|
|
|
|
|
0
|
|
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
sub all_tags { |
744
|
49
|
|
|
49
|
1
|
48
|
my $self = shift; |
745
|
49
|
100
|
|
|
|
76
|
return unless $self->[ATTRB]; |
746
|
15
|
|
|
|
|
15
|
my @k = keys %{ $self->[ATTRB] }; |
|
15
|
|
|
|
|
36
|
|
747
|
15
|
50
|
|
|
|
33
|
return wantarray ? @k : \@k; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
sub remove_tag { |
751
|
0
|
|
|
0
|
1
|
0
|
my ($self, $key) = @_; |
752
|
0
|
0
|
|
|
|
0
|
return unless $self->[ATTRB]; |
753
|
0
|
0
|
|
|
|
0
|
if (exists $self->[ATTRB]->{$key}) { |
754
|
0
|
|
|
|
|
0
|
delete $self->[ATTRB]->{$key}; |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
### Range Methods |
761
|
|
|
|
|
|
|
# Borrowed from Bio::RangeI, but does not do strong/weak strand checks |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
sub length { |
764
|
232
|
|
|
232
|
1
|
222
|
my $self = shift; |
765
|
232
|
|
|
|
|
328
|
return $self->end - $self->start + 1; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
sub overlaps { |
769
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
770
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
771
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
772
|
|
|
|
|
|
|
return not ( |
773
|
0
|
|
0
|
|
|
0
|
$self->start > $other->end or |
774
|
|
|
|
|
|
|
$self->end < $other->start |
775
|
|
|
|
|
|
|
); |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub contains { |
779
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
780
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
781
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
782
|
|
|
|
|
|
|
return ( |
783
|
0
|
|
0
|
|
|
0
|
$other->start >= $self->start and |
784
|
|
|
|
|
|
|
$other->end <= $self->end |
785
|
|
|
|
|
|
|
); |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
sub equals { |
789
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
790
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
791
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
792
|
|
|
|
|
|
|
return ( |
793
|
0
|
|
0
|
|
|
0
|
$other->start == $self->start and |
794
|
|
|
|
|
|
|
$other->end == $self->end |
795
|
|
|
|
|
|
|
); |
796
|
|
|
|
|
|
|
} |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
sub intersection { |
799
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
800
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
801
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
802
|
0
|
|
|
|
|
0
|
my ($start, $stop); |
803
|
0
|
0
|
|
|
|
0
|
if ($self->start >= $other->start) { |
804
|
0
|
|
|
|
|
0
|
$start = $self->start; |
805
|
|
|
|
|
|
|
} |
806
|
|
|
|
|
|
|
else { |
807
|
0
|
|
|
|
|
0
|
$start = $other->start; |
808
|
|
|
|
|
|
|
} |
809
|
0
|
0
|
|
|
|
0
|
if ($self->end <= $other->end) { |
810
|
0
|
|
|
|
|
0
|
$stop = $self->end; |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
else { |
813
|
0
|
|
|
|
|
0
|
$stop = $other->end; |
814
|
|
|
|
|
|
|
} |
815
|
0
|
0
|
|
|
|
0
|
return if $start > $stop; |
816
|
0
|
|
|
|
|
0
|
return $self->new( |
817
|
|
|
|
|
|
|
-seq_id => $self->seq_id, |
818
|
|
|
|
|
|
|
-start => $start, |
819
|
|
|
|
|
|
|
-end => $stop, |
820
|
|
|
|
|
|
|
); |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
sub union { |
824
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
825
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
826
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
827
|
0
|
|
|
|
|
0
|
my ($start, $stop); |
828
|
0
|
0
|
|
|
|
0
|
if ($self->start <= $other->start) { |
829
|
0
|
|
|
|
|
0
|
$start = $self->start; |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
else { |
832
|
0
|
|
|
|
|
0
|
$start = $other->start; |
833
|
|
|
|
|
|
|
} |
834
|
0
|
0
|
|
|
|
0
|
if ($self->end >= $other->end) { |
835
|
0
|
|
|
|
|
0
|
$stop = $self->end; |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
else { |
838
|
0
|
|
|
|
|
0
|
$stop = $other->end; |
839
|
|
|
|
|
|
|
} |
840
|
0
|
0
|
|
|
|
0
|
return if $start > $stop; |
841
|
0
|
|
|
|
|
0
|
return $self->new( |
842
|
|
|
|
|
|
|
-seq_id => $self->seq_id, |
843
|
|
|
|
|
|
|
-start => $start, |
844
|
|
|
|
|
|
|
-end => $stop, |
845
|
|
|
|
|
|
|
); |
846
|
|
|
|
|
|
|
} |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
sub subtract { |
849
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
850
|
0
|
0
|
0
|
|
|
0
|
return unless ($other and ref($other)); |
851
|
0
|
0
|
|
|
|
0
|
return unless ($self->seq_id eq $other->seq_id); |
852
|
0
|
0
|
|
|
|
0
|
return if $self->equals($other); |
853
|
0
|
|
|
|
|
0
|
my @pieces; |
854
|
0
|
0
|
|
|
|
0
|
if ($self->overlaps($other)) { |
855
|
0
|
|
|
|
|
0
|
my $int = $self->intersection($other); |
856
|
0
|
0
|
|
|
|
0
|
if ($self->start < $int->start) { |
857
|
0
|
|
|
|
|
0
|
push @pieces, $self->new( |
858
|
|
|
|
|
|
|
-seq_id => $self->seq_id, |
859
|
|
|
|
|
|
|
-start => $self->start, |
860
|
|
|
|
|
|
|
-end => $int->start - 1, |
861
|
|
|
|
|
|
|
); |
862
|
|
|
|
|
|
|
} |
863
|
0
|
0
|
|
|
|
0
|
if ($self->end > $int->end) { |
864
|
0
|
|
|
|
|
0
|
push @pieces, $self->new( |
865
|
|
|
|
|
|
|
-seq_id => $self->seq_id, |
866
|
|
|
|
|
|
|
-start => $int->end + 1, |
867
|
|
|
|
|
|
|
-end => $self->end, |
868
|
|
|
|
|
|
|
); |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
else { |
872
|
0
|
|
|
|
|
0
|
push @pieces, $self->new( |
873
|
|
|
|
|
|
|
-seq_id => $self->seq_id, |
874
|
|
|
|
|
|
|
-start => $self->start, |
875
|
|
|
|
|
|
|
-end => $self->end, |
876
|
|
|
|
|
|
|
); |
877
|
|
|
|
|
|
|
} |
878
|
0
|
0
|
|
|
|
0
|
return wantarray ? @pieces : \@pieces; |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
### Export methods |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
sub version { |
885
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
886
|
2
|
50
|
33
|
|
|
12
|
if (@_ and $_[0] ne '3') { |
887
|
0
|
|
|
|
|
0
|
carp "sorry, only GFF version 3 is currently supported!"; |
888
|
|
|
|
|
|
|
} |
889
|
2
|
|
|
|
|
3
|
return 3; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub gff_string { |
893
|
|
|
|
|
|
|
# exports version 3 GFF, sorry, no mechanism for others.... |
894
|
101
|
|
|
101
|
1
|
102
|
my $self = shift; |
895
|
101
|
|
50
|
|
|
131
|
my $recurse = shift || 0; |
896
|
101
|
|
100
|
|
|
129
|
my $childIDs = shift || undef; |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
# skip myself if we've already printed myself |
899
|
|
|
|
|
|
|
return if ($recurse and $childIDs and |
900
|
101
|
100
|
66
|
|
|
229
|
not exists $childIDs->{ $self->primary_id } ); |
|
|
|
100
|
|
|
|
|
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
# map all parent-child relationships |
903
|
49
|
100
|
66
|
|
|
100
|
if ($recurse and not defined $childIDs) { |
904
|
|
|
|
|
|
|
# we have a top level SeqFeature and we need to go spelunking for IDs |
905
|
2
|
|
|
|
|
4
|
$childIDs = {}; |
906
|
2
|
|
|
|
|
5
|
foreach my $f ($self->get_SeqFeatures) { |
907
|
14
|
|
|
|
|
19
|
$f->_spelunk($self->primary_id, $childIDs); |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
# basics |
912
|
49
|
50
|
50
|
|
|
64
|
my $string = join("\t", ( |
|
|
0
|
50
|
|
|
|
|
|
|
50
|
50
|
|
|
|
|
|
|
100
|
50
|
|
|
|
|
913
|
|
|
|
|
|
|
$self->seq_id || '.', |
914
|
|
|
|
|
|
|
$self->source_tag || '.', |
915
|
|
|
|
|
|
|
$self->primary_tag, |
916
|
|
|
|
|
|
|
$self->start || 1, |
917
|
|
|
|
|
|
|
$self->end || 1, |
918
|
|
|
|
|
|
|
defined $self->score ? $self->score : '.', |
919
|
|
|
|
|
|
|
$self->strand > 0 ? '+' : $self->strand < 0 ? '-' : '.', |
920
|
|
|
|
|
|
|
defined $self->phase ? $self->phase : '.', |
921
|
|
|
|
|
|
|
) ); |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
# group attributes |
924
|
49
|
|
|
|
|
68
|
my $attributes; |
925
|
49
|
|
|
|
|
57
|
my $name = $self->display_name; |
926
|
49
|
50
|
|
|
|
55
|
if ($name) { |
927
|
|
|
|
|
|
|
# names are optional and may not exist |
928
|
49
|
|
|
|
|
68
|
$attributes = sprintf "Name=%s;ID=%s", $self->_encode($name), |
929
|
|
|
|
|
|
|
$self->_encode( $self->primary_id ); |
930
|
|
|
|
|
|
|
} |
931
|
|
|
|
|
|
|
else { |
932
|
|
|
|
|
|
|
# IDs always exist |
933
|
0
|
|
|
|
|
0
|
$attributes = sprintf "ID=%s", $self->_encode( $self->primary_id ); |
934
|
|
|
|
|
|
|
} |
935
|
49
|
50
|
|
|
|
82
|
if ($childIDs) { |
936
|
49
|
100
|
|
|
|
66
|
if (exists $childIDs->{$self->primary_id}) { |
937
|
|
|
|
|
|
|
$attributes .= ';Parent=' . join(',', |
938
|
95
|
|
|
|
|
106
|
sort {$a cmp $b} |
939
|
99
|
|
|
|
|
118
|
map { $self->_encode($_) } |
940
|
47
|
|
|
|
|
48
|
keys %{ $childIDs->{$self->primary_id} }); |
|
47
|
|
|
|
|
55
|
|
941
|
47
|
|
|
|
|
77
|
delete $childIDs->{$self->primary_id}; |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
} |
944
|
49
|
|
|
|
|
81
|
foreach my $tag ($self->all_tags) { |
945
|
41
|
50
|
|
|
|
53
|
next if $tag eq 'Name'; |
946
|
41
|
50
|
|
|
|
49
|
next if $tag eq 'ID'; |
947
|
41
|
100
|
|
|
|
74
|
next if $tag eq 'Parent'; |
948
|
27
|
|
|
|
|
37
|
my $value = $self->get_tag_values($tag); |
949
|
27
|
50
|
|
|
|
39
|
if (ref($value) eq 'ARRAY') { |
950
|
0
|
|
|
|
|
0
|
$value = join(",", map { $self->_encode($_) } @$value); |
|
0
|
|
|
|
|
0
|
|
951
|
|
|
|
|
|
|
} |
952
|
|
|
|
|
|
|
else { |
953
|
27
|
|
|
|
|
31
|
$value = $self->_encode($value); |
954
|
|
|
|
|
|
|
} |
955
|
27
|
|
|
|
|
47
|
$attributes .= ";$tag=$value"; |
956
|
|
|
|
|
|
|
} |
957
|
49
|
|
|
|
|
89
|
$string .= "\t$attributes\n"; |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
# recurse |
960
|
49
|
50
|
|
|
|
57
|
if ($recurse) { |
961
|
49
|
|
|
|
|
77
|
foreach my $f ($self->get_SeqFeatures) { |
962
|
99
|
|
|
|
|
137
|
my $s = $f->gff_string(1, $childIDs); |
963
|
99
|
100
|
|
|
|
179
|
$string .= $s if $s; |
964
|
|
|
|
|
|
|
} |
965
|
|
|
|
|
|
|
} |
966
|
49
|
|
|
|
|
92
|
return $string; |
967
|
|
|
|
|
|
|
} |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
sub _encode { |
970
|
224
|
|
|
224
|
|
249
|
my ($self, $value) = @_; |
971
|
224
|
|
|
|
|
270
|
$value =~ s/([\t\n\r%&\=;, ])/sprintf("%%%X",ord($1))/ge; |
|
0
|
|
|
|
|
0
|
|
972
|
224
|
|
|
|
|
374
|
return $value; |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
sub _spelunk { |
976
|
99
|
|
|
99
|
|
142
|
my ($self, $parentID, $childIDs) = @_; |
977
|
99
|
|
|
|
|
105
|
$childIDs->{$self->primary_id}{$parentID} += 1; |
978
|
99
|
|
|
|
|
111
|
foreach my $f ($self->get_SeqFeatures) { |
979
|
85
|
|
|
|
|
101
|
$f->_spelunk($self->primary_id, $childIDs); |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
sub bed_string { |
984
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
985
|
0
|
0
|
0
|
|
|
|
my $string = join("\t", ( |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
986
|
|
|
|
|
|
|
$self->seq_id || '.', |
987
|
|
|
|
|
|
|
($self->start || 1) - 1, |
988
|
|
|
|
|
|
|
$self->end || 1, |
989
|
|
|
|
|
|
|
$self->display_name || $self->primary_id, |
990
|
|
|
|
|
|
|
defined $self->score ? $self->score : 0, |
991
|
|
|
|
|
|
|
$self->strand >= 0 ? '+' : '-', |
992
|
|
|
|
|
|
|
) ); |
993
|
0
|
|
|
|
|
|
return "$string\n"; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
__END__ |