line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqFeatureI |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to <bioperl-l@bioperl.org> |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqFeatureI - Abstract interface of a Sequence Feature |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# get a seqfeature somehow, eg, from a Sequence with Features attached |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
foreach $feat ( $seq->get_SeqFeatures() ) { |
23
|
|
|
|
|
|
|
print "Feature from ", $feat->start, " to ", |
24
|
|
|
|
|
|
|
$feat->end, " Primary tag ", $feat->primary_tag, |
25
|
|
|
|
|
|
|
", produced by ", $feat->source_tag(), "\n"; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
if ( $feat->strand == 0 ) { |
28
|
|
|
|
|
|
|
print "Feature applicable to either strand\n"; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
else { |
31
|
|
|
|
|
|
|
print "Feature on strand ", $feat->strand,"\n"; # -1,1 |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
print "feature location is ",$feat->start, "..", |
35
|
|
|
|
|
|
|
$feat->end, " on strand ", $feat->strand, "\n"; |
36
|
|
|
|
|
|
|
print "easy utility to print locations in GenBank/EMBL way ", |
37
|
|
|
|
|
|
|
$feat->location->to_FTstring(), "\n"; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
foreach $tag ( $feat->get_all_tags() ) { |
40
|
|
|
|
|
|
|
print "Feature has tag ", $tag, " with values, ", |
41
|
|
|
|
|
|
|
join(' ',$feat->get_tag_values($tag)), "\n"; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
print "new feature\n" if $feat->has_tag('new'); |
44
|
|
|
|
|
|
|
# features can have sub features |
45
|
|
|
|
|
|
|
my @subfeat = $feat->get_SeqFeatures(); |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This interface defines the functions one can expect for any Sequence |
51
|
|
|
|
|
|
|
Feature, whatever its implementation or whether it is a more complex |
52
|
|
|
|
|
|
|
type (eg, a Gene). This object does not actually provide any |
53
|
|
|
|
|
|
|
implementation, it just provides the definitions of what methods one can |
54
|
|
|
|
|
|
|
call. See Bio::SeqFeature::Generic for a good standard implementation |
55
|
|
|
|
|
|
|
of this object |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 FEEDBACK |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
60
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
61
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
64
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 Support |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
73
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
74
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
75
|
|
|
|
|
|
|
with code and data examples if at all possible. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 Reporting Bugs |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
80
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
81
|
|
|
|
|
|
|
web: |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 APPENDIX |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
88
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Let the code begin... |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
package Bio::SeqFeatureI; |
97
|
149
|
|
|
149
|
|
738
|
use vars qw($HasInMemory); |
|
149
|
|
|
|
|
393
|
|
|
149
|
|
|
|
|
5954
|
|
98
|
149
|
|
|
149
|
|
569
|
use strict; |
|
149
|
|
|
|
|
177
|
|
|
149
|
|
|
|
|
6093
|
|
99
|
|
|
|
|
|
|
BEGIN { |
100
|
149
|
|
|
149
|
|
225
|
eval { require Bio::DB::InMemoryCache }; |
|
149
|
|
|
|
|
41136
|
|
101
|
149
|
50
|
|
|
|
643
|
if( $@ ) { $HasInMemory = 0 } |
|
0
|
|
|
|
|
0
|
|
102
|
149
|
|
|
|
|
2427
|
else { $HasInMemory = 1 } |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
149
|
|
|
149
|
|
657
|
use Bio::Seq; |
|
149
|
|
|
|
|
164
|
|
|
149
|
|
|
|
|
2966
|
|
106
|
|
|
|
|
|
|
|
107
|
149
|
|
|
149
|
|
454
|
use Carp; |
|
149
|
|
|
|
|
158
|
|
|
149
|
|
|
|
|
7016
|
|
108
|
|
|
|
|
|
|
|
109
|
149
|
|
|
149
|
|
528
|
use base qw(Bio::RangeI); |
|
149
|
|
|
|
|
156
|
|
|
149
|
|
|
|
|
202489
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 Bio::SeqFeatureI specific methods |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
New method interfaces. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 get_SeqFeatures |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Title : get_SeqFeatures |
120
|
|
|
|
|
|
|
Usage : @feats = $feat->get_SeqFeatures(); |
121
|
|
|
|
|
|
|
Function: Returns an array of sub Sequence Features |
122
|
|
|
|
|
|
|
Returns : An array |
123
|
|
|
|
|
|
|
Args : none |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub get_SeqFeatures{ |
128
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
129
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 display_name |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Title : display_name |
136
|
|
|
|
|
|
|
Usage : $name = $feat->display_name() |
137
|
|
|
|
|
|
|
Function: Returns the human-readable name of the feature for displays. |
138
|
|
|
|
|
|
|
Returns : a string |
139
|
|
|
|
|
|
|
Args : none |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=cut |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub display_name { |
144
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 primary_tag |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Title : primary_tag |
150
|
|
|
|
|
|
|
Usage : $tag = $feat->primary_tag() |
151
|
|
|
|
|
|
|
Function: Returns the primary tag for a feature, |
152
|
|
|
|
|
|
|
eg 'exon' |
153
|
|
|
|
|
|
|
Returns : a string |
154
|
|
|
|
|
|
|
Args : none |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub primary_tag{ |
160
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
161
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 source_tag |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Title : source_tag |
169
|
|
|
|
|
|
|
Usage : $tag = $feat->source_tag() |
170
|
|
|
|
|
|
|
Function: Returns the source tag for a feature, |
171
|
|
|
|
|
|
|
eg, 'genscan' |
172
|
|
|
|
|
|
|
Returns : a string |
173
|
|
|
|
|
|
|
Args : none |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=cut |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub source_tag{ |
179
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
180
|
|
|
|
|
|
|
|
181
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 has_tag |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Title : has_tag |
187
|
|
|
|
|
|
|
Usage : $tag_exists = $self->has_tag('some_tag') |
188
|
|
|
|
|
|
|
Function: |
189
|
|
|
|
|
|
|
Returns : TRUE if the specified tag exists, and FALSE otherwise |
190
|
|
|
|
|
|
|
Args : |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub has_tag{ |
195
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head2 get_tag_values |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Title : get_tag_values |
204
|
|
|
|
|
|
|
Usage : @values = $self->get_tag_values('some_tag') |
205
|
|
|
|
|
|
|
Function: |
206
|
|
|
|
|
|
|
Returns : An array comprising the values of the specified tag. |
207
|
|
|
|
|
|
|
Args : a string |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
throws an exception if there is no such tag |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=cut |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub get_tag_values { |
214
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 get_tagset_values |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Title : get_tagset_values |
220
|
|
|
|
|
|
|
Usage : @values = $self->get_tagset_values(qw(label transcript_id product)) |
221
|
|
|
|
|
|
|
Function: |
222
|
|
|
|
|
|
|
Returns : An array comprising the values of the specified tags, in order of tags |
223
|
|
|
|
|
|
|
Args : An array of strings |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
does NOT throw an exception if none of the tags are present |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
this method is useful for getting a human-readable label for a |
228
|
|
|
|
|
|
|
SeqFeatureI; not all tags can be assumed to be present, so a list of |
229
|
|
|
|
|
|
|
possible tags in preferential order is provided |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=cut |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# interface + abstract method |
234
|
|
|
|
|
|
|
sub get_tagset_values { |
235
|
127
|
|
|
127
|
1
|
163
|
my ($self, @args) = @_; |
236
|
127
|
|
|
|
|
103
|
my @vals = (); |
237
|
127
|
|
|
|
|
134
|
foreach my $arg (@args) { |
238
|
128
|
100
|
|
|
|
181
|
if ($self->has_tag($arg)) { |
239
|
67
|
|
|
|
|
101
|
push(@vals, $self->get_tag_values($arg)); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
127
|
|
|
|
|
180
|
return @vals; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head2 get_all_tags |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Title : get_all_tags |
248
|
|
|
|
|
|
|
Usage : @tags = $feat->get_all_tags() |
249
|
|
|
|
|
|
|
Function: gives all tags for this feature |
250
|
|
|
|
|
|
|
Returns : an array of strings |
251
|
|
|
|
|
|
|
Args : none |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub get_all_tags{ |
257
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 attach_seq |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : attach_seq |
263
|
|
|
|
|
|
|
Usage : $sf->attach_seq($seq) |
264
|
|
|
|
|
|
|
Function: Attaches a Bio::Seq object to this feature. This |
265
|
|
|
|
|
|
|
Bio::Seq object is for the *entire* sequence: ie |
266
|
|
|
|
|
|
|
from 1 to 10000 |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Note that it is not guaranteed that if you obtain a feature from |
269
|
|
|
|
|
|
|
an object in bioperl, it will have a sequence attached. Also, |
270
|
|
|
|
|
|
|
implementors of this interface can choose to provide an empty |
271
|
|
|
|
|
|
|
implementation of this method. I.e., there is also no guarantee |
272
|
|
|
|
|
|
|
that if you do attach a sequence, seq() or entire_seq() will not |
273
|
|
|
|
|
|
|
return undef. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
The reason that this method is here on the interface is to enable |
276
|
|
|
|
|
|
|
you to call it on every SeqFeatureI compliant object, and |
277
|
|
|
|
|
|
|
that it will be implemented in a useful way and set to a useful |
278
|
|
|
|
|
|
|
value for the great majority of use cases. Implementors who choose |
279
|
|
|
|
|
|
|
to ignore the call are encouraged to specifically state this in |
280
|
|
|
|
|
|
|
their documentation. |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Example : |
283
|
|
|
|
|
|
|
Returns : TRUE on success |
284
|
|
|
|
|
|
|
Args : a Bio::PrimarySeqI compliant object |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=cut |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
sub attach_seq { |
290
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 seq |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Title : seq |
296
|
|
|
|
|
|
|
Usage : $tseq = $sf->seq() |
297
|
|
|
|
|
|
|
Function: returns the truncated sequence (if there is a sequence attached) |
298
|
|
|
|
|
|
|
for this feature |
299
|
|
|
|
|
|
|
Example : |
300
|
|
|
|
|
|
|
Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence |
301
|
|
|
|
|
|
|
bounded by start & end, or undef if there is no sequence attached. |
302
|
|
|
|
|
|
|
If the strand is defined and set to -1, the returned sequence is |
303
|
|
|
|
|
|
|
the reverse-complement of the region |
304
|
|
|
|
|
|
|
Args : none |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub seq { |
310
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head2 entire_seq |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Title : entire_seq |
316
|
|
|
|
|
|
|
Usage : $whole_seq = $sf->entire_seq() |
317
|
|
|
|
|
|
|
Function: gives the entire sequence that this seqfeature is attached to |
318
|
|
|
|
|
|
|
Example : |
319
|
|
|
|
|
|
|
Returns : a Bio::PrimarySeqI compliant object, or undef if there is no |
320
|
|
|
|
|
|
|
sequence attached |
321
|
|
|
|
|
|
|
Args : none |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub entire_seq { |
327
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 seq_id |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Title : seq_id |
334
|
|
|
|
|
|
|
Usage : $obj->seq_id($newval) |
335
|
|
|
|
|
|
|
Function: There are many cases when you make a feature that you |
336
|
|
|
|
|
|
|
do know the sequence name, but do not know its actual |
337
|
|
|
|
|
|
|
sequence. This is an attribute such that you can store |
338
|
|
|
|
|
|
|
the ID (e.g., display_id) of the sequence. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
This attribute should *not* be used in GFF dumping, as |
341
|
|
|
|
|
|
|
that should come from the collection in which the seq |
342
|
|
|
|
|
|
|
feature was found. |
343
|
|
|
|
|
|
|
Returns : value of seq_id |
344
|
|
|
|
|
|
|
Args : newvalue (optional) |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=cut |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub seq_id { |
350
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 gff_string |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Title : gff_string |
356
|
|
|
|
|
|
|
Usage : $str = $feat->gff_string; |
357
|
|
|
|
|
|
|
$str = $feat->gff_string($gff_formatter); |
358
|
|
|
|
|
|
|
Function: Provides the feature information in GFF format. |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
The implementation provided here returns GFF2 by default. If you |
361
|
|
|
|
|
|
|
want a different version, supply an object implementing a method |
362
|
|
|
|
|
|
|
gff_string() accepting a SeqFeatureI object as argument. E.g., to |
363
|
|
|
|
|
|
|
obtain GFF1 format, do the following: |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
my $gff1io = Bio::Tools::GFF->new(-gff_version => 1); |
366
|
|
|
|
|
|
|
$gff1str = $feat->gff_string($gff1io); |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
Returns : A string |
369
|
|
|
|
|
|
|
Args : Optionally, an object implementing gff_string(). |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub gff_string{ |
375
|
0
|
|
|
0
|
1
|
0
|
my ($self,$formatter) = @_; |
376
|
|
|
|
|
|
|
|
377
|
0
|
0
|
|
|
|
0
|
$formatter = $self->_static_gff_formatter unless $formatter; |
378
|
0
|
|
|
|
|
0
|
return $formatter->gff_string($self); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
my $static_gff_formatter = undef; |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 _static_gff_formatter |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Title : _static_gff_formatter |
386
|
|
|
|
|
|
|
Usage : |
387
|
|
|
|
|
|
|
Function: |
388
|
|
|
|
|
|
|
Example : |
389
|
|
|
|
|
|
|
Returns : |
390
|
|
|
|
|
|
|
Args : |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub _static_gff_formatter{ |
396
|
1
|
|
|
1
|
|
2
|
my ($self,@args) = @_; |
397
|
1
|
|
|
|
|
7
|
require Bio::Tools::GFF; # on the fly inclusion -- is this better? |
398
|
1
|
50
|
|
|
|
3
|
if( !defined $static_gff_formatter ) { |
399
|
1
|
|
|
|
|
9
|
$static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2); |
400
|
|
|
|
|
|
|
} |
401
|
1
|
|
|
|
|
2
|
return $static_gff_formatter; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 Decorating methods |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
These methods have an implementation provided by Bio::SeqFeatureI, |
408
|
|
|
|
|
|
|
but can be validly overwritten by subclasses |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head2 spliced_seq |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Title : spliced_seq |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Usage : $seq = $feature->spliced_seq() |
415
|
|
|
|
|
|
|
$seq = $feature_with_remote_locations->spliced_seq($db_for_seqs) |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Function: Provides a sequence of the feature which is the most |
418
|
|
|
|
|
|
|
semantically "relevant" feature for this sequence. A default |
419
|
|
|
|
|
|
|
implementation is provided which for simple cases returns just |
420
|
|
|
|
|
|
|
the sequence, but for split cases, loops over the split location |
421
|
|
|
|
|
|
|
to return the sequence. In the case of split locations with |
422
|
|
|
|
|
|
|
remote locations, eg |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
join(AB000123:5567-5589,80..1144) |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
in the case when a database object is passed in, it will attempt |
427
|
|
|
|
|
|
|
to retrieve the sequence from the database object, and "Do the right thing", |
428
|
|
|
|
|
|
|
however if no database object is provided, it will generate the correct |
429
|
|
|
|
|
|
|
number of N's (DNA) or X's (protein, though this is unlikely). |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
This function is deliberately "magical" attempting to second guess |
432
|
|
|
|
|
|
|
what a user wants as "the" sequence for this feature. |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Implementing classes are free to override this method with their |
435
|
|
|
|
|
|
|
own magic if they have a better idea what the user wants. |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Args : [optional] |
438
|
|
|
|
|
|
|
-db A L<Bio::DB::RandomAccessI> compliant object if |
439
|
|
|
|
|
|
|
one needs to retrieve remote seqs. |
440
|
|
|
|
|
|
|
-nosort boolean if the locations should not be sorted |
441
|
|
|
|
|
|
|
by start location. This may occur, for instance, |
442
|
|
|
|
|
|
|
in a circular sequence where a gene span starts |
443
|
|
|
|
|
|
|
before the end of the sequence and ends after the |
444
|
|
|
|
|
|
|
sequence start. Example : join(15685..16260,1..207) |
445
|
|
|
|
|
|
|
(default = if sequence is_circular(), 1, otherwise 0) |
446
|
|
|
|
|
|
|
-phase truncates the returned sequence based on the |
447
|
|
|
|
|
|
|
intron phase (0,1,2). |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Returns : A L<Bio::Seq> object |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=cut |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
sub spliced_seq { |
454
|
138
|
|
|
138
|
1
|
45495
|
my $self = shift; |
455
|
138
|
|
|
|
|
228
|
my @args = @_; |
456
|
138
|
|
|
|
|
400
|
my ($db, $nosort, $phase) = |
457
|
|
|
|
|
|
|
$self->_rearrange([qw(DB NOSORT PHASE)], @args); |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# set no_sort based on the parent sequence status |
460
|
138
|
100
|
|
|
|
726
|
if ($self->entire_seq->is_circular) { |
461
|
122
|
|
|
|
|
101
|
$nosort = 1; |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# (added 7/7/06 to allow use old API (with warnings) |
465
|
138
|
100
|
|
|
|
148
|
my $old_api = (!(grep {$_ =~ /(?:nosort|db|phase)/} @args)) ? 1 : 0; |
|
262
|
|
|
|
|
836
|
|
466
|
138
|
50
|
66
|
|
|
434
|
if (@args && $old_api) { |
467
|
0
|
|
|
|
|
0
|
$self->warn( q(API has changed; please use '-db' or '-nosort' ) |
468
|
|
|
|
|
|
|
. qq(for args. See POD for more details.)); |
469
|
0
|
0
|
|
|
|
0
|
$db = shift @args if @args; |
470
|
0
|
0
|
|
|
|
0
|
$nosort = shift @args if @args; |
471
|
0
|
0
|
|
|
|
0
|
$phase = shift @args if @args; |
472
|
|
|
|
|
|
|
}; |
473
|
|
|
|
|
|
|
|
474
|
138
|
100
|
100
|
|
|
226
|
if (defined($phase) && ($phase < 0 || $phase > 2)) { |
|
|
|
66
|
|
|
|
|
475
|
2
|
|
|
|
|
13
|
$self->warn("Phase must be 0,1, or 2. Setting phase to 0..."); |
476
|
2
|
|
|
|
|
2
|
$phase = 0; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
138
|
50
|
33
|
|
|
487
|
if ( $db && ref($db) && ! $db->isa('Bio::DB::RandomAccessI') ) { |
|
|
50
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
480
|
0
|
|
|
|
|
0
|
$self->warn( "Must pass in a valid Bio::DB::RandomAccessI object" |
481
|
|
|
|
|
|
|
. " for access to remote locations for spliced_seq"); |
482
|
0
|
|
|
|
|
0
|
$db = undef; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
elsif ( defined $db && $HasInMemory && $db->isa('Bio::DB::InMemoryCache') ) { |
485
|
0
|
|
|
|
|
0
|
$db = Bio::DB::InMemoryCache->new(-seqdb => $db); |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
138
|
100
|
|
|
|
269
|
if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { |
489
|
114
|
100
|
|
|
|
132
|
if ($phase) { |
490
|
2
|
|
|
|
|
6
|
$self->debug("Subseq start: ",$phase+1,"\tend: ",$self->end,"\n"); |
491
|
2
|
|
|
|
|
4
|
my $seqstr = substr($self->seq->seq, $phase); |
492
|
2
|
|
|
|
|
6
|
my $out = Bio::Seq->new( -id => $self->entire_seq->display_id |
493
|
|
|
|
|
|
|
. "_spliced_feat", |
494
|
|
|
|
|
|
|
-seq => $seqstr); |
495
|
2
|
|
|
|
|
7
|
return $out; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
else { |
498
|
112
|
|
|
|
|
194
|
return $self->seq(); # nice and easy! |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# redundant test, but the above ISA is probably not ideal. |
503
|
24
|
50
|
|
|
|
40
|
if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { |
504
|
0
|
|
|
|
|
0
|
$self->throw("not atomic, not split, yikes, in trouble!"); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
24
|
|
|
|
|
27
|
my $seqstr = ''; |
508
|
24
|
|
|
|
|
39
|
my $seqid = $self->entire_seq->display_id; |
509
|
|
|
|
|
|
|
# This is to deal with reverse strand features |
510
|
|
|
|
|
|
|
# so we are really sorting features 5' -> 3' on their strand |
511
|
|
|
|
|
|
|
# i.e. rev strand features will be sorted largest to smallest |
512
|
|
|
|
|
|
|
# as this is how revcom CDSes seem to be annotated in genbank. |
513
|
|
|
|
|
|
|
# Might need to eventually allow this to be programmable? |
514
|
|
|
|
|
|
|
# (can I mention how much fun this is NOT! --jason) |
515
|
|
|
|
|
|
|
|
516
|
24
|
|
|
|
|
29
|
my ($mixed,$mixedloc, $fstrand) = (0); |
517
|
|
|
|
|
|
|
|
518
|
24
|
50
|
33
|
|
|
114
|
if ( $self->isa('Bio::Das::SegmentI') and not $self->absolute ) { |
519
|
0
|
|
|
|
|
0
|
$self->warn( "Calling spliced_seq with a Bio::Das::SegmentI which " |
520
|
|
|
|
|
|
|
. "does have absolute set to 1 -- be warned you may not " |
521
|
|
|
|
|
|
|
. "be getting things on the correct strand"); |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
24
|
|
|
|
|
47
|
my @locset = $self->location->each_Location; |
525
|
24
|
|
|
|
|
24
|
my @locs; |
526
|
24
|
100
|
|
|
|
31
|
if ( not $nosort ) { |
527
|
|
|
|
|
|
|
# @locs = map { $_->[0] } |
528
|
|
|
|
|
|
|
# sort so that most negative is first basically to order |
529
|
|
|
|
|
|
|
# the features on the opposite strand 5'->3' on their strand |
530
|
|
|
|
|
|
|
# rather than the way most are input which is on the fwd strand |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
# sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation |
533
|
|
|
|
|
|
|
my @proc_locs = |
534
|
|
|
|
|
|
|
map { |
535
|
2
|
100
|
|
|
|
4
|
$fstrand = $_->strand unless defined $fstrand; |
|
5
|
|
|
|
|
14
|
|
536
|
5
|
50
|
33
|
|
|
8
|
$mixed = 1 if defined $_->strand && $fstrand != $_->strand; |
537
|
|
|
|
|
|
|
|
538
|
5
|
50
|
|
|
|
11
|
if( defined $_->seq_id ) { |
539
|
5
|
100
|
|
|
|
8
|
$mixedloc = 1 if( $_->seq_id ne $seqid ); |
540
|
|
|
|
|
|
|
} |
541
|
5
|
|
50
|
|
|
12
|
[ $_, $_->start * ($_->strand || 1) ]; |
542
|
|
|
|
|
|
|
} @locset; |
543
|
|
|
|
|
|
|
|
544
|
2
|
|
|
|
|
5
|
my @sort_locs; |
545
|
2
|
100
|
|
|
|
8
|
if ( $fstrand == 1 ) { |
|
|
50
|
|
|
|
|
|
546
|
1
|
|
|
|
|
5
|
@sort_locs = sort { $a->[1] <=> $b->[1] } @proc_locs; # Yes Tim, Schwartzian transformation |
|
3
|
|
|
|
|
6
|
|
547
|
|
|
|
|
|
|
}elsif ( $fstrand == -1 ){ |
548
|
1
|
|
|
|
|
6
|
@sort_locs = sort { $b->[1] <=> $a->[1] } @proc_locs; # Yes Tim, Schwartzian transformation |
|
1
|
|
|
|
|
4
|
|
549
|
|
|
|
|
|
|
} else { |
550
|
0
|
|
|
|
|
0
|
@sort_locs = @proc_locs; |
551
|
|
|
|
|
|
|
} |
552
|
2
|
|
|
|
|
4
|
@locs = map { $_->[0] } @sort_locs; |
|
5
|
|
|
|
|
7
|
|
553
|
|
|
|
|
|
|
|
554
|
2
|
50
|
|
|
|
7
|
if ( $mixed ) { |
555
|
0
|
|
|
|
|
0
|
$self->warn( "Mixed strand locations, spliced seq using the " |
556
|
|
|
|
|
|
|
. "input order rather than trying to sort"); |
557
|
0
|
|
|
|
|
0
|
@locs = @locset; |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
else { |
561
|
|
|
|
|
|
|
# use the original order instead of trying to sort |
562
|
22
|
|
|
|
|
33
|
@locs = @locset; |
563
|
22
|
|
|
|
|
50
|
$fstrand = $locs[0]->strand; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
|
567
|
24
|
|
|
|
|
25
|
my $last_id = undef; |
568
|
24
|
|
|
|
|
26
|
my $called_seq = undef; |
569
|
|
|
|
|
|
|
# This will be left as undefined if 1) db is remote or 2)seq_id is undefined. |
570
|
|
|
|
|
|
|
# In that case, old code is used to make exon sequence |
571
|
24
|
|
|
|
|
22
|
my $called_seq_seq = undef; |
572
|
24
|
|
|
|
|
17
|
my $called_seq_len = undef; |
573
|
|
|
|
|
|
|
|
574
|
24
|
|
|
|
|
31
|
foreach my $loc ( @locs ) { |
575
|
107
|
50
|
|
|
|
224
|
if ( not $loc->isa("Bio::Location::Atomic") ) { |
576
|
0
|
|
|
|
|
0
|
$self->throw("Can only deal with one level deep locations"); |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
|
579
|
107
|
50
|
|
|
|
145
|
if ( $fstrand != $loc->strand ) { |
580
|
0
|
|
|
|
|
0
|
$self->warn("feature strand is different from location strand!"); |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
107
|
|
|
|
|
83
|
my $loc_seq_id; |
584
|
107
|
100
|
|
|
|
151
|
if ( defined $loc->seq_id ) { |
585
|
105
|
|
|
|
|
130
|
$loc_seq_id = $loc->seq_id; |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# deal with remote sequences |
588
|
105
|
100
|
|
|
|
134
|
if ($loc_seq_id ne $seqid ) { |
589
|
|
|
|
|
|
|
# might be too big to download whole sequence |
590
|
2
|
|
|
|
|
3
|
$called_seq_seq = undef; |
591
|
|
|
|
|
|
|
|
592
|
2
|
50
|
|
|
|
4
|
if ( defined $db ) { |
593
|
0
|
|
|
|
|
0
|
my $sid = $loc_seq_id; |
594
|
0
|
|
|
|
|
0
|
$sid =~ s/\.\d+$//g; |
595
|
0
|
|
|
|
|
0
|
eval { |
596
|
0
|
|
|
|
|
0
|
$called_seq = $db->get_Seq_by_acc($sid); |
597
|
|
|
|
|
|
|
}; |
598
|
0
|
0
|
|
|
|
0
|
if( $@ ) { |
599
|
0
|
|
|
|
|
0
|
$self->warn( "In attempting to join a remote location, sequence $sid " |
600
|
|
|
|
|
|
|
. "was not in database. Will provide padding N's. Full exception \n\n$@"); |
601
|
0
|
|
|
|
|
0
|
$called_seq = undef; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
else { |
605
|
2
|
|
|
|
|
14
|
$self->warn( "cannot get remote location for ".$loc_seq_id ." without a valid " |
606
|
|
|
|
|
|
|
. "Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)"); |
607
|
2
|
|
|
|
|
3
|
$called_seq = undef; |
608
|
|
|
|
|
|
|
} |
609
|
2
|
50
|
|
|
|
4
|
if ( !defined $called_seq ) { |
610
|
2
|
|
|
|
|
6
|
$seqstr .= 'N' x $loc->length; |
611
|
2
|
|
|
|
|
5
|
next; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
# have local sequence available |
615
|
|
|
|
|
|
|
else { |
616
|
|
|
|
|
|
|
# don't have to pull out source sequence again if it's local unless |
617
|
|
|
|
|
|
|
# it's the first exon or different from previous exon |
618
|
103
|
100
|
66
|
|
|
307
|
unless (defined(($last_id) && $last_id eq $loc_seq_id )){ |
619
|
23
|
|
|
|
|
47
|
$called_seq = $self->entire_seq; |
620
|
23
|
|
|
|
|
46
|
$called_seq_seq = $called_seq->seq(); # this is slow |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
#undefined $loc->seq->id |
625
|
|
|
|
|
|
|
else { |
626
|
2
|
|
|
|
|
4
|
$called_seq = $self->entire_seq; |
627
|
2
|
|
|
|
|
1
|
$called_seq_seq = undef; |
628
|
|
|
|
|
|
|
} |
629
|
|
|
|
|
|
|
|
630
|
105
|
|
|
|
|
168
|
my ($start,$end) = ($loc->start,$loc->end); |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
# does the called sequence make sense? Bug 1780 |
633
|
105
|
|
|
|
|
82
|
my $called_seq_len; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# can avoid a seq() call on called_seq |
636
|
105
|
100
|
|
|
|
114
|
if (defined($called_seq_seq)) { |
637
|
103
|
|
|
|
|
79
|
$called_seq_len = length($called_seq_seq); |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
# can't avoid a seq() call on called_seq |
640
|
|
|
|
|
|
|
else { |
641
|
2
|
|
|
|
|
4
|
$called_seq_len = $called_seq->length # this is slow |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
105
|
50
|
|
|
|
130
|
if ($called_seq_len < $loc->end) { |
645
|
0
|
|
|
|
|
0
|
my $accession = $called_seq->accession; |
646
|
0
|
|
|
|
|
0
|
my $orig_id = $self->seq_id; # originating sequence |
647
|
0
|
|
|
|
|
0
|
my ($locus) = $self->get_tagset_values("locus_tag"); |
648
|
0
|
|
|
|
|
0
|
$self->throw( "Location end ($end) exceeds length ($called_seq_len) of " |
649
|
|
|
|
|
|
|
. "called sequence $accession.\nCheck sequence version used in " |
650
|
|
|
|
|
|
|
. "$locus locus-tagged SeqFeature in $orig_id."); |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
105
|
50
|
|
|
|
359
|
if ( $self->isa('Bio::Das::SegmentI') ) { |
654
|
|
|
|
|
|
|
# $called_seq is Bio::DB::GFF::RelSegment, as well as its subseq(); |
655
|
|
|
|
|
|
|
# Bio::DB::GFF::RelSegment::seq() returns a Bio::PrimarySeq, and using seq() |
656
|
|
|
|
|
|
|
# in turn returns a string. Confused? |
657
|
0
|
|
|
|
|
0
|
$seqstr .= $called_seq->subseq($start,$end)->seq()->seq(); # this is slow |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
else { |
660
|
105
|
|
|
|
|
99
|
my $exon_seq; |
661
|
105
|
100
|
|
|
|
113
|
if (defined ($called_seq_seq)){ |
662
|
103
|
|
|
|
|
214
|
$exon_seq = substr($called_seq_seq, $start-1, $end-$start+1); # this is quick |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
else { |
665
|
2
|
|
|
|
|
4
|
$exon_seq = $called_seq->subseq($loc->start,$loc->end); # this is slow |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# If guide_strand is defined, assemble the sequence first and revcom later if needed, |
669
|
|
|
|
|
|
|
# if its not defined, apply revcom immediately to proper locations |
670
|
105
|
50
|
|
|
|
154
|
if (defined $self->location->guide_strand) { |
671
|
105
|
|
|
|
|
157
|
$seqstr .= $exon_seq; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
else { |
674
|
0
|
0
|
|
|
|
0
|
my $strand = defined ($loc->strand) ? ($loc->strand) : 0; |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
# revcomp $exon_seq |
677
|
0
|
0
|
|
|
|
0
|
if ($strand == -1) { |
678
|
0
|
|
|
|
|
0
|
$exon_seq = reverse($exon_seq); |
679
|
0
|
|
|
|
|
0
|
$exon_seq =~ tr/ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy/TVGHCDMKNYSAABWXRtvghcdmknysaabwxr/; |
680
|
0
|
|
|
|
|
0
|
$seqstr .= $exon_seq; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
else { |
683
|
0
|
|
|
|
|
0
|
$seqstr .= $exon_seq; |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
105
|
100
|
|
|
|
197
|
$last_id = $loc_seq_id if (defined($loc_seq_id)); |
689
|
|
|
|
|
|
|
} #next $loc |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# Use revcom only after the whole sequence has been assembled |
692
|
24
|
50
|
|
|
|
37
|
my $guide_strand = defined ($self->location->guide_strand) ? ($self->location->guide_strand) : 0; |
693
|
24
|
100
|
|
|
|
47
|
if ($guide_strand == -1) { |
694
|
11
|
|
|
|
|
43
|
my $seqstr_obj = Bio::Seq->new(-seq => $seqstr); |
695
|
11
|
|
|
|
|
53
|
$seqstr = $seqstr_obj->revcom->seq; |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
24
|
50
|
|
|
|
43
|
if (defined($phase)) { |
699
|
0
|
|
|
|
|
0
|
$seqstr = substr($seqstr, $phase); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
24
|
|
|
|
|
64
|
my $out = Bio::Seq->new( -id => $self->entire_seq->display_id |
703
|
|
|
|
|
|
|
. "_spliced_feat", |
704
|
|
|
|
|
|
|
-seq => $seqstr); |
705
|
|
|
|
|
|
|
|
706
|
24
|
|
|
|
|
96
|
return $out; |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
=head2 location |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
Title : location |
712
|
|
|
|
|
|
|
Usage : my $location = $seqfeature->location() |
713
|
|
|
|
|
|
|
Function: returns a location object suitable for identifying location |
714
|
|
|
|
|
|
|
of feature on sequence or parent feature |
715
|
|
|
|
|
|
|
Returns : Bio::LocationI object |
716
|
|
|
|
|
|
|
Args : none |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=cut |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
sub location {
    # Abstract accessor: this interface provides no implementation, so the
    # call is handed straight to throw_not_implemented() on the invocant.
    my $self = shift;

    return $self->throw_not_implemented();
}
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=head2 primary_id |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
Title : primary_id |
731
|
|
|
|
|
|
|
Usage : $obj->primary_id($newval) |
732
|
|
|
|
|
|
|
Function: |
733
|
|
|
|
|
|
|
Example : |
734
|
|
|
|
|
|
|
Returns : value of primary_id (a scalar) |
735
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Primary ID is a synonym for the tag 'ID' |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=cut |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
sub primary_id {
    my ($self, @args) = @_;

    # Note from cjm@fruitfly.org: the primary ID used to live directly in
    # $self->{'primary_id'}. It is now kept as the value of the 'ID' tag; see
    #   http://bioperl.org/pipermail/bioperl-l/2003-December/014150.html
    # for the discussion that led to this change.

    # Setter: drop any existing 'ID' tag, then store the new value under it.
    if (@args) {
        $self->remove_tag('ID') if $self->has_tag('ID');
        $self->add_tag_value('ID', $args[0]);
    }

    # Getter (also the return value of a set): first value reported for 'ID'.
    my @ids = $self->get_tagset_values('ID');
    return $ids[0];
}
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
sub generate_unique_persistent_id {
    # DEPRECATED - use IDHandler
    # Kept as a thin wrapper: loads Bio::SeqFeature::Tools::IDHandler on
    # demand and forwards this feature to the handler method of the same name.
    my ($self) = @_;

    require Bio::SeqFeature::Tools::IDHandler;
    my $handler = Bio::SeqFeature::Tools::IDHandler->new;

    return $handler->generate_unique_persistent_id($self);
}
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=head2 phase |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
Title : phase |
774
|
|
|
|
|
|
|
Usage : $obj->phase($newval) |
775
|
|
|
|
|
|
|
Function: get/set this feature's phase. |
776
|
|
|
|
|
|
|
Example : |
777
|
|
|
|
|
|
|
Returns : undef if no phase is set, |
778
|
|
|
|
|
|
|
otherwise 0, 1, or 2 (the only valid values for phase) |
779
|
|
|
|
|
|
|
Args : on set, the new value |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
Most features do not have or need a defined phase. |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
For features representing a CDS, the phase indicates where the feature |
784
|
|
|
|
|
|
|
begins with reference to the reading frame. The phase is one of the |
785
|
|
|
|
|
|
|
integers 0, 1, or 2, indicating the number of bases that should be |
786
|
|
|
|
|
|
|
removed from the beginning of this feature to reach the first base of |
787
|
|
|
|
|
|
|
the next codon. In other words, a phase of "0" indicates that the next |
788
|
|
|
|
|
|
|
codon begins at the first base of the region described by the current |
789
|
|
|
|
|
|
|
line, a phase of "1" indicates that the next codon begins at the |
790
|
|
|
|
|
|
|
second base of this region, and a phase of "2" indicates that the |
791
|
|
|
|
|
|
|
codon begins at the third base of this region. This is NOT to be |
792
|
|
|
|
|
|
|
confused with the frame, which is simply start modulo 3. |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
For forward strand features, phase is counted from the start |
795
|
|
|
|
|
|
|
field. For reverse strand features, phase is counted from the end |
796
|
|
|
|
|
|
|
field. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub phase {
    # Get/set this feature's phase, stored as the value of the 'phase' tag.
    #
    # Args    : on set, the new value: undef, 0, 1, or 2
    # Returns : on set, the value just stored; on get, the first value of the
    #           'phase' tag, or undef if the feature has no 'phase' tag
    # Throws  : via $self->throw() if a defined new value is not 0, 1, or 2
    my $self = shift;

    if (@_) {
        my $newphase = shift;

        # Validate BEFORE touching the existing tag, so that a rejected value
        # leaves the previously stored phase intact.
        if ( defined $newphase ) {
            # A bare numeric comparison would treat any non-numeric string
            # ('', '.', 'abc') as 0 and accept it; require a real number first.
            require Scalar::Util;
            my $is_valid = Scalar::Util::looks_like_number($newphase)
                && ( $newphase == 0 || $newphase == 1 || $newphase == 2 );
            $self->throw("illegal phase value '$newphase', phase must be either undef, 0, 1, or 2")
                unless $is_valid;
        }

        $self->remove_tag('phase') if $self->has_tag('phase');
        $self->add_tag_value('phase', $newphase);
        return $newphase;
    }

    return $self->has_tag('phase') ? ($self->get_tag_values('phase'))[0] : undef;
}
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=head1 Bio::RangeI methods |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
These methods are inherited from RangeI and can be used |
818
|
|
|
|
|
|
|
directly from a SeqFeatureI interface. Remember that a |
819
|
|
|
|
|
|
|
SeqFeature is-a RangeI, and so wherever you see RangeI you |
820
|
|
|
|
|
|
|
can use a feature ($r in the below documentation). |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head2 start() |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
See L<Bio::RangeI> |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=head2 end() |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
See L<Bio::RangeI> |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=head2 strand() |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
See L<Bio::RangeI> |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=head2 overlaps() |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
See L<Bio::RangeI> |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 contains() |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
See L<Bio::RangeI> |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 equals() |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
See L<Bio::RangeI> |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=head2 intersection() |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
See L<Bio::RangeI> |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=head2 union() |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
See L<Bio::RangeI> |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=cut |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
1; |