| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqFeatureI |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to <bioperl-l@bioperl.org> |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqFeatureI - Abstract interface of a Sequence Feature |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# get a seqfeature somehow, eg, from a Sequence with Features attached |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
foreach $feat ( $seq->get_SeqFeatures() ) { |
|
23
|
|
|
|
|
|
|
print "Feature from ", $feat->start, " to ", |
|
24
|
|
|
|
|
|
|
$feat->end, " Primary tag ", $feat->primary_tag, |
|
25
|
|
|
|
|
|
|
", produced by ", $feat->source_tag(), "\n"; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
if ( $feat->strand == 0 ) { |
|
28
|
|
|
|
|
|
|
print "Feature applicable to either strand\n"; |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
else { |
|
31
|
|
|
|
|
|
|
print "Feature on strand ", $feat->strand,"\n"; # -1,1 |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
print "feature location is ",$feat->start, "..", |
|
35
|
|
|
|
|
|
|
$feat->end, " on strand ", $feat->strand, "\n"; |
|
36
|
|
|
|
|
|
|
print "easy utility to print locations in GenBank/EMBL way ", |
|
37
|
|
|
|
|
|
|
$feat->location->to_FTstring(), "\n"; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
foreach $tag ( $feat->get_all_tags() ) { |
|
40
|
|
|
|
|
|
|
print "Feature has tag ", $tag, " with values, ", |
|
41
|
|
|
|
|
|
|
join(' ',$feat->get_tag_values($tag)), "\n"; |
|
42
|
|
|
|
|
|
|
} |
|
43
|
|
|
|
|
|
|
print "new feature\n" if $feat->has_tag('new'); |
|
44
|
|
|
|
|
|
|
# features can have sub features |
|
45
|
|
|
|
|
|
|
my @subfeat = $feat->get_SeqFeatures(); |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This interface defines the functions one can expect for any Sequence |
|
51
|
|
|
|
|
|
|
Feature, whatever its implementation or whether it is a more complex |
|
52
|
|
|
|
|
|
|
type (eg, a Gene). This object does not actually provide any |
|
53
|
|
|
|
|
|
|
implementation, it just provides the definitions of what methods one can |
|
54
|
|
|
|
|
|
|
call. See Bio::SeqFeature::Generic for a good standard implementation |
|
55
|
|
|
|
|
|
|
of this object |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
60
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
61
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
64
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 Support |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
73
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
|
74
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
75
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
80
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
|
81
|
|
|
|
|
|
|
web: |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 APPENDIX |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
88
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Let the code begin... |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
package Bio::SeqFeatureI; |
|
97
|
149
|
|
|
149
|
|
729
|
use vars qw($HasInMemory); |
|
|
149
|
|
|
|
|
269
|
|
|
|
149
|
|
|
|
|
5872
|
|
|
98
|
149
|
|
|
149
|
|
545
|
use strict; |
|
|
149
|
|
|
|
|
178
|
|
|
|
149
|
|
|
|
|
5993
|
|
|
99
|
|
|
|
|
|
|
BEGIN { |
|
100
|
149
|
|
|
149
|
|
223
|
eval { require Bio::DB::InMemoryCache }; |
|
|
149
|
|
|
|
|
40565
|
|
|
101
|
149
|
50
|
|
|
|
655
|
if( $@ ) { $HasInMemory = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
102
|
149
|
|
|
|
|
2341
|
else { $HasInMemory = 1 } |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
149
|
|
|
149
|
|
1335
|
use Bio::Seq; |
|
|
149
|
|
|
|
|
165
|
|
|
|
149
|
|
|
|
|
3663
|
|
|
106
|
|
|
|
|
|
|
|
|
107
|
149
|
|
|
149
|
|
454
|
use Carp; |
|
|
149
|
|
|
|
|
153
|
|
|
|
149
|
|
|
|
|
7530
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
149
|
|
|
149
|
|
493
|
use base qw(Bio::RangeI); |
|
|
149
|
|
|
|
|
152
|
|
|
|
149
|
|
|
|
|
197937
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 Bio::SeqFeatureI specific methods |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
New method interfaces. |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 get_SeqFeatures |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Title : get_SeqFeatures |
|
120
|
|
|
|
|
|
|
Usage : @feats = $feat->get_SeqFeatures(); |
|
121
|
|
|
|
|
|
|
Function: Returns an array of sub Sequence Features |
|
122
|
|
|
|
|
|
|
Returns : An array |
|
123
|
|
|
|
|
|
|
Args : none |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub get_SeqFeatures{ |
|
128
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
129
|
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 display_name |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Title : display_name |
|
136
|
|
|
|
|
|
|
Usage : $name = $feat->display_name() |
|
137
|
|
|
|
|
|
|
Function: Returns the human-readable name of the feature for displays. |
|
138
|
|
|
|
|
|
|
Returns : a string |
|
139
|
|
|
|
|
|
|
Args : none |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=cut |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub display_name { |
|
144
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 primary_tag |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Title : primary_tag |
|
150
|
|
|
|
|
|
|
Usage : $tag = $feat->primary_tag() |
|
151
|
|
|
|
|
|
|
Function: Returns the primary tag for a feature, |
|
152
|
|
|
|
|
|
|
eg 'exon' |
|
153
|
|
|
|
|
|
|
Returns : a string |
|
154
|
|
|
|
|
|
|
Args : none |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub primary_tag{ |
|
160
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
161
|
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 source_tag |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Title : source_tag |
|
169
|
|
|
|
|
|
|
Usage : $tag = $feat->source_tag() |
|
170
|
|
|
|
|
|
|
Function: Returns the source tag for a feature, |
|
171
|
|
|
|
|
|
|
eg, 'genscan' |
|
172
|
|
|
|
|
|
|
Returns : a string |
|
173
|
|
|
|
|
|
|
Args : none |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=cut |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub source_tag{ |
|
179
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
180
|
|
|
|
|
|
|
|
|
181
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 has_tag |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Title : has_tag |
|
187
|
|
|
|
|
|
|
Usage : $tag_exists = $self->has_tag('some_tag') |
|
188
|
|
|
|
|
|
|
Function: |
|
189
|
|
|
|
|
|
|
Returns : TRUE if the specified tag exists, and FALSE otherwise |
|
190
|
|
|
|
|
|
|
Args : |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub has_tag{ |
|
195
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
196
|
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head2 get_tag_values |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Title : get_tag_values |
|
204
|
|
|
|
|
|
|
Usage : @values = $self->get_tag_values('some_tag') |
|
205
|
|
|
|
|
|
|
Function: |
|
206
|
|
|
|
|
|
|
Returns : An array comprising the values of the specified tag. |
|
207
|
|
|
|
|
|
|
Args : a string |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
throws an exception if there is no such tag |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=cut |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub get_tag_values { |
|
214
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 get_tagset_values |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Title : get_tagset_values |
|
220
|
|
|
|
|
|
|
Usage : @values = $self->get_tagset_values(qw(label transcript_id product)) |
|
221
|
|
|
|
|
|
|
Function: |
|
222
|
|
|
|
|
|
|
Returns : An array comprising the values of the specified tags, in order of tags |
|
223
|
|
|
|
|
|
|
Args : An array of strings |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
does NOT throw an exception if none of the tags are present |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
this method is useful for getting a human-readable label for a |
|
228
|
|
|
|
|
|
|
SeqFeatureI; not all tags can be assumed to be present, so a list of |
|
229
|
|
|
|
|
|
|
possible tags in preferential order is provided |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=cut |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# interface + abstract method |
|
234
|
|
|
|
|
|
|
sub get_tagset_values { |
|
235
|
127
|
|
|
127
|
1
|
450
|
my ($self, @args) = @_; |
|
236
|
127
|
|
|
|
|
106
|
my @vals = (); |
|
237
|
127
|
|
|
|
|
144
|
foreach my $arg (@args) { |
|
238
|
128
|
100
|
|
|
|
189
|
if ($self->has_tag($arg)) { |
|
239
|
67
|
|
|
|
|
101
|
push(@vals, $self->get_tag_values($arg)); |
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
} |
|
242
|
127
|
|
|
|
|
198
|
return @vals; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head2 get_all_tags |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Title : get_all_tags |
|
248
|
|
|
|
|
|
|
Usage : @tags = $feat->get_all_tags() |
|
249
|
|
|
|
|
|
|
Function: gives all tags for this feature |
|
250
|
|
|
|
|
|
|
Returns : an array of strings |
|
251
|
|
|
|
|
|
|
Args : none |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub get_all_tags{ |
|
257
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 attach_seq |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : attach_seq |
|
263
|
|
|
|
|
|
|
Usage : $sf->attach_seq($seq) |
|
264
|
|
|
|
|
|
|
Function: Attaches a Bio::Seq object to this feature. This |
|
265
|
|
|
|
|
|
|
Bio::Seq object is for the *entire* sequence: ie |
|
266
|
|
|
|
|
|
|
from 1 to 10000 |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Note that it is not guaranteed that if you obtain a feature from |
|
269
|
|
|
|
|
|
|
an object in bioperl, it will have a sequence attached. Also, |
|
270
|
|
|
|
|
|
|
implementors of this interface can choose to provide an empty |
|
271
|
|
|
|
|
|
|
implementation of this method. I.e., there is also no guarantee |
|
272
|
|
|
|
|
|
|
that if you do attach a sequence, seq() or entire_seq() will not |
|
273
|
|
|
|
|
|
|
return undef. |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
The reason that this method is here on the interface is to enable |
|
276
|
|
|
|
|
|
|
you to call it on every SeqFeatureI compliant object, and |
|
277
|
|
|
|
|
|
|
that it will be implemented in a useful way and set to a useful |
|
278
|
|
|
|
|
|
|
value for the great majority of use cases. Implementors who choose |
|
279
|
|
|
|
|
|
|
to ignore the call are encouraged to specifically state this in |
|
280
|
|
|
|
|
|
|
their documentation. |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Example : |
|
283
|
|
|
|
|
|
|
Returns : TRUE on success |
|
284
|
|
|
|
|
|
|
Args : a Bio::PrimarySeqI compliant object |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=cut |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
sub attach_seq { |
|
290
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 seq |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Title : seq |
|
296
|
|
|
|
|
|
|
Usage : $tseq = $sf->seq() |
|
297
|
|
|
|
|
|
|
Function: returns the truncated sequence (if there is a sequence attached) |
|
298
|
|
|
|
|
|
|
for this feature |
|
299
|
|
|
|
|
|
|
Example : |
|
300
|
|
|
|
|
|
|
Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence |
|
301
|
|
|
|
|
|
|
bounded by start & end, or undef if there is no sequence attached. |
|
302
|
|
|
|
|
|
|
If the strand is defined and set to -1, the returned sequence is |
|
303
|
|
|
|
|
|
|
the reverse-complement of the region |
|
304
|
|
|
|
|
|
|
Args : none |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub seq { |
|
310
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head2 entire_seq |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Title : entire_seq |
|
316
|
|
|
|
|
|
|
Usage : $whole_seq = $sf->entire_seq() |
|
317
|
|
|
|
|
|
|
Function: gives the entire sequence that this seqfeature is attached to |
|
318
|
|
|
|
|
|
|
Example : |
|
319
|
|
|
|
|
|
|
Returns : a Bio::PrimarySeqI compliant object, or undef if there is no |
|
320
|
|
|
|
|
|
|
sequence attached |
|
321
|
|
|
|
|
|
|
Args : none |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub entire_seq { |
|
327
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 seq_id |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Title : seq_id |
|
334
|
|
|
|
|
|
|
Usage : $obj->seq_id($newval) |
|
335
|
|
|
|
|
|
|
Function: There are many cases when you make a feature that you |
|
336
|
|
|
|
|
|
|
do know the sequence name, but do not know its actual |
|
337
|
|
|
|
|
|
|
sequence. This is an attribute such that you can store |
|
338
|
|
|
|
|
|
|
the ID (e.g., display_id) of the sequence. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
This attribute should *not* be used in GFF dumping, as |
|
341
|
|
|
|
|
|
|
that should come from the collection in which the seq |
|
342
|
|
|
|
|
|
|
feature was found. |
|
343
|
|
|
|
|
|
|
Returns : value of seq_id |
|
344
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=cut |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub seq_id { |
|
350
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 gff_string |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Title : gff_string |
|
356
|
|
|
|
|
|
|
Usage : $str = $feat->gff_string; |
|
357
|
|
|
|
|
|
|
$str = $feat->gff_string($gff_formatter); |
|
358
|
|
|
|
|
|
|
Function: Provides the feature information in GFF format. |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
The implementation provided here returns GFF2 by default. If you |
|
361
|
|
|
|
|
|
|
want a different version, supply an object implementing a method |
|
362
|
|
|
|
|
|
|
gff_string() accepting a SeqFeatureI object as argument. E.g., to |
|
363
|
|
|
|
|
|
|
obtain GFF1 format, do the following: |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
my $gffio = Bio::Tools::GFF->new(-gff_version => 1); |
|
366
|
|
|
|
|
|
|
$gff1str = $feat->gff_string($gffio); |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
Returns : A string |
|
369
|
|
|
|
|
|
|
Args : Optionally, an object implementing gff_string(). |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub gff_string{ |
|
375
|
0
|
|
|
0
|
1
|
0
|
my ($self,$formatter) = @_; |
|
376
|
|
|
|
|
|
|
|
|
377
|
0
|
0
|
|
|
|
0
|
$formatter = $self->_static_gff_formatter unless $formatter; |
|
378
|
0
|
|
|
|
|
0
|
return $formatter->gff_string($self); |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
my $static_gff_formatter = undef; |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 _static_gff_formatter |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Title : _static_gff_formatter |
|
386
|
|
|
|
|
|
|
Usage : |
|
387
|
|
|
|
|
|
|
Function: |
|
388
|
|
|
|
|
|
|
Example : |
|
389
|
|
|
|
|
|
|
Returns : |
|
390
|
|
|
|
|
|
|
Args : |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub _static_gff_formatter{ |
|
396
|
1
|
|
|
1
|
|
2
|
my ($self,@args) = @_; |
|
397
|
1
|
|
|
|
|
5
|
require Bio::Tools::GFF; # on the fly inclusion -- is this better? |
|
398
|
1
|
50
|
|
|
|
2
|
if( !defined $static_gff_formatter ) { |
|
399
|
1
|
|
|
|
|
8
|
$static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
1
|
|
|
|
|
2
|
return $static_gff_formatter; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 Decorating methods |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
These methods have an implementation provided by Bio::SeqFeatureI, |
|
408
|
|
|
|
|
|
|
but can be validly overwritten by subclasses |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head2 spliced_seq |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Title : spliced_seq |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Usage : $seq = $feature->spliced_seq() |
|
415
|
|
|
|
|
|
|
$seq = $feature_with_remote_locations->spliced_seq($db_for_seqs) |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Function: Provides a sequence of the feature which is the most |
|
418
|
|
|
|
|
|
|
semantically "relevant" feature for this sequence. A default |
|
419
|
|
|
|
|
|
|
implementation is provided which for simple cases returns just |
|
420
|
|
|
|
|
|
|
the sequence, but for split cases, loops over the split location |
|
421
|
|
|
|
|
|
|
to return the sequence. In the case of split locations with |
|
422
|
|
|
|
|
|
|
remote locations, eg |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
join(AB000123:5567-5589,80..1144) |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
in the case when a database object is passed in, it will attempt |
|
427
|
|
|
|
|
|
|
to retrieve the sequence from the database object, and "Do the right thing", |
|
428
|
|
|
|
|
|
|
however if no database object is provided, it will generate the correct |
|
429
|
|
|
|
|
|
|
number of N's (DNA) or X's (protein, though this is unlikely). |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
This function is deliberately "magical" attempting to second guess |
|
432
|
|
|
|
|
|
|
what a user wants as "the" sequence for this feature. |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Implementing classes are free to override this method with their |
|
435
|
|
|
|
|
|
|
own magic if they have a better idea what the user wants. |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Args : [optional] |
|
438
|
|
|
|
|
|
|
-db A L<Bio::DB::RandomAccessI> compliant object if |
|
439
|
|
|
|
|
|
|
one needs to retrieve remote seqs. |
|
440
|
|
|
|
|
|
|
-nosort boolean if the locations should not be sorted |
|
441
|
|
|
|
|
|
|
by start location. This may occur, for instance, |
|
442
|
|
|
|
|
|
|
in a circular sequence where a gene span starts |
|
443
|
|
|
|
|
|
|
before the end of the sequence and ends after the |
|
444
|
|
|
|
|
|
|
sequence start. Example : join(15685..16260,1..207) |
|
445
|
|
|
|
|
|
|
(default = if sequence is_circular(), 1, otherwise 0) |
|
446
|
|
|
|
|
|
|
-phase truncates the returned sequence based on the |
|
447
|
|
|
|
|
|
|
intron phase (0,1,2). |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Returns : A L<Bio::PrimarySeqI> object |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=cut |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
sub spliced_seq { |
|
454
|
138
|
|
|
138
|
1
|
56548
|
my $self = shift; |
|
455
|
138
|
|
|
|
|
229
|
my @args = @_; |
|
456
|
138
|
|
|
|
|
492
|
my ($db, $nosort, $phase) = |
|
457
|
|
|
|
|
|
|
$self->_rearrange([qw(DB NOSORT PHASE)], @args); |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# set no_sort based on the parent sequence status |
|
460
|
138
|
100
|
|
|
|
406
|
if ($self->entire_seq->is_circular) { |
|
461
|
122
|
|
|
|
|
118
|
$nosort = 1; |
|
462
|
|
|
|
|
|
|
} |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# (added 7/7/06 to allow use old API (with warnings) |
|
465
|
138
|
100
|
|
|
|
167
|
my $old_api = (!(grep {$_ =~ /(?:nosort|db|phase)/} @args)) ? 1 : 0; |
|
|
262
|
|
|
|
|
852
|
|
|
466
|
138
|
50
|
66
|
|
|
438
|
if (@args && $old_api) { |
|
467
|
0
|
|
|
|
|
0
|
$self->warn( q(API has changed; please use '-db' or '-nosort' ) |
|
468
|
|
|
|
|
|
|
. qq(for args. See POD for more details.)); |
|
469
|
0
|
0
|
|
|
|
0
|
$db = shift @args if @args; |
|
470
|
0
|
0
|
|
|
|
0
|
$nosort = shift @args if @args; |
|
471
|
0
|
0
|
|
|
|
0
|
$phase = shift @args if @args; |
|
472
|
|
|
|
|
|
|
}; |
|
473
|
|
|
|
|
|
|
|
|
474
|
138
|
100
|
100
|
|
|
255
|
if (defined($phase) && ($phase < 0 || $phase > 2)) { |
|
|
|
|
66
|
|
|
|
|
|
475
|
2
|
|
|
|
|
12
|
$self->warn("Phase must be 0,1, or 2. Setting phase to 0..."); |
|
476
|
2
|
|
|
|
|
1
|
$phase = 0; |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
|
|
479
|
138
|
50
|
33
|
|
|
510
|
if ( $db && ref($db) && ! $db->isa('Bio::DB::RandomAccessI') ) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
480
|
0
|
|
|
|
|
0
|
$self->warn( "Must pass in a valid Bio::DB::RandomAccessI object" |
|
481
|
|
|
|
|
|
|
. " for access to remote locations for spliced_seq"); |
|
482
|
0
|
|
|
|
|
0
|
$db = undef; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
elsif ( defined $db && $HasInMemory && $db->isa('Bio::DB::InMemoryCache') ) { |
|
485
|
0
|
|
|
|
|
0
|
$db = Bio::DB::InMemoryCache->new(-seqdb => $db); |
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
|
|
488
|
138
|
100
|
|
|
|
271
|
if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { |
|
489
|
114
|
100
|
|
|
|
181
|
if ($phase) { |
|
490
|
2
|
|
|
|
|
6
|
$self->debug("Subseq start: ",$phase+1,"\tend: ",$self->end,"\n"); |
|
491
|
2
|
|
|
|
|
6
|
my $seqstr = substr($self->seq->seq, $phase); |
|
492
|
2
|
|
|
|
|
7
|
my $out = Bio::Seq->new( -id => $self->entire_seq->display_id |
|
493
|
|
|
|
|
|
|
. "_spliced_feat", |
|
494
|
|
|
|
|
|
|
-seq => $seqstr); |
|
495
|
2
|
|
|
|
|
8
|
return $out; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
else { |
|
498
|
112
|
|
|
|
|
187
|
return $self->seq(); # nice and easy! |
|
499
|
|
|
|
|
|
|
} |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# redundant test, but the above ISA is probably not ideal. |
|
503
|
24
|
50
|
|
|
|
41
|
if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { |
|
504
|
0
|
|
|
|
|
0
|
$self->throw("not atomic, not split, yikes, in trouble!"); |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
24
|
|
|
|
|
34
|
my $seqstr = ''; |
|
508
|
24
|
|
|
|
|
46
|
my $seqid = $self->entire_seq->display_id; |
|
509
|
|
|
|
|
|
|
# This is to deal with reverse strand features |
|
510
|
|
|
|
|
|
|
# so we are really sorting features 5' -> 3' on their strand |
|
511
|
|
|
|
|
|
|
# i.e. rev strand features will be sorted largest to smallest |
|
512
|
|
|
|
|
|
|
# as this how revcom CDSes seem to be annotated in genbank. |
|
513
|
|
|
|
|
|
|
# Might need to eventually allow this to be programable? |
|
514
|
|
|
|
|
|
|
# (can I mention how much fun this is NOT! --jason) |
|
515
|
|
|
|
|
|
|
|
|
516
|
24
|
|
|
|
|
39
|
my ($mixed,$mixedloc, $fstrand) = (0); |
|
517
|
|
|
|
|
|
|
|
|
518
|
24
|
50
|
33
|
|
|
118
|
if ( $self->isa('Bio::Das::SegmentI') and not $self->absolute ) { |
|
519
|
0
|
|
|
|
|
0
|
$self->warn( "Calling spliced_seq with a Bio::Das::SegmentI which " |
|
520
|
|
|
|
|
|
|
. "does have absolute set to 1 -- be warned you may not " |
|
521
|
|
|
|
|
|
|
. "be getting things on the correct strand"); |
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
24
|
|
|
|
|
43
|
my @locset = $self->location->each_Location; |
|
525
|
24
|
|
|
|
|
26
|
my @locs; |
|
526
|
24
|
100
|
|
|
|
39
|
if ( not $nosort ) { |
|
527
|
|
|
|
|
|
|
# @locs = map { $_->[0] } |
|
528
|
|
|
|
|
|
|
# sort so that most negative is first basically to order |
|
529
|
|
|
|
|
|
|
# the features on the opposite strand 5'->3' on their strand |
|
530
|
|
|
|
|
|
|
# rather than they way most are input which is on the fwd strand |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
# sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation |
|
533
|
|
|
|
|
|
|
my @proc_locs = |
|
534
|
|
|
|
|
|
|
map { |
|
535
|
2
|
100
|
|
|
|
4
|
$fstrand = $_->strand unless defined $fstrand; |
|
|
5
|
|
|
|
|
15
|
|
|
536
|
5
|
50
|
33
|
|
|
12
|
$mixed = 1 if defined $_->strand && $fstrand != $_->strand; |
|
537
|
|
|
|
|
|
|
|
|
538
|
5
|
50
|
|
|
|
11
|
if( defined $_->seq_id ) { |
|
539
|
5
|
100
|
|
|
|
8
|
$mixedloc = 1 if( $_->seq_id ne $seqid ); |
|
540
|
|
|
|
|
|
|
} |
|
541
|
5
|
|
50
|
|
|
12
|
[ $_, $_->start * ($_->strand || 1) ]; |
|
542
|
|
|
|
|
|
|
} @locset; |
|
543
|
|
|
|
|
|
|
|
|
544
|
2
|
|
|
|
|
3
|
my @sort_locs; |
|
545
|
2
|
100
|
|
|
|
7
|
if ( $fstrand == 1 ) { |
|
|
|
50
|
|
|
|
|
|
|
546
|
1
|
|
|
|
|
6
|
@sort_locs = sort { $a->[1] <=> $b->[1] } @proc_locs; # Yes Tim, Schwartzian transformation |
|
|
3
|
|
|
|
|
6
|
|
|
547
|
|
|
|
|
|
|
}elsif ( $fstrand == -1 ){ |
|
548
|
1
|
|
|
|
|
7
|
@sort_locs = sort { $b->[1] <=> $a->[1] } @proc_locs; # Yes Tim, Schwartzian transformation |
|
|
1
|
|
|
|
|
4
|
|
|
549
|
|
|
|
|
|
|
} else { |
|
550
|
0
|
|
|
|
|
0
|
@sort_locs = @proc_locs; |
|
551
|
|
|
|
|
|
|
} |
|
552
|
2
|
|
|
|
|
5
|
@locs = map { $_->[0] } @sort_locs; |
|
|
5
|
|
|
|
|
8
|
|
|
553
|
|
|
|
|
|
|
|
|
554
|
2
|
50
|
|
|
|
8
|
if ( $mixed ) { |
|
555
|
0
|
|
|
|
|
0
|
$self->warn( "Mixed strand locations, spliced seq using the " |
|
556
|
|
|
|
|
|
|
. "input order rather than trying to sort"); |
|
557
|
0
|
|
|
|
|
0
|
@locs = @locset; |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
else { |
|
561
|
|
|
|
|
|
|
# use the original order instead of trying to sort |
|
562
|
22
|
|
|
|
|
30
|
@locs = @locset; |
|
563
|
22
|
|
|
|
|
53
|
$fstrand = $locs[0]->strand; |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
|
|
567
|
24
|
|
|
|
|
27
|
my $last_id = undef; |
|
568
|
24
|
|
|
|
|
29
|
my $called_seq = undef; |
|
569
|
|
|
|
|
|
|
# This will be left as undefined if 1) db is remote or 2)seq_id is undefined. |
|
570
|
|
|
|
|
|
|
# In that case, old code is used to make exon sequence |
|
571
|
24
|
|
|
|
|
20
|
my $called_seq_seq = undef; |
|
572
|
24
|
|
|
|
|
24
|
my $called_seq_len = undef; |
|
573
|
|
|
|
|
|
|
|
|
574
|
24
|
|
|
|
|
40
|
foreach my $loc ( @locs ) { |
|
575
|
107
|
50
|
|
|
|
241
|
if ( not $loc->isa("Bio::Location::Atomic") ) { |
|
576
|
0
|
|
|
|
|
0
|
$self->throw("Can only deal with one level deep locations"); |
|
577
|
|
|
|
|
|
|
} |
|
578
|
|
|
|
|
|
|
|
|
579
|
107
|
50
|
|
|
|
147
|
if ( $fstrand != $loc->strand ) { |
|
580
|
0
|
|
|
|
|
0
|
$self->warn("feature strand is different from location strand!"); |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
107
|
|
|
|
|
83
|
my $loc_seq_id; |
|
584
|
107
|
100
|
|
|
|
159
|
if ( defined $loc->seq_id ) { |
|
585
|
105
|
|
|
|
|
121
|
$loc_seq_id = $loc->seq_id; |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# deal with remote sequences |
|
588
|
105
|
100
|
|
|
|
175
|
if ($loc_seq_id ne $seqid ) { |
|
589
|
|
|
|
|
|
|
# might be too big to download whole sequence |
|
590
|
2
|
|
|
|
|
1
|
$called_seq_seq = undef; |
|
591
|
|
|
|
|
|
|
|
|
592
|
2
|
50
|
|
|
|
4
|
if ( defined $db ) { |
|
593
|
0
|
|
|
|
|
0
|
my $sid = $loc_seq_id; |
|
594
|
0
|
|
|
|
|
0
|
$sid =~ s/\.\d+$//g; |
|
595
|
0
|
|
|
|
|
0
|
eval { |
|
596
|
0
|
|
|
|
|
0
|
$called_seq = $db->get_Seq_by_acc($sid); |
|
597
|
|
|
|
|
|
|
}; |
|
598
|
0
|
0
|
|
|
|
0
|
if( $@ ) { |
|
599
|
0
|
|
|
|
|
0
|
$self->warn( "In attempting to join a remote location, sequence $sid " |
|
600
|
|
|
|
|
|
|
. "was not in database. Will provide padding N's. Full exception \n\n$@"); |
|
601
|
0
|
|
|
|
|
0
|
$called_seq = undef; |
|
602
|
|
|
|
|
|
|
} |
|
603
|
|
|
|
|
|
|
} |
|
604
|
|
|
|
|
|
|
else { |
|
605
|
2
|
|
|
|
|
13
|
$self->warn( "cannot get remote location for ".$loc_seq_id ." without a valid " |
|
606
|
|
|
|
|
|
|
. "Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)"); |
|
607
|
2
|
|
|
|
|
2
|
$called_seq = undef; |
|
608
|
|
|
|
|
|
|
} |
|
609
|
2
|
50
|
|
|
|
4
|
if ( !defined $called_seq ) { |
|
610
|
2
|
|
|
|
|
6
|
$seqstr .= 'N' x $loc->length; |
|
611
|
2
|
|
|
|
|
4
|
next; |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
# have local sequence available |
|
615
|
|
|
|
|
|
|
else { |
|
616
|
|
|
|
|
|
|
# don't have to pull out source sequence again if it's local unless |
|
617
|
|
|
|
|
|
|
# it's the first exon or different from previous exon |
|
618
|
103
|
100
|
66
|
|
|
304
|
unless (defined(($last_id) && $last_id eq $loc_seq_id )){ |
|
619
|
23
|
|
|
|
|
45
|
$called_seq = $self->entire_seq; |
|
620
|
23
|
|
|
|
|
45
|
$called_seq_seq = $called_seq->seq(); # this is slow |
|
621
|
|
|
|
|
|
|
} |
|
622
|
|
|
|
|
|
|
} |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
#undefined $loc->seq->id |
|
625
|
|
|
|
|
|
|
else { |
|
626
|
2
|
|
|
|
|
5
|
$called_seq = $self->entire_seq; |
|
627
|
2
|
|
|
|
|
3
|
$called_seq_seq = undef; |
|
628
|
|
|
|
|
|
|
} |
|
629
|
|
|
|
|
|
|
|
|
630
|
105
|
|
|
|
|
185
|
my ($start,$end) = ($loc->start,$loc->end); |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
# does the called sequence make sense? Bug 1780 |
|
633
|
105
|
|
|
|
|
77
|
my $called_seq_len; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# can avoid a seq() call on called_seq |
|
636
|
105
|
100
|
|
|
|
120
|
if (defined($called_seq_seq)) { |
|
637
|
103
|
|
|
|
|
87
|
$called_seq_len = length($called_seq_seq); |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
# can't avoid a seq() call on called_seq |
|
640
|
|
|
|
|
|
|
else { |
|
641
|
2
|
|
|
|
|
5
|
$called_seq_len = $called_seq->length # this is slow |
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
|
|
644
|
105
|
50
|
|
|
|
160
|
if ($called_seq_len < $loc->end) { |
|
645
|
0
|
|
|
|
|
0
|
my $accession = $called_seq->accession; |
|
646
|
0
|
|
|
|
|
0
|
my $orig_id = $self->seq_id; # originating sequence |
|
647
|
0
|
|
|
|
|
0
|
my ($locus) = $self->get_tagset_values("locus_tag"); |
|
648
|
0
|
|
|
|
|
0
|
$self->throw( "Location end ($end) exceeds length ($called_seq_len) of " |
|
649
|
|
|
|
|
|
|
. "called sequence $accession.\nCheck sequence version used in " |
|
650
|
|
|
|
|
|
|
. "$locus locus-tagged SeqFeature in $orig_id."); |
|
651
|
|
|
|
|
|
|
} |
|
652
|
|
|
|
|
|
|
|
|
653
|
105
|
50
|
|
|
|
346
|
if ( $self->isa('Bio::Das::SegmentI') ) { |
|
654
|
|
|
|
|
|
|
# $called_seq is Bio::DB::GFF::RelSegment, as well as its subseq(); |
|
655
|
|
|
|
|
|
|
# Bio::DB::GFF::RelSegment::seq() returns a Bio::PrimarySeq, and using seq() |
|
656
|
|
|
|
|
|
|
# in turn returns a string. Confused? |
|
657
|
0
|
|
|
|
|
0
|
$seqstr .= $called_seq->subseq($start,$end)->seq()->seq(); # this is slow |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
else { |
|
660
|
105
|
|
|
|
|
86
|
my $exon_seq; |
|
661
|
105
|
100
|
|
|
|
113
|
if (defined ($called_seq_seq)){ |
|
662
|
103
|
|
|
|
|
225
|
$exon_seq = substr($called_seq_seq, $start-1, $end-$start+1); # this is quick |
|
663
|
|
|
|
|
|
|
} |
|
664
|
|
|
|
|
|
|
else { |
|
665
|
2
|
|
|
|
|
3
|
$exon_seq = $called_seq->subseq($loc->start,$loc->end); # this is slow |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# If guide_strand is defined, assemble the sequence first and revcom later if needed, |
|
669
|
|
|
|
|
|
|
# if its not defined, apply revcom immediately to proper locations |
|
670
|
105
|
50
|
|
|
|
175
|
if (defined $self->location->guide_strand) { |
|
671
|
105
|
|
|
|
|
179
|
$seqstr .= $exon_seq; |
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
else { |
|
674
|
0
|
0
|
|
|
|
0
|
my $strand = defined ($loc->strand) ? ($loc->strand) : 0; |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
# revcomp $exon_seq |
|
677
|
0
|
0
|
|
|
|
0
|
if ($strand == -1) { |
|
678
|
0
|
|
|
|
|
0
|
$exon_seq = reverse($exon_seq); |
|
679
|
0
|
|
|
|
|
0
|
$exon_seq =~ tr/ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy/TVGHCDMKNYSAABWXRtvghcdmknysaabwxr/; |
|
680
|
0
|
|
|
|
|
0
|
$seqstr .= $exon_seq; |
|
681
|
|
|
|
|
|
|
} |
|
682
|
|
|
|
|
|
|
else { |
|
683
|
0
|
|
|
|
|
0
|
$seqstr .= $exon_seq; |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
} |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
105
|
100
|
|
|
|
213
|
$last_id = $loc_seq_id if (defined($loc_seq_id)); |
|
689
|
|
|
|
|
|
|
} #next $loc |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# Use revcom only after the whole sequence has been assembled |
|
692
|
24
|
50
|
|
|
|
46
|
my $guide_strand = defined ($self->location->guide_strand) ? ($self->location->guide_strand) : 0; |
|
693
|
24
|
100
|
|
|
|
48
|
if ($guide_strand == -1) { |
|
694
|
11
|
|
|
|
|
46
|
my $seqstr_obj = Bio::Seq->new(-seq => $seqstr); |
|
695
|
11
|
|
|
|
|
61
|
$seqstr = $seqstr_obj->revcom->seq; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
|
|
698
|
24
|
50
|
|
|
|
39
|
if (defined($phase)) { |
|
699
|
0
|
|
|
|
|
0
|
$seqstr = substr($seqstr, $phase); |
|
700
|
|
|
|
|
|
|
} |
|
701
|
|
|
|
|
|
|
|
|
702
|
24
|
|
|
|
|
70
|
my $out = Bio::Seq->new( -id => $self->entire_seq->display_id |
|
703
|
|
|
|
|
|
|
. "_spliced_feat", |
|
704
|
|
|
|
|
|
|
-seq => $seqstr); |
|
705
|
|
|
|
|
|
|
|
|
706
|
24
|
|
|
|
|
112
|
return $out; |
|
707
|
|
|
|
|
|
|
} |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
=head2 location |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
Title : location |
|
712
|
|
|
|
|
|
|
Usage : my $location = $seqfeature->location() |
|
713
|
|
|
|
|
|
|
Function: returns a location object suitable for identifying location |
|
714
|
|
|
|
|
|
|
of feature on sequence or parent feature |
|
715
|
|
|
|
|
|
|
Returns : Bio::LocationI object |
|
716
|
|
|
|
|
|
|
Args : none |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=cut |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
sub location {
    # Interface stub: this class provides no implementation, so the call is
    # forwarded to throw_not_implemented() on the invocant.
    my $self = shift;

    return $self->throw_not_implemented();
}
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=head2 primary_id |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
Title : primary_id |
|
731
|
|
|
|
|
|
|
Usage : $obj->primary_id($newval) |
|
732
|
|
|
|
|
|
|
Function: |
|
733
|
|
|
|
|
|
|
Example : |
|
734
|
|
|
|
|
|
|
Returns : value of primary_id (a scalar) |
|
735
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Primary ID is a synonym for the tag 'ID' |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=cut |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
sub primary_id {
    my ($self, @new_value) = @_;

    # note from cjm@fruitfly.org:
    # The primary id used to be kept in $self->{'primary_id'}:
    #
    #return $self->{'primary_id'} = shift if @_;
    #return $self->{'primary_id'};
    #
    # It is now stored as the single value of the 'ID' tag instead; see
    # http://bioperl.org/pipermail/bioperl-l/2003-December/014150.html
    # for the discussion that led to this change

    if (@new_value) {
        # Setter: drop any existing 'ID' tag so that only the new value
        # remains, then store the first argument (which may be undef).
        $self->remove_tag('ID') if $self->has_tag('ID');
        $self->add_tag_value('ID', $new_value[0]);
    }

    # Getter (also the return value of a set): first value of the 'ID' tag,
    # or undef when there is none.
    my ($id) = $self->get_tagset_values('ID');
    return $id;
}
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
sub generate_unique_persistent_id {
    # DEPRECATED - use IDHandler (Bio::SeqFeature::Tools::IDHandler)
    #
    # Kept as a thin wrapper: loads the handler class on demand and forwards
    # this feature to the handler's method of the same name.
    my ($self) = @_;

    require Bio::SeqFeature::Tools::IDHandler;

    my $handler = Bio::SeqFeature::Tools::IDHandler->new;
    return $handler->generate_unique_persistent_id($self);
}
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=head2 phase |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
Title : phase |
|
774
|
|
|
|
|
|
|
Usage : $obj->phase($newval) |
|
775
|
|
|
|
|
|
|
Function: get/set this feature's phase. |
|
776
|
|
|
|
|
|
|
Example : |
|
777
|
|
|
|
|
|
|
Returns : undef if no phase is set, |
|
778
|
|
|
|
|
|
|
otherwise 0, 1, or 2 (the only valid values for phase) |
|
779
|
|
|
|
|
|
|
Args : on set, the new value |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
Most features do not have or need a defined phase. |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
For features representing a CDS, the phase indicates where the feature |
|
784
|
|
|
|
|
|
|
begins with reference to the reading frame. The phase is one of the |
|
785
|
|
|
|
|
|
|
integers 0, 1, or 2, indicating the number of bases that should be |
|
786
|
|
|
|
|
|
|
removed from the beginning of this feature to reach the first base of |
|
787
|
|
|
|
|
|
|
the next codon. In other words, a phase of "0" indicates that the next |
|
788
|
|
|
|
|
|
|
codon begins at the first base of the region described by the current |
|
789
|
|
|
|
|
|
|
line, a phase of "1" indicates that the next codon begins at the |
|
790
|
|
|
|
|
|
|
second base of this region, and a phase of "2" indicates that the |
|
791
|
|
|
|
|
|
|
codon begins at the third base of this region. This is NOT to be |
|
792
|
|
|
|
|
|
|
confused with the frame, which is simply start modulo 3. |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
For forward strand features, phase is counted from the start |
|
795
|
|
|
|
|
|
|
field. For reverse strand features, phase is counted from the end |
|
796
|
|
|
|
|
|
|
field. |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub phase {
    # Get/set this feature's phase, stored as the single value of the
    # 'phase' tag.
    #
    #   Args    : optional new value: undef, 0, 1, or 2
    #   Returns : on set, the new value; on get, the first value of the
    #             'phase' tag, or undef if the tag is absent
    #   Throws  : via $self->throw() if a defined new value is not
    #             numerically 0, 1, or 2
    my $self = shift;

    if (@_) {
        my $newphase = shift;

        # Validate BEFORE touching the stored tag. Previously the old tag
        # was removed first, so a rejected value still wiped the existing
        # phase.
        # NOTE(review): the numeric comparisons treat non-numeric strings
        # as 0 (with a warning), so such values pass this check; left
        # unchanged in case callers rely on it.
        $self->throw("illegal phase value '$newphase', phase must be either undef, 0, 1, or 2")
            unless !defined $newphase || $newphase == 0 || $newphase == 1 || $newphase == 2;

        # Replace rather than append, so the tag holds exactly one value.
        $self->remove_tag('phase') if $self->has_tag('phase');
        $self->add_tag_value('phase', $newphase);
        return $newphase;
    }

    return $self->has_tag('phase') ? ($self->get_tag_values('phase'))[0] : undef;
}
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=head1 Bio::RangeI methods |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
These methods are inherited from RangeI and can be used |
|
818
|
|
|
|
|
|
|
directly from a SeqFeatureI interface. Remember that a |
|
819
|
|
|
|
|
|
|
SeqFeature is-a RangeI, and so wherever you see RangeI you |
|
820
|
|
|
|
|
|
|
can use a feature ($r in the below documentation). |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head2 start() |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=head2 end() |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=head2 strand() |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=head2 overlaps() |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 contains() |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 equals() |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=head2 intersection() |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
851
|
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=head2 union() |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
See L<Bio::RangeI> |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=cut |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
1; |