| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::PrimarySeqI |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::PrimarySeqI - Interface definition for a Bio::PrimarySeq |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# Bio::PrimarySeqI is the interface class for sequences. |
|
22
|
|
|
|
|
|
|
# If you are a newcomer to bioperl, you might want to start with |
|
23
|
|
|
|
|
|
|
# Bio::Seq documentation. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Test if this is a seq object |
|
26
|
|
|
|
|
|
|
$obj->isa("Bio::PrimarySeqI") || |
|
27
|
|
|
|
|
|
|
$obj->throw("$obj does not implement the Bio::PrimarySeqI interface"); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Accessors |
|
30
|
|
|
|
|
|
|
$string = $obj->seq(); |
|
31
|
|
|
|
|
|
|
$substring = $obj->subseq(12,50); |
|
32
|
|
|
|
|
|
|
$display = $obj->display_id(); # for human display |
|
33
|
|
|
|
|
|
|
$id = $obj->primary_id(); # unique id for this object, |
|
34
|
|
|
|
|
|
|
# implementation defined |
|
35
|
|
|
|
|
|
|
$unique_key= $obj->accession_number(); # unique biological id |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Object manipulation |
|
39
|
|
|
|
|
|
|
eval { |
|
40
|
|
|
|
|
|
|
$rev = $obj->revcom(); |
|
41
|
|
|
|
|
|
|
}; |
|
42
|
|
|
|
|
|
|
if( $@ ) { |
|
43
|
|
|
|
|
|
|
$obj->throw( "Could not reverse complement. ". |
|
44
|
|
|
|
|
|
|
"Probably not DNA. Actual exception\n$@\n" ); |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$trunc = $obj->trunc(12,50); |
|
48
|
|
|
|
|
|
|
# $rev and $trunc are Bio::PrimarySeqI compliant objects |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This object defines an abstract interface to basic sequence |
|
54
|
|
|
|
|
|
|
information - for most users of the package the documentation (and |
|
55
|
|
|
|
|
|
|
methods) in this class are not useful - this is a developers-only |
|
56
|
|
|
|
|
|
|
class which defines what methods have to be implemented by other Perl |
|
57
|
|
|
|
|
|
|
objects to comply to the Bio::PrimarySeqI interface. Go "perldoc |
|
58
|
|
|
|
|
|
|
Bio::Seq" or "man Bio::Seq" for more information on the main class for |
|
59
|
|
|
|
|
|
|
sequences. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
PrimarySeq is an object just for the sequence and its name(s), nothing |
|
62
|
|
|
|
|
|
|
more. Seq is the larger object complete with features. There is a pure |
|
63
|
|
|
|
|
|
|
perl implementation of this in L. If you just want to |
|
64
|
|
|
|
|
|
|
use L objects, then please read that module first. This |
|
65
|
|
|
|
|
|
|
module defines the interface, and is of more interest to people who |
|
66
|
|
|
|
|
|
|
want to wrap their own Perl Objects/RDBs/FileSystems etc in way that |
|
67
|
|
|
|
|
|
|
they "are" bioperl sequence objects, even though it is not using Perl |
|
68
|
|
|
|
|
|
|
to store the sequence etc. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
This interface defines what bioperl considers necessary to "be" a |
|
71
|
|
|
|
|
|
|
sequence, without providing an implementation of this, an |
|
72
|
|
|
|
|
|
|
implementation is provided in L. If you want to provide |
|
73
|
|
|
|
|
|
|
a Bio::PrimarySeq-compliant object which in fact wraps another |
|
74
|
|
|
|
|
|
|
object/database/out-of-perl experience, then this is the correct thing |
|
75
|
|
|
|
|
|
|
to wrap, generally by providing a wrapper class which would inherit |
|
76
|
|
|
|
|
|
|
from your object and this Bio::PrimarySeqI interface. The wrapper class |
|
77
|
|
|
|
|
|
|
then would have methods lists in the "Implementation Specific |
|
78
|
|
|
|
|
|
|
Functions" which would provide these methods for your object. |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
85
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
86
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
89
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head2 Support |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
I |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
98
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
99
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
100
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
105
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
106
|
|
|
|
|
|
|
web: |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 APPENDIX |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
117
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
package Bio::PrimarySeqI; |
|
123
|
203
|
|
|
203
|
|
1329
|
use strict; |
|
|
203
|
|
|
|
|
384
|
|
|
|
203
|
|
|
|
|
5180
|
|
|
124
|
203
|
|
|
203
|
|
62691
|
use Bio::Tools::CodonTable; |
|
|
203
|
|
|
|
|
467
|
|
|
|
203
|
|
|
|
|
5722
|
|
|
125
|
|
|
|
|
|
|
|
|
126
|
203
|
|
|
203
|
|
1308
|
use base qw(Bio::Root::RootI); |
|
|
203
|
|
|
|
|
363
|
|
|
|
203
|
|
|
|
|
420893
|
|
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 Implementation-specific Functions |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
These functions are the ones that a specific implementation must |
|
132
|
|
|
|
|
|
|
define. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 seq |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Title : seq |
|
137
|
|
|
|
|
|
|
Usage : $string = $obj->seq() |
|
138
|
|
|
|
|
|
|
Function: Returns the sequence as a string of letters. The |
|
139
|
|
|
|
|
|
|
case of the letters is left up to the implementer. |
|
140
|
|
|
|
|
|
|
Suggested cases are upper case for proteins and lower case for |
|
141
|
|
|
|
|
|
|
DNA sequence (IUPAC standard), but implementations are suggested to |
|
142
|
|
|
|
|
|
|
keep an open mind about case (some users... want mixed case!) |
|
143
|
|
|
|
|
|
|
Returns : A scalar |
|
144
|
|
|
|
|
|
|
Status : Virtual |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub seq { |
|
149
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
150
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head2 subseq |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Title : subseq |
|
157
|
|
|
|
|
|
|
Usage : $substring = $obj->subseq(10,40); |
|
158
|
|
|
|
|
|
|
Function: Returns the subseq from start to end, where the first base |
|
159
|
|
|
|
|
|
|
is 1 and the number is inclusive, i.e. 1-2 are the first two |
|
160
|
|
|
|
|
|
|
bases of the sequence. |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Start cannot be larger than end but can be equal. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Returns : A string |
|
165
|
|
|
|
|
|
|
Args : |
|
166
|
|
|
|
|
|
|
Status : Virtual |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub subseq { |
|
171
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
172
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 display_id |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Title : display_id |
|
179
|
|
|
|
|
|
|
Usage : $id_string = $obj->display_id(); |
|
180
|
|
|
|
|
|
|
Function: Returns the display id, also known as the common name of the Sequence |
|
181
|
|
|
|
|
|
|
object. |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
The semantics of this is that it is the most likely string |
|
184
|
|
|
|
|
|
|
to be used as an identifier of the sequence, and likely to |
|
185
|
|
|
|
|
|
|
have "human" readability. The id is equivalent to the ID |
|
186
|
|
|
|
|
|
|
field of the GenBank/EMBL databanks and the id field of the |
|
187
|
|
|
|
|
|
|
Swissprot/sptrembl database. In fasta format, the >(\S+) is |
|
188
|
|
|
|
|
|
|
presumed to be the id, though some people overload the id |
|
189
|
|
|
|
|
|
|
to embed other information. Bioperl does not use any |
|
190
|
|
|
|
|
|
|
embedded information in the ID field, and people are |
|
191
|
|
|
|
|
|
|
encouraged to use other mechanisms (accession field for |
|
192
|
|
|
|
|
|
|
example, or extending the sequence object) to solve this. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Notice that $seq->id() maps to this function, mainly for |
|
195
|
|
|
|
|
|
|
legacy/convenience reasons. |
|
196
|
|
|
|
|
|
|
Returns : A string |
|
197
|
|
|
|
|
|
|
Args : None |
|
198
|
|
|
|
|
|
|
Status : Virtual |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub display_id { |
|
203
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
204
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head2 accession_number |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Title : accession_number |
|
211
|
|
|
|
|
|
|
Usage : $unique_biological_key = $obj->accession_number; |
|
212
|
|
|
|
|
|
|
Function: Returns the unique biological id for a sequence, commonly |
|
213
|
|
|
|
|
|
|
called the accession_number. For sequences from established |
|
214
|
|
|
|
|
|
|
databases, the implementors should try to use the correct |
|
215
|
|
|
|
|
|
|
accession number. Notice that primary_id() provides the |
|
216
|
|
|
|
|
|
|
unique id for the implementation, allowing multiple objects |
|
217
|
|
|
|
|
|
|
to have the same accession number in a particular implementation. |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
For sequences with no accession number, this method should return |
|
220
|
|
|
|
|
|
|
"unknown". |
|
221
|
|
|
|
|
|
|
Returns : A string |
|
222
|
|
|
|
|
|
|
Args : None |
|
223
|
|
|
|
|
|
|
Status : Virtual |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub accession_number { |
|
228
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
229
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head2 primary_id |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Title : primary_id |
|
236
|
|
|
|
|
|
|
Usage : $unique_implementation_key = $obj->primary_id; |
|
237
|
|
|
|
|
|
|
Function: Returns the unique id for this object in this |
|
238
|
|
|
|
|
|
|
implementation. This allows implementations to manage their |
|
239
|
|
|
|
|
|
|
own object ids in a way the implementation can control |
|
240
|
|
|
|
|
|
|
clients can expect one id to map to one object. |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
For sequences with no accession number, this method should |
|
243
|
|
|
|
|
|
|
return a stringified memory location. |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
Returns : A string |
|
246
|
|
|
|
|
|
|
Args : None |
|
247
|
|
|
|
|
|
|
Status : Virtual |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=cut |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub primary_id { |
|
252
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
253
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 can_call_new |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Title : can_call_new |
|
260
|
|
|
|
|
|
|
Usage : if( $obj->can_call_new ) { |
|
261
|
|
|
|
|
|
|
$newobj = $obj->new( %param ); |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
Function: Can_call_new returns 1 or 0 depending |
|
264
|
|
|
|
|
|
|
on whether an implementation allows new |
|
265
|
|
|
|
|
|
|
constructor to be called. If a new constructor |
|
266
|
|
|
|
|
|
|
is allowed, then it should take the followed hashed |
|
267
|
|
|
|
|
|
|
constructor list. |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
$myobject->new( -seq => $sequence_as_string, |
|
270
|
|
|
|
|
|
|
-display_id => $id |
|
271
|
|
|
|
|
|
|
-accession_number => $accession |
|
272
|
|
|
|
|
|
|
-alphabet => 'dna', |
|
273
|
|
|
|
|
|
|
); |
|
274
|
|
|
|
|
|
|
Returns : 1 or 0 |
|
275
|
|
|
|
|
|
|
Args : |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub can_call_new { |
|
281
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
282
|
|
|
|
|
|
|
# we default to 0 here |
|
283
|
0
|
|
|
|
|
0
|
return 0; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=head2 alphabet |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Title : alphabet |
|
290
|
|
|
|
|
|
|
Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } |
|
291
|
|
|
|
|
|
|
Function: Returns the type of sequence being one of |
|
292
|
|
|
|
|
|
|
'dna', 'rna' or 'protein'. This is case sensitive. |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
This is not called "type" because this would cause |
|
295
|
|
|
|
|
|
|
upgrade problems from the 0.5 and earlier Seq objects. |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Returns : A string either 'dna','rna','protein'. NB - the object must |
|
298
|
|
|
|
|
|
|
make a call of the alphabet, if there is no alphabet specified it |
|
299
|
|
|
|
|
|
|
has to guess. |
|
300
|
|
|
|
|
|
|
Args : None |
|
301
|
|
|
|
|
|
|
Status : Virtual |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=cut |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub alphabet { |
|
306
|
0
|
|
|
0
|
1
|
0
|
my ( $self ) = @_; |
|
307
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
308
|
|
|
|
|
|
|
} |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 moltype |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Title : moltype |
|
314
|
|
|
|
|
|
|
Usage : Deprecated. Use alphabet() instead. |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=cut |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub moltype { |
|
319
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
320
|
0
|
|
|
|
|
0
|
$self->warn("moltype: pre v1.0 method. Calling alphabet() instead..."); |
|
321
|
0
|
|
|
|
|
0
|
return $self->alphabet(@args); |
|
322
|
|
|
|
|
|
|
} |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 Implementation-optional Functions |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
The following functions rely on the above functions. An |
|
328
|
|
|
|
|
|
|
implementing class does not need to provide these functions, as they |
|
329
|
|
|
|
|
|
|
will be provided by this class, but is free to override these |
|
330
|
|
|
|
|
|
|
functions. |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
The revcom(), trunc(), and translate() methods create new sequence |
|
333
|
|
|
|
|
|
|
objects. They will call new() on the class of the sequence object |
|
334
|
|
|
|
|
|
|
instance passed as argument, unless can_call_new() returns FALSE. In |
|
335
|
|
|
|
|
|
|
the latter case a Bio::PrimarySeq object will be created. Implementors |
|
336
|
|
|
|
|
|
|
which really want to control how objects are created (eg, for object |
|
337
|
|
|
|
|
|
|
persistence over a database, or objects in a CORBA framework), they |
|
338
|
|
|
|
|
|
|
are encouraged to override these methods |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head2 revcom |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
Title : revcom |
|
343
|
|
|
|
|
|
|
Usage : $rev = $seq->revcom() |
|
344
|
|
|
|
|
|
|
Function: Produces a new Bio::PrimarySeqI implementing object which |
|
345
|
|
|
|
|
|
|
is the reversed complement of the sequence. For protein |
|
346
|
|
|
|
|
|
|
sequences this throws an exception of "Sequence is a |
|
347
|
|
|
|
|
|
|
protein. Cannot revcom". |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
The id is the same id as the original sequence, and the |
|
350
|
|
|
|
|
|
|
accession number is also identical. If someone wants to |
|
351
|
|
|
|
|
|
|
track that this sequence has be reversed, it needs to |
|
352
|
|
|
|
|
|
|
define its own extensions. |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
To do an inplace edit of an object you can go: |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
$seq = $seq->revcom(); |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
This of course, causes Perl to handle the garbage |
|
359
|
|
|
|
|
|
|
collection of the old object, but it is roughly speaking as |
|
360
|
|
|
|
|
|
|
efficient as an inplace edit. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Returns : A new (fresh) Bio::PrimarySeqI object |
|
363
|
|
|
|
|
|
|
Args : None |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub revcom { |
|
369
|
11271
|
|
|
11271
|
1
|
16428
|
my ($self) = @_; |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' |
|
372
|
|
|
|
|
|
|
# or 'Bio::Seq::LargeSeq', if not take advantage of |
|
373
|
|
|
|
|
|
|
# Bio::Root::clone to get an object copy |
|
374
|
11271
|
|
|
|
|
13935
|
my $out; |
|
375
|
11271
|
50
|
33
|
|
|
75361
|
if ( $self->isa('Bio::Seq::LargePrimarySeq') |
|
376
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::LargeSeq') |
|
377
|
|
|
|
|
|
|
) { |
|
378
|
0
|
|
|
|
|
0
|
my ($seqclass, $opts) = $self->_setup_class; |
|
379
|
0
|
|
|
|
|
0
|
$out = $seqclass->new( |
|
380
|
|
|
|
|
|
|
-seq => $self->_revcom_from_string($self->seq, $self->alphabet), |
|
381
|
|
|
|
|
|
|
-is_circular => $self->is_circular, |
|
382
|
|
|
|
|
|
|
-display_id => $self->display_id, |
|
383
|
|
|
|
|
|
|
-accession_number => $self->accession_number, |
|
384
|
|
|
|
|
|
|
-alphabet => $self->alphabet, |
|
385
|
|
|
|
|
|
|
-desc => $self->desc, |
|
386
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
387
|
|
|
|
|
|
|
%$opts, |
|
388
|
|
|
|
|
|
|
); |
|
389
|
|
|
|
|
|
|
} else { |
|
390
|
11271
|
|
|
|
|
25905
|
$out = $self->clone; |
|
391
|
11271
|
|
|
|
|
22724
|
$out->seq( $out->_revcom_from_string($out->seq, $out->alphabet) ); |
|
392
|
|
|
|
|
|
|
} |
|
393
|
11271
|
|
|
|
|
24517
|
return $out; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub _revcom_from_string { |
|
398
|
11337
|
|
|
11337
|
|
18888
|
my ($self, $string, $alphabet) = @_; |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# Check that reverse-complementing makes sense |
|
401
|
11337
|
50
|
|
|
|
19610
|
if( $alphabet eq 'protein' ) { |
|
402
|
0
|
|
|
|
|
0
|
$self->throw("Sequence is a protein. Cannot revcom."); |
|
403
|
|
|
|
|
|
|
} |
|
404
|
11337
|
50
|
66
|
|
|
19400
|
if( $alphabet ne 'dna' && $alphabet ne 'rna' ) { |
|
405
|
0
|
|
|
|
|
0
|
my $msg = "Sequence is not dna or rna, but [$alphabet]. Attempting to revcom, ". |
|
406
|
|
|
|
|
|
|
"but unsure if this is right."; |
|
407
|
0
|
0
|
|
|
|
0
|
if( $self->can('warn') ) { |
|
408
|
0
|
|
|
|
|
0
|
$self->warn($msg); |
|
409
|
|
|
|
|
|
|
} else { |
|
410
|
0
|
|
|
|
|
0
|
warn("[$self] $msg"); |
|
411
|
|
|
|
|
|
|
} |
|
412
|
|
|
|
|
|
|
} |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# If sequence is RNA, map to DNA (then map back later) |
|
415
|
11337
|
100
|
|
|
|
16784
|
if( $alphabet eq 'rna' ) { |
|
416
|
1
|
|
|
|
|
4
|
$string =~ tr/uU/tT/; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# Reverse-complement now |
|
420
|
11337
|
|
|
|
|
16325
|
$string =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; |
|
421
|
11337
|
|
|
|
|
17617
|
$string = CORE::reverse $string; |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Map back RNA to DNA |
|
424
|
11337
|
100
|
|
|
|
17149
|
if( $alphabet eq 'rna' ) { |
|
425
|
1
|
|
|
|
|
2
|
$string =~ tr/tT/uU/; |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
|
|
428
|
11337
|
|
|
|
|
21064
|
return $string; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=head2 trunc |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Title : trunc |
|
435
|
|
|
|
|
|
|
Usage : $subseq = $myseq->trunc(10,100); |
|
436
|
|
|
|
|
|
|
Function: Provides a truncation of a sequence. |
|
437
|
|
|
|
|
|
|
Returns : A fresh Bio::PrimarySeqI implementing object. |
|
438
|
|
|
|
|
|
|
Args : Two integers denoting first and last base of the sub-sequence. |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=cut |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub trunc { |
|
444
|
324
|
|
|
324
|
1
|
570
|
my ($self,$start,$end) = @_; |
|
445
|
|
|
|
|
|
|
|
|
446
|
324
|
|
|
|
|
368
|
my $str; |
|
447
|
324
|
100
|
66
|
|
|
1638
|
if( defined $start && ref($start) && |
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
$start->isa('Bio::LocationI') ) { |
|
449
|
2
|
|
|
|
|
7
|
$str = $self->subseq($start); # start is a location actually |
|
450
|
|
|
|
|
|
|
} elsif( !$end ) { |
|
451
|
0
|
|
|
|
|
0
|
$self->throw("trunc start,end -- there was no end for $start"); |
|
452
|
|
|
|
|
|
|
} elsif( $end < $start ) { |
|
453
|
0
|
|
|
|
|
0
|
my $msg = "start [$start] is greater than end [$end]. \n". |
|
454
|
|
|
|
|
|
|
"If you want to truncated and reverse complement, \n". |
|
455
|
|
|
|
|
|
|
"you must call trunc followed by revcom. Sorry."; |
|
456
|
0
|
|
|
|
|
0
|
$self->throw($msg); |
|
457
|
|
|
|
|
|
|
} else { |
|
458
|
322
|
|
|
|
|
829
|
$str = $self->subseq($start,$end); |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
# Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' |
|
462
|
|
|
|
|
|
|
# or 'Bio::Seq::LargeSeq', if not take advantage of |
|
463
|
|
|
|
|
|
|
# Bio::Root::clone to get an object copy |
|
464
|
324
|
|
|
|
|
398
|
my $out; |
|
465
|
324
|
100
|
66
|
|
|
3481
|
if ( $self->isa('Bio::Seq::LargePrimarySeq') |
|
|
|
|
100
|
|
|
|
|
|
466
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::LargeSeq') |
|
467
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::RichSeq') |
|
468
|
|
|
|
|
|
|
) { |
|
469
|
8
|
|
|
|
|
33
|
my ($seqclass, $opts) = $self->_setup_class; |
|
470
|
8
|
|
|
|
|
37
|
$out = $seqclass->new( |
|
471
|
|
|
|
|
|
|
-seq => $str, |
|
472
|
|
|
|
|
|
|
-is_circular => $self->is_circular, |
|
473
|
|
|
|
|
|
|
-display_id => $self->display_id, |
|
474
|
|
|
|
|
|
|
-accession_number => $self->accession_number, |
|
475
|
|
|
|
|
|
|
-alphabet => $self->alphabet, |
|
476
|
|
|
|
|
|
|
-desc => $self->desc, |
|
477
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
478
|
|
|
|
|
|
|
%$opts, |
|
479
|
|
|
|
|
|
|
); |
|
480
|
|
|
|
|
|
|
} else { |
|
481
|
316
|
|
|
|
|
900
|
$out = $self->clone; |
|
482
|
316
|
|
|
|
|
778
|
$out->seq($str); |
|
483
|
|
|
|
|
|
|
} |
|
484
|
324
|
|
|
|
|
834
|
return $out; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=head2 translate |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
Title : translate |
|
491
|
|
|
|
|
|
|
Usage : $protein_seq_obj = $dna_seq_obj->translate |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Or if you expect a complete coding sequence (CDS) translation, |
|
494
|
|
|
|
|
|
|
with initiator at the beginning and terminator at the end: |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
$protein_seq_obj = $cds_seq_obj->translate(-complete => 1); |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
Or if you want translate() to find the first initiation |
|
499
|
|
|
|
|
|
|
codon and return the corresponding protein: |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
$protein_seq_obj = $cds_seq_obj->translate(-orf => 1); |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Function: Provides the translation of the DNA sequence using full |
|
504
|
|
|
|
|
|
|
IUPAC ambiguities in DNA/RNA and amino acid codes. |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
The complete CDS translation is identical to EMBL/TREMBL |
|
507
|
|
|
|
|
|
|
database translation. Note that the trailing terminator |
|
508
|
|
|
|
|
|
|
character is removed before returning the translated protein |
|
509
|
|
|
|
|
|
|
object. |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Note: if you set $dna_seq_obj->verbose(1) you will get a |
|
512
|
|
|
|
|
|
|
warning if the first codon is not a valid initiator. |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Returns : A Bio::PrimarySeqI implementing object |
|
515
|
|
|
|
|
|
|
Args : -terminator |
|
516
|
|
|
|
|
|
|
character for terminator, default '*' |
|
517
|
|
|
|
|
|
|
-unknown |
|
518
|
|
|
|
|
|
|
character for unknown, default 'X' |
|
519
|
|
|
|
|
|
|
-frame |
|
520
|
|
|
|
|
|
|
positive integer frame shift (in bases), default 0 |
|
521
|
|
|
|
|
|
|
-codontable_id |
|
522
|
|
|
|
|
|
|
integer codon table id, default 1 |
|
523
|
|
|
|
|
|
|
-complete |
|
524
|
|
|
|
|
|
|
boolean, if true, complete CDS is expected. default false |
|
525
|
|
|
|
|
|
|
-complete_codons |
|
526
|
|
|
|
|
|
|
boolean, if true, codons which are incomplete are translated if a |
|
527
|
|
|
|
|
|
|
suitable amino acid is found. For instance, if the incomplete |
|
528
|
|
|
|
|
|
|
codon is 'GG', the completed codon is 'GGN', which is glycine |
|
529
|
|
|
|
|
|
|
(G). Defaults to 'false'; setting '-complete' also makes this |
|
530
|
|
|
|
|
|
|
true. |
|
531
|
|
|
|
|
|
|
-throw |
|
532
|
|
|
|
|
|
|
boolean, throw exception if ORF not complete, default false |
|
533
|
|
|
|
|
|
|
-orf |
|
534
|
|
|
|
|
|
|
if 'longest', find longest ORF. other true value, find |
|
535
|
|
|
|
|
|
|
first ORF. default 0 |
|
536
|
|
|
|
|
|
|
-codontable |
|
537
|
|
|
|
|
|
|
optional L object to use for |
|
538
|
|
|
|
|
|
|
translation |
|
539
|
|
|
|
|
|
|
-start |
|
540
|
|
|
|
|
|
|
optional three-character string to force as initiation |
|
541
|
|
|
|
|
|
|
codon (e.g. 'atg'). If unset, start codons are |
|
542
|
|
|
|
|
|
|
determined by the CodonTable. Case insensitive. |
|
543
|
|
|
|
|
|
|
-offset |
|
544
|
|
|
|
|
|
|
optional positive integer offset for fuzzy locations. |
|
545
|
|
|
|
|
|
|
if set, must be either 1, 2, or 3 |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head3 Notes |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
The -start argument only applies when -orf is set to 1. By default all |
|
550
|
|
|
|
|
|
|
initiation codons found in the given codon table are used but when |
|
551
|
|
|
|
|
|
|
"start" is set to some codon this codon will be used exclusively as |
|
552
|
|
|
|
|
|
|
the initiation codon. Note that the default codon table (NCBI |
|
553
|
|
|
|
|
|
|
"Standard") has 3 initiation codons! |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
By default translate() translates termination codons to the some |
|
556
|
|
|
|
|
|
|
character (default is *), both internal and trailing codons. Setting |
|
557
|
|
|
|
|
|
|
"-complete" to 1 tells translate() to remove the trailing character. |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
-offset is used for seqfeatures which contain the the \codon_start tag |
|
560
|
|
|
|
|
|
|
and can be set to 1, 2, or 3. This is the offset by which the |
|
561
|
|
|
|
|
|
|
sequence translation starts relative to the first base of the feature |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
For details on codon tables used by translate() see L. |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
Deprecated argument set (v. 1.5.1 and prior versions) where each argument is an |
|
566
|
|
|
|
|
|
|
element in an array: |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
1: character for terminator (optional), defaults to '*'. |
|
569
|
|
|
|
|
|
|
2: character for unknown amino acid (optional), defaults to 'X'. |
|
570
|
|
|
|
|
|
|
3: frame (optional), valid values are 0, 1, 2, defaults to 0. |
|
571
|
|
|
|
|
|
|
4: codon table id (optional), defaults to 1. |
|
572
|
|
|
|
|
|
|
5: complete coding sequence expected, defaults to 0 (false). |
|
573
|
|
|
|
|
|
|
6: boolean, throw exception if not complete coding sequence |
|
574
|
|
|
|
|
|
|
(true), defaults to warning (false) |
|
575
|
|
|
|
|
|
|
7: codontable, a custom Bio::Tools::CodonTable object (optional). |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub translate { |
|
580
|
287
|
|
|
287
|
1
|
2273
|
my ($self,@args) = @_; |
|
581
|
287
|
|
|
|
|
529
|
my ($terminator, $unknown, $frame, $codonTableId, $complete, |
|
582
|
|
|
|
|
|
|
$complete_codons, $throw, $codonTable, $orf, $start_codon, $offset); |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
## new API with named parameters, post 1.5.1 |
|
585
|
287
|
100
|
100
|
|
|
774
|
if ($args[0] && $args[0] =~ /^-[A-Z]+/i) { |
|
586
|
24
|
|
|
|
|
109
|
($terminator, $unknown, $frame, $codonTableId, $complete, |
|
587
|
|
|
|
|
|
|
$complete_codons, $throw,$codonTable, $orf, $start_codon, $offset) = |
|
588
|
|
|
|
|
|
|
$self->_rearrange([qw(TERMINATOR |
|
589
|
|
|
|
|
|
|
UNKNOWN |
|
590
|
|
|
|
|
|
|
FRAME |
|
591
|
|
|
|
|
|
|
CODONTABLE_ID |
|
592
|
|
|
|
|
|
|
COMPLETE |
|
593
|
|
|
|
|
|
|
COMPLETE_CODONS |
|
594
|
|
|
|
|
|
|
THROW |
|
595
|
|
|
|
|
|
|
CODONTABLE |
|
596
|
|
|
|
|
|
|
ORF |
|
597
|
|
|
|
|
|
|
START |
|
598
|
|
|
|
|
|
|
OFFSET)], @args); |
|
599
|
|
|
|
|
|
|
## old API, 1.5.1 and preceding versions |
|
600
|
|
|
|
|
|
|
} else { |
|
601
|
263
|
|
|
|
|
501
|
($terminator, $unknown, $frame, $codonTableId, |
|
602
|
|
|
|
|
|
|
$complete, $throw, $codonTable, $offset) = @args; |
|
603
|
|
|
|
|
|
|
} |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
## Initialize termination codon, unknown codon, codon table id, frame |
|
606
|
287
|
100
|
66
|
|
|
776
|
$terminator = '*' unless (defined($terminator) and $terminator ne ''); |
|
607
|
287
|
100
|
66
|
|
|
652
|
$unknown = "X" unless (defined($unknown) and $unknown ne ''); |
|
608
|
287
|
100
|
66
|
|
|
635
|
$frame = 0 unless (defined($frame) and $frame ne ''); |
|
609
|
287
|
100
|
66
|
|
|
649
|
$codonTableId = 1 unless (defined($codonTableId) and $codonTableId ne ''); |
|
610
|
287
|
|
100
|
|
|
1208
|
$complete_codons ||= $complete || 0; |
|
|
|
|
100
|
|
|
|
|
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
## Get a CodonTable, error if custom CodonTable is invalid |
|
613
|
287
|
100
|
|
|
|
429
|
if ($codonTable) { |
|
614
|
1
|
50
|
|
|
|
5
|
$self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable) |
|
615
|
|
|
|
|
|
|
unless $codonTable->isa('Bio::Tools::CodonTable'); |
|
616
|
|
|
|
|
|
|
} else { |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# shouldn't this be cached? Seems wasteful to have a new instance |
|
619
|
|
|
|
|
|
|
# every time... |
|
620
|
286
|
|
|
|
|
1187
|
$codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); |
|
621
|
|
|
|
|
|
|
} |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
## Error if alphabet is "protein" |
|
624
|
287
|
50
|
|
|
|
719
|
$self->throw("Can't translate an amino acid sequence.") if |
|
625
|
|
|
|
|
|
|
($self->alphabet =~ /protein/i); |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
## Error if -start parameter isn't a valid codon |
|
628
|
287
|
100
|
|
|
|
606
|
if ($start_codon) { |
|
629
|
1
|
50
|
|
|
|
5
|
$self->throw("Invalid start codon: $start_codon.") if |
|
630
|
|
|
|
|
|
|
( $start_codon !~ /^[A-Z]{3}$/i ); |
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
|
|
633
|
287
|
|
|
|
|
338
|
my $seq; |
|
634
|
287
|
50
|
|
|
|
452
|
if ($offset) { |
|
635
|
0
|
0
|
|
|
|
0
|
$self->throw("Offset must be 1, 2, or 3.") if |
|
636
|
|
|
|
|
|
|
( $offset !~ /^[123]$/ ); |
|
637
|
0
|
|
|
|
|
0
|
my ($start, $end) = ($offset, $self->length); |
|
638
|
0
|
|
|
|
|
0
|
($seq) = $self->subseq($start, $end); |
|
639
|
|
|
|
|
|
|
} else { |
|
640
|
287
|
|
|
|
|
575
|
($seq) = $self->seq(); |
|
641
|
|
|
|
|
|
|
} |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
## ignore frame if an ORF is supposed to be found |
|
644
|
287
|
100
|
|
|
|
572
|
if ( $orf ) { |
|
645
|
12
|
100
|
|
|
|
40
|
my ($orf_region) = $self->_find_orfs_nucleotide( $seq, $codonTable, $start_codon, $orf eq 'longest' ? 0 : 'first_only' ); |
|
646
|
11
|
|
|
|
|
30
|
$seq = $self->_orf_sequence( $seq, $orf_region ); |
|
647
|
|
|
|
|
|
|
} else { |
|
648
|
|
|
|
|
|
|
## use frame, error if frame is not 0, 1 or 2 |
|
649
|
275
|
50
|
100
|
|
|
629
|
$self->throw("Valid values for frame are 0, 1, or 2, not $frame.") |
|
|
|
|
66
|
|
|
|
|
|
650
|
|
|
|
|
|
|
unless ($frame == 0 or $frame == 1 or $frame == 2); |
|
651
|
275
|
|
|
|
|
804
|
$seq = substr($seq,$frame); |
|
652
|
|
|
|
|
|
|
} |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
## Translate it |
|
655
|
286
|
|
|
|
|
830
|
my $output = $codonTable->translate($seq, $complete_codons); |
|
656
|
|
|
|
|
|
|
# Use user-input terminator/unknown |
|
657
|
286
|
|
|
|
|
1414
|
$output =~ s/\*/$terminator/g; |
|
658
|
286
|
|
|
|
|
540
|
$output =~ s/X/$unknown/g; |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
## Only if we are expecting to translate a complete coding region |
|
661
|
286
|
100
|
|
|
|
601
|
if ($complete) { |
|
662
|
9
|
|
|
|
|
21
|
my $id = $self->display_id; |
|
663
|
|
|
|
|
|
|
# remove the terminator character |
|
664
|
9
|
50
|
|
|
|
30
|
if( substr($output,-1,1) eq $terminator ) { |
|
665
|
9
|
|
|
|
|
17
|
chop $output; |
|
666
|
|
|
|
|
|
|
} else { |
|
667
|
0
|
0
|
|
|
|
0
|
$throw && $self->throw("Seq [$id]: Not using a valid terminator codon!"); |
|
668
|
0
|
|
|
|
|
0
|
$self->warn("Seq [$id]: Not using a valid terminator codon!"); |
|
669
|
|
|
|
|
|
|
} |
|
670
|
|
|
|
|
|
|
# test if there are terminator characters inside the protein sequence! |
|
671
|
9
|
100
|
|
|
|
71
|
if ($output =~ /\Q$terminator\E/) { |
|
672
|
2
|
|
100
|
|
|
12
|
$id ||= ''; |
|
673
|
2
|
50
|
|
|
|
22
|
$throw && $self->throw("Seq [$id]: Terminator codon inside CDS!"); |
|
674
|
0
|
|
|
|
|
0
|
$self->warn("Seq [$id]: Terminator codon inside CDS!"); |
|
675
|
|
|
|
|
|
|
} |
|
676
|
|
|
|
|
|
|
# if the initiator codon is not ATG, the amino acid needs to be changed to M |
|
677
|
7
|
100
|
|
|
|
18
|
if ( substr($output,0,1) ne 'M' ) { |
|
678
|
3
|
50
|
|
|
|
9
|
if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) { |
|
|
|
0
|
|
|
|
|
|
|
679
|
3
|
|
|
|
|
6
|
$output = 'M'. substr($output,1); |
|
680
|
|
|
|
|
|
|
} elsif ($throw) { |
|
681
|
0
|
|
|
|
|
0
|
$self->throw("Seq [$id]: Not using a valid initiator codon!"); |
|
682
|
|
|
|
|
|
|
} else { |
|
683
|
0
|
|
|
|
|
0
|
$self->warn("Seq [$id]: Not using a valid initiator codon!"); |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
} |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
# Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' |
|
689
|
|
|
|
|
|
|
# or 'Bio::Seq::LargeSeq', if not take advantage of |
|
690
|
|
|
|
|
|
|
# Bio::Root::clone to get an object copy |
|
691
|
284
|
|
|
|
|
348
|
my $out; |
|
692
|
284
|
100
|
100
|
|
|
3112
|
if ( $self->isa('Bio::Seq::LargePrimarySeq') |
|
693
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::LargeSeq') |
|
694
|
|
|
|
|
|
|
) { |
|
695
|
3
|
|
|
|
|
13
|
my ($seqclass, $opts) = $self->_setup_class; |
|
696
|
3
|
|
|
|
|
12
|
$out = $seqclass->new( |
|
697
|
|
|
|
|
|
|
-seq => $output, |
|
698
|
|
|
|
|
|
|
-is_circular => $self->is_circular, |
|
699
|
|
|
|
|
|
|
-display_id => $self->display_id, |
|
700
|
|
|
|
|
|
|
-accession_number => $self->accession_number, |
|
701
|
|
|
|
|
|
|
-alphabet => 'protein', |
|
702
|
|
|
|
|
|
|
-desc => $self->desc, |
|
703
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
704
|
|
|
|
|
|
|
%$opts, |
|
705
|
|
|
|
|
|
|
); |
|
706
|
|
|
|
|
|
|
} else { |
|
707
|
281
|
|
|
|
|
986
|
$out = $self->clone; |
|
708
|
281
|
|
|
|
|
939
|
$out->seq($output); |
|
709
|
281
|
|
|
|
|
601
|
$out->alphabet('protein'); |
|
710
|
|
|
|
|
|
|
} |
|
711
|
284
|
|
|
|
|
1263
|
return $out; |
|
712
|
|
|
|
|
|
|
} |
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
=head2 transcribe() |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
Title : transcribe |
|
718
|
|
|
|
|
|
|
Usage : $xseq = $seq->transcribe |
|
719
|
|
|
|
|
|
|
Function: Convert base T to base U |
|
720
|
|
|
|
|
|
|
Returns : PrimarySeqI object of alphabet 'rna' or |
|
721
|
|
|
|
|
|
|
undef if $seq->alphabet ne 'dna' |
|
722
|
|
|
|
|
|
|
Args : |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=cut |
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
sub transcribe { |
|
727
|
2
|
|
|
2
|
1
|
34
|
my $self = shift; |
|
728
|
2
|
100
|
|
|
|
6
|
return unless $self->alphabet eq 'dna'; |
|
729
|
1
|
|
|
|
|
5
|
my $s = $self->seq; |
|
730
|
1
|
|
|
|
|
3
|
$s =~ tr/tT/uU/; |
|
731
|
1
|
|
50
|
|
|
3
|
my $desc = $self->desc || ''; |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' |
|
734
|
|
|
|
|
|
|
# or 'Bio::Seq::LargeSeq', if not take advantage of |
|
735
|
|
|
|
|
|
|
# Bio::Root::clone to get an object copy |
|
736
|
1
|
|
|
|
|
1
|
my $out; |
|
737
|
1
|
50
|
33
|
|
|
12
|
if ( $self->isa('Bio::Seq::LargePrimarySeq') |
|
738
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::LargeSeq') |
|
739
|
|
|
|
|
|
|
) { |
|
740
|
0
|
|
|
|
|
0
|
my ($seqclass, $opts) = $self->_setup_class; |
|
741
|
0
|
|
|
|
|
0
|
$out = $seqclass->new( |
|
742
|
|
|
|
|
|
|
-seq => $s, |
|
743
|
|
|
|
|
|
|
-is_circular => $self->is_circular, |
|
744
|
|
|
|
|
|
|
-display_id => $self->display_id, |
|
745
|
|
|
|
|
|
|
-accession_number => $self->accession_number, |
|
746
|
|
|
|
|
|
|
-alphabet => 'rna', |
|
747
|
|
|
|
|
|
|
-desc => "${desc}[TRANSCRIBED]", |
|
748
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
749
|
|
|
|
|
|
|
%$opts, |
|
750
|
|
|
|
|
|
|
); |
|
751
|
|
|
|
|
|
|
} else { |
|
752
|
1
|
|
|
|
|
5
|
$out = $self->clone; |
|
753
|
1
|
|
|
|
|
4
|
$out->seq($s); |
|
754
|
1
|
|
|
|
|
4
|
$out->alphabet('rna'); |
|
755
|
1
|
|
|
|
|
5
|
$out->desc($desc . "[TRANSCRIBED]"); |
|
756
|
|
|
|
|
|
|
} |
|
757
|
1
|
|
|
|
|
6
|
return $out; |
|
758
|
|
|
|
|
|
|
} |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=head2 rev_transcribe() |
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
Title : rev_transcribe |
|
764
|
|
|
|
|
|
|
Usage : $rtseq = $seq->rev_transcribe |
|
765
|
|
|
|
|
|
|
Function: Convert base U to base T |
|
766
|
|
|
|
|
|
|
Returns : PrimarySeqI object of alphabet 'dna' or |
|
767
|
|
|
|
|
|
|
undef if $seq->alphabet ne 'rna' |
|
768
|
|
|
|
|
|
|
Args : |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=cut |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
sub rev_transcribe { |
|
773
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
774
|
1
|
50
|
|
|
|
4
|
return unless $self->alphabet eq 'rna'; |
|
775
|
1
|
|
|
|
|
5
|
my $s = $self->seq; |
|
776
|
1
|
|
|
|
|
4
|
$s =~ tr/uU/tT/; |
|
777
|
1
|
|
50
|
|
|
4
|
my $desc = $self->desc || ''; |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' |
|
780
|
|
|
|
|
|
|
# or 'Bio::Seq::LargeSeq', if not take advantage of |
|
781
|
|
|
|
|
|
|
# Bio::Root::clone to get an object copy |
|
782
|
1
|
|
|
|
|
3
|
my $out; |
|
783
|
1
|
50
|
33
|
|
|
18
|
if ( $self->isa('Bio::Seq::LargePrimarySeq') |
|
784
|
|
|
|
|
|
|
or $self->isa('Bio::Seq::LargeSeq') |
|
785
|
|
|
|
|
|
|
) { |
|
786
|
0
|
|
|
|
|
0
|
my ($seqclass, $opts) = $self->_setup_class; |
|
787
|
0
|
|
|
|
|
0
|
$out = $seqclass->new( |
|
788
|
|
|
|
|
|
|
-seq => $s, |
|
789
|
|
|
|
|
|
|
-is_circular => $self->is_circular, |
|
790
|
|
|
|
|
|
|
-display_id => $self->display_id, |
|
791
|
|
|
|
|
|
|
-accession_number => $self->accession_number, |
|
792
|
|
|
|
|
|
|
-alphabet => 'dna', |
|
793
|
|
|
|
|
|
|
-desc => $self->desc . "[REVERSE TRANSCRIBED]", |
|
794
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
795
|
|
|
|
|
|
|
%$opts, |
|
796
|
|
|
|
|
|
|
); |
|
797
|
|
|
|
|
|
|
} else { |
|
798
|
1
|
|
|
|
|
8
|
$out = $self->clone; |
|
799
|
1
|
|
|
|
|
3
|
$out->seq($s); |
|
800
|
1
|
|
|
|
|
3
|
$out->alphabet('dna'); |
|
801
|
1
|
|
|
|
|
4
|
$out->desc($desc . "[REVERSE TRANSCRIBED]"); |
|
802
|
|
|
|
|
|
|
} |
|
803
|
1
|
|
|
|
|
5
|
return $out; |
|
804
|
|
|
|
|
|
|
} |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
=head2 id |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
Title : id |
|
810
|
|
|
|
|
|
|
Usage : $id = $seq->id() |
|
811
|
|
|
|
|
|
|
Function: ID of the sequence. This should normally be (and actually is in |
|
812
|
|
|
|
|
|
|
the implementation provided here) just a synonym for display_id(). |
|
813
|
|
|
|
|
|
|
Returns : A string. |
|
814
|
|
|
|
|
|
|
Args : |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=cut |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
sub id { |
|
819
|
1
|
|
|
1
|
1
|
2
|
my ($self)= @_; |
|
820
|
1
|
|
|
|
|
4
|
return $self->display_id(); |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head2 length |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
Title : length |
|
827
|
|
|
|
|
|
|
Usage : $len = $seq->length() |
|
828
|
|
|
|
|
|
|
Function: |
|
829
|
|
|
|
|
|
|
Returns : Integer representing the length of the sequence. |
|
830
|
|
|
|
|
|
|
Args : |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
sub length { |
|
835
|
0
|
|
|
0
|
1
|
0
|
my ($self)= @_; |
|
836
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented(); |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 desc |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
Title : desc |
|
843
|
|
|
|
|
|
|
Usage : $seq->desc($newval); |
|
844
|
|
|
|
|
|
|
$description = $seq->desc(); |
|
845
|
|
|
|
|
|
|
Function: Get/set description text for a seq object |
|
846
|
|
|
|
|
|
|
Returns : Value of desc |
|
847
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
=cut |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
sub desc { |
|
852
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented(); |
|
853
|
|
|
|
|
|
|
} |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=head2 is_circular |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
Title : is_circular |
|
859
|
|
|
|
|
|
|
Usage : if( $obj->is_circular) { # Do something } |
|
860
|
|
|
|
|
|
|
Function: Returns true if the molecule is circular |
|
861
|
|
|
|
|
|
|
Returns : Boolean value |
|
862
|
|
|
|
|
|
|
Args : none |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=cut |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
sub is_circular { |
|
867
|
0
|
|
|
0
|
1
|
0
|
shift->throw_not_implemented; |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=head1 Private functions |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
These are some private functions for the PrimarySeqI interface. You do not |
|
874
|
|
|
|
|
|
|
need to implement these functions |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
=head2 _find_orfs_nucleotide |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
Title : _find_orfs_nucleotide |
|
879
|
|
|
|
|
|
|
Usage : |
|
880
|
|
|
|
|
|
|
Function: Finds ORF starting at 1st initiation codon in nucleotide sequence. |
|
881
|
|
|
|
|
|
|
The ORF is not required to have a termination codon. |
|
882
|
|
|
|
|
|
|
Example : |
|
883
|
|
|
|
|
|
|
Returns : a list of string coordinates of ORF locations (0-based half-open), |
|
884
|
|
|
|
|
|
|
sorted descending by length (so that the longest is first) |
|
885
|
|
|
|
|
|
|
as: [ start, end, frame, length ], [ start, end, frame, length ], ... |
|
886
|
|
|
|
|
|
|
Args : Nucleotide sequence, |
|
887
|
|
|
|
|
|
|
CodonTable object, |
|
888
|
|
|
|
|
|
|
(optional) alternative initiation codon (e.g. 'ATA'), |
|
889
|
|
|
|
|
|
|
(optional) boolean that, if true, stops after finding the |
|
890
|
|
|
|
|
|
|
first available ORF |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=cut |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
sub _find_orfs_nucleotide { |
|
895
|
14
|
|
|
14
|
|
25
|
my ( $self, $sequence, $codon_table, $start_codon, $first_only ) = @_; |
|
896
|
14
|
|
|
|
|
24
|
$sequence = uc $sequence; |
|
897
|
14
|
100
|
|
|
|
24
|
$start_codon = uc $start_codon if $start_codon; |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
my $is_start = $start_codon |
|
900
|
12
|
|
|
12
|
|
24
|
? sub { shift eq $start_codon } |
|
901
|
14
|
100
|
|
697
|
|
54
|
: sub { $codon_table->is_start_codon( shift ) }; |
|
|
697
|
|
|
|
|
1007
|
|
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
# stores the begin index of the currently-running ORF in each |
|
904
|
|
|
|
|
|
|
# reading frame |
|
905
|
14
|
|
|
|
|
26
|
my @current_orf_start = (-1,-1,-1); |
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
#< stores coordinates of longest observed orf (so far) in each |
|
908
|
|
|
|
|
|
|
# reading frame |
|
909
|
14
|
|
|
|
|
15
|
my @orfs; |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
# go through each base of the sequence, and each reading frame for each base |
|
912
|
14
|
|
|
|
|
21
|
my $seqlen = CORE::length $sequence; |
|
913
|
14
|
|
|
|
|
11
|
my @start_frame_order; |
|
914
|
14
|
|
|
|
|
32
|
for( my $j = 0; $j <= $seqlen-3; $j++ ) { |
|
915
|
1239
|
|
|
|
|
1387
|
my $frame = $j % 3; |
|
916
|
|
|
|
|
|
|
|
|
917
|
1239
|
|
|
|
|
1388
|
my $this_codon = substr( $sequence, $j, 3 ); |
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# if in an orf and this is either a stop codon or the last in-frame codon in the string |
|
920
|
1239
|
100
|
|
|
|
1832
|
if ( $current_orf_start[$frame] >= 0 ) { |
|
|
|
100
|
|
|
|
|
|
|
921
|
530
|
100
|
100
|
|
|
762
|
if ( $codon_table->is_ter_codon( $this_codon ) ||( my $is_last_codon_in_frame = ($j >= $seqlen-5)) ) { |
|
922
|
|
|
|
|
|
|
# record ORF start, end (half-open), length, and frame |
|
923
|
44
|
|
|
|
|
86
|
my @this_orf = ( $current_orf_start[$frame], $j+3, undef, $frame ); |
|
924
|
44
|
|
|
|
|
64
|
my $this_orf_length = $this_orf[2] = ( $this_orf[1] - $this_orf[0] ); |
|
925
|
|
|
|
|
|
|
|
|
926
|
44
|
100
|
100
|
|
|
83
|
if ($first_only && $frame == $start_frame_order[0]) { |
|
927
|
10
|
100
|
|
|
|
23
|
$self->warn( "Translating partial ORF " |
|
928
|
|
|
|
|
|
|
.$self->_truncate_seq( $self->_orf_sequence( $sequence, \@this_orf )) |
|
929
|
|
|
|
|
|
|
.' from end of nucleotide sequence' |
|
930
|
|
|
|
|
|
|
) if $is_last_codon_in_frame; |
|
931
|
9
|
|
|
|
|
40
|
return \@this_orf; |
|
932
|
|
|
|
|
|
|
} |
|
933
|
34
|
|
|
|
|
48
|
push @orfs, \@this_orf; |
|
934
|
34
|
|
|
|
|
71
|
$current_orf_start[$frame] = -1; |
|
935
|
|
|
|
|
|
|
} |
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
# if this is a start codon |
|
938
|
|
|
|
|
|
|
elsif ( $is_start->($this_codon) ) { |
|
939
|
44
|
|
|
|
|
50
|
$current_orf_start[$frame] = $j; |
|
940
|
44
|
|
|
|
|
81
|
push @start_frame_order, $frame; |
|
941
|
|
|
|
|
|
|
} |
|
942
|
|
|
|
|
|
|
} |
|
943
|
|
|
|
|
|
|
|
|
944
|
4
|
|
|
|
|
22
|
return sort { $b->[2] <=> $a->[2] } @orfs; |
|
|
88
|
|
|
|
|
95
|
|
|
945
|
|
|
|
|
|
|
} |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
sub _truncate_seq { |
|
949
|
4
|
|
|
4
|
|
6
|
my ($self, $seq) = @_; |
|
950
|
4
|
50
|
|
|
|
19
|
return CORE::length($seq) > 200 ? substr($seq,0,50).'...'.substr($seq,-50) : $seq; |
|
951
|
|
|
|
|
|
|
} |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
sub _orf_sequence { |
|
955
|
15
|
|
|
15
|
|
24
|
my ($self, $seq, $orf ) = @_; |
|
956
|
15
|
50
|
|
|
|
25
|
return '' unless $orf; |
|
957
|
15
|
|
|
|
|
40
|
return substr( $seq, $orf->[0], $orf->[2] ) |
|
958
|
|
|
|
|
|
|
} |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=head2 _attempt_to_load_Seq |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
Title : _attempt_to_load_Seq |
|
964
|
|
|
|
|
|
|
Usage : |
|
965
|
|
|
|
|
|
|
Function: |
|
966
|
|
|
|
|
|
|
Example : |
|
967
|
|
|
|
|
|
|
Returns : |
|
968
|
|
|
|
|
|
|
Args : |
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=cut |
|
971
|
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
sub _attempt_to_load_Seq { |
|
973
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
|
974
|
|
|
|
|
|
|
|
|
975
|
0
|
0
|
|
|
|
0
|
if( $main::{'Bio::PrimarySeq'} ) { |
|
976
|
0
|
|
|
|
|
0
|
return 1; |
|
977
|
|
|
|
|
|
|
} else { |
|
978
|
0
|
|
|
|
|
0
|
eval { |
|
979
|
0
|
|
|
|
|
0
|
require Bio::PrimarySeq; |
|
980
|
|
|
|
|
|
|
}; |
|
981
|
0
|
0
|
|
|
|
0
|
if( $@ ) { |
|
982
|
0
|
|
|
|
|
0
|
my $text = "Bio::PrimarySeq could not be loaded for [$self]\n". |
|
983
|
|
|
|
|
|
|
"This indicates that you are using Bio::PrimarySeqI ". |
|
984
|
|
|
|
|
|
|
"without Bio::PrimarySeq loaded or without providing a ". |
|
985
|
|
|
|
|
|
|
"complete implementation.\nThe most likely problem is that there ". |
|
986
|
|
|
|
|
|
|
"has been a misconfiguration of the bioperl environment\n". |
|
987
|
|
|
|
|
|
|
"Actual exception:\n\n"; |
|
988
|
0
|
|
|
|
|
0
|
$self->throw("$text$@\n"); |
|
989
|
0
|
|
|
|
|
0
|
return 0; |
|
990
|
|
|
|
|
|
|
} |
|
991
|
0
|
|
|
|
|
0
|
return 1; |
|
992
|
|
|
|
|
|
|
} |
|
993
|
|
|
|
|
|
|
} |
|
994
|
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
sub _setup_class { |
|
997
|
|
|
|
|
|
|
# Return name of class and setup some default parameters |
|
998
|
11
|
|
|
11
|
|
17
|
my ($self) = @_; |
|
999
|
11
|
|
|
|
|
14
|
my $seqclass; |
|
1000
|
11
|
50
|
|
|
|
32
|
if ($self->can_call_new()) { |
|
1001
|
11
|
|
|
|
|
19
|
$seqclass = ref($self); |
|
1002
|
|
|
|
|
|
|
} else { |
|
1003
|
0
|
|
|
|
|
0
|
$seqclass = 'Bio::PrimarySeq'; |
|
1004
|
0
|
|
|
|
|
0
|
$self->_attempt_to_load_Seq(); |
|
1005
|
|
|
|
|
|
|
} |
|
1006
|
11
|
|
|
|
|
17
|
my %opts; |
|
1007
|
11
|
50
|
|
|
|
73
|
if ($seqclass eq 'Bio::PrimarySeq') { |
|
1008
|
|
|
|
|
|
|
# Since sequence is in a Seq object, it has already been validated. |
|
1009
|
|
|
|
|
|
|
# We do not need to validate its trunc(), revcom(), etc |
|
1010
|
0
|
|
|
|
|
0
|
$opts{ -direct } = 1; |
|
1011
|
|
|
|
|
|
|
} |
|
1012
|
11
|
|
|
|
|
31
|
return $seqclass, \%opts; |
|
1013
|
|
|
|
|
|
|
} |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
1; |