| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Seq::EncodedSeq |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Aaron Mackey |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Aaron Mackey |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Seq::EncodedSeq - subtype of L to store DNA that encodes a protein |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
$obj = Bio::Seq::EncodedSeq->new( -seq => $dna, |
|
21
|
|
|
|
|
|
|
-encoding => "CCCCCCCIIIIICCCCC", |
|
22
|
|
|
|
|
|
|
-start => 1, |
|
23
|
|
|
|
|
|
|
-strand => 1, |
|
24
|
|
|
|
|
|
|
-length => 17 ); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# splice out (and possibly revcomp) the coding sequence |
|
27
|
|
|
|
|
|
|
$cds = obj->cds; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# obtain the protein translation of the sequence |
|
30
|
|
|
|
|
|
|
$prot = $obj->translate; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# other access/inspection routines as with Bio::LocatableSeq and |
|
33
|
|
|
|
|
|
|
# Bio::SeqI; note that coordinates are relative only to the DNA |
|
34
|
|
|
|
|
|
|
# sequence, not it's implicit encoded protein sequence. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Bio::Seq::EncodedSeq is a L |
|
39
|
|
|
|
|
|
|
object that holds a DNA sequence as well as information about the |
|
40
|
|
|
|
|
|
|
coding potential of that DNA sequence. It is meant to be useful in an |
|
41
|
|
|
|
|
|
|
alignment context, where the DNA may contain frameshifts, gaps and/or |
|
42
|
|
|
|
|
|
|
introns, or in describing the transcript of a gene. An EncodedSeq |
|
43
|
|
|
|
|
|
|
provides the ability to access the "spliced" coding sequence, meaning |
|
44
|
|
|
|
|
|
|
that all introns and gaps are removed, and any frameshifts are |
|
45
|
|
|
|
|
|
|
adjusted to provide a "clean" CDS. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
In order to make simultaneous use of either the DNA or the implicit |
|
48
|
|
|
|
|
|
|
encoded protein sequence coordinates, please see |
|
49
|
|
|
|
|
|
|
L. |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 ENCODING |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
We use the term "encoding" here to refer to the series of symbols that |
|
54
|
|
|
|
|
|
|
we use to identify which residues of a DNA sequence are protein-coding |
|
55
|
|
|
|
|
|
|
(i.e. part of a codon), intronic, part of a 5' or 3', frameshift |
|
56
|
|
|
|
|
|
|
"mutations", etc. From this information, a Bio::Seq::EncodedSeq is |
|
57
|
|
|
|
|
|
|
able to "figure out" its translational CDS. There are two sets of |
|
58
|
|
|
|
|
|
|
coding characters, one termed "implicit" and one termed "explicit". |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The "implicit" encoding is a bit simpler than the "explicit" encoding: |
|
61
|
|
|
|
|
|
|
'C' is used for any nucleotide that's part of a codon, 'U' for any |
|
62
|
|
|
|
|
|
|
UTR, etc. The full list is shown below: |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Code Meaning |
|
65
|
|
|
|
|
|
|
---- ------- |
|
66
|
|
|
|
|
|
|
C coding |
|
67
|
|
|
|
|
|
|
I intronic |
|
68
|
|
|
|
|
|
|
U untranslated |
|
69
|
|
|
|
|
|
|
G gapped (for use in alignments) |
|
70
|
|
|
|
|
|
|
F a "forward", +1 frameshift |
|
71
|
|
|
|
|
|
|
B a "backward", -1 frameshift |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
The "explicit" encoding is just an expansion of the "implicit" |
|
74
|
|
|
|
|
|
|
encoding, to denote phase: |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Code Meaning |
|
77
|
|
|
|
|
|
|
---- ------- |
|
78
|
|
|
|
|
|
|
C coding, 1st codon position |
|
79
|
|
|
|
|
|
|
D coding, 2nd codon position |
|
80
|
|
|
|
|
|
|
E coding, 3rd codon position |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
I intronic, phase 0 (relative to intron begin) |
|
83
|
|
|
|
|
|
|
J intronic, phase 1 |
|
84
|
|
|
|
|
|
|
K intronic, phase 2 |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
U untranslated 3'UTR |
|
87
|
|
|
|
|
|
|
V untranslated 5'UTR |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
G gapped (for use in alignments) |
|
90
|
|
|
|
|
|
|
F a "forward", +1 frameshift |
|
91
|
|
|
|
|
|
|
B a "backward", -1 frameshift |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Note that the explicit coding is meant to provide easy access to |
|
94
|
|
|
|
|
|
|
position/phase specific nucleotides: |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
$obj = Bio::Seq::EncodedSeq->new(-seq => "ACAATCAGACTACG...", |
|
97
|
|
|
|
|
|
|
-encoding => "CCCCCCIII..." |
|
98
|
|
|
|
|
|
|
); |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# fetch arrays of nucleotides at each codon position: |
|
101
|
|
|
|
|
|
|
my @pos1 = $obj->dnaseq(encoding => 'C', explicit => 1); |
|
102
|
|
|
|
|
|
|
my @pos2 = $obj->dnaseq(encoding => 'D'); |
|
103
|
|
|
|
|
|
|
my @pos3 = $obj->dnaseq(encoding => 'E'); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# fetch arrays of "3-1" codon dinucleotides, useful for genomic |
|
106
|
|
|
|
|
|
|
# signature analyses without compounding influences of codon bias: |
|
107
|
|
|
|
|
|
|
my @pairs = $obj->dnaseq(encoding => 'EC'); |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
114
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
115
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
118
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 Support |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
I |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
127
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
128
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
129
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
134
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
135
|
|
|
|
|
|
|
web: |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head1 AUTHOR - Aaron Mackey |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Email amackey@virginia.edu |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 APPENDIX |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
146
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=cut |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
package Bio::Seq::EncodedSeq; |
|
153
|
|
|
|
|
|
|
|
|
154
|
1
|
|
|
1
|
|
477
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
24
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::LocatableSeq); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
284
|
|
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 new |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Title : new |
|
162
|
|
|
|
|
|
|
Usage : $obj = Bio::Seq::EncodedSeq->new(-seq => "AGTACGTGTCATG", |
|
163
|
|
|
|
|
|
|
-encoding => "CCCCCCFCCCCCC", |
|
164
|
|
|
|
|
|
|
-id => "myseq", |
|
165
|
|
|
|
|
|
|
-start => 1, |
|
166
|
|
|
|
|
|
|
-end => 13, |
|
167
|
|
|
|
|
|
|
-strand => 1 |
|
168
|
|
|
|
|
|
|
); |
|
169
|
|
|
|
|
|
|
Function: creates a new Bio::Seq::EncodedSeq object from a supplied DNA |
|
170
|
|
|
|
|
|
|
sequence |
|
171
|
|
|
|
|
|
|
Returns : a new Bio::Seq::EncodedSeq object |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Args : seq - primary nucleotide sequence used to encode the |
|
174
|
|
|
|
|
|
|
protein; note that any positions involved in a |
|
175
|
|
|
|
|
|
|
gap ('G') or backward frameshift ('B') should |
|
176
|
|
|
|
|
|
|
have one or more gap characters; if the encoding |
|
177
|
|
|
|
|
|
|
specifies G or B, but no (or not enough) gap |
|
178
|
|
|
|
|
|
|
characters exist, *they'll be added*; similary, |
|
179
|
|
|
|
|
|
|
if there are gap characters without a |
|
180
|
|
|
|
|
|
|
corresponding G or B encoding, G's will be |
|
181
|
|
|
|
|
|
|
inserted into the encoding. This allows some |
|
182
|
|
|
|
|
|
|
flexibility in specifying your sequence and |
|
183
|
|
|
|
|
|
|
coding without having to calculate a lot of the |
|
184
|
|
|
|
|
|
|
encoding for yourself. |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
encoding - a string of characters (see Encoding Table) |
|
187
|
|
|
|
|
|
|
describing backwards frameshifts implied by the |
|
188
|
|
|
|
|
|
|
encoding but not present in the sequence will be |
|
189
|
|
|
|
|
|
|
added (as '-'s) to the sequence. If not |
|
190
|
|
|
|
|
|
|
supplied, it will be assumed that all positions |
|
191
|
|
|
|
|
|
|
are coding (C). Encoding may include either |
|
192
|
|
|
|
|
|
|
implicit phase encoding characters (i.e. "CCC") |
|
193
|
|
|
|
|
|
|
and/or explicit encoding characters (i.e. "CDE"). |
|
194
|
|
|
|
|
|
|
Additionally, prefixed numbers may be used to |
|
195
|
|
|
|
|
|
|
denote repetition (i.e. "27C3I28C"). |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Alternatively, encoding may be a hashref |
|
198
|
|
|
|
|
|
|
datastructure, with encoding characters as keys |
|
199
|
|
|
|
|
|
|
and Bio::LocationI objects (or arrayrefs of |
|
200
|
|
|
|
|
|
|
Bio::LocationI objects) as values, e.g.: |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
{ C => [ Bio::Location::Simple->new(1,9), |
|
203
|
|
|
|
|
|
|
Bio::Location::Simple->new(11,13) ], |
|
204
|
|
|
|
|
|
|
F => Bio::Location::Simple->new(10,10), |
|
205
|
|
|
|
|
|
|
} # same as "CCCCCCCCCFCCC" |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Note that if the location ranges overlap, the |
|
208
|
|
|
|
|
|
|
behavior of the encoding will be undefined |
|
209
|
|
|
|
|
|
|
(well, it will be defined, but only according to |
|
210
|
|
|
|
|
|
|
the order in which the hash keys are read, which |
|
211
|
|
|
|
|
|
|
is basically undefined ... just don't do that). |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
id, start, end, strand - as with Bio::LocatableSeq; note |
|
214
|
|
|
|
|
|
|
that the coordinates are relative to the |
|
215
|
|
|
|
|
|
|
encoding DNA sequence, not the implicit protein |
|
216
|
|
|
|
|
|
|
sequence. If strand is reversed, then the |
|
217
|
|
|
|
|
|
|
encoding is assumed to be relative to the |
|
218
|
|
|
|
|
|
|
reverse strand as well. |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=cut |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub new { |
|
223
|
4
|
|
|
4
|
1
|
135
|
my ($self, @args) = @_; |
|
224
|
4
|
|
|
|
|
16
|
$self = $self->SUPER::new(@args, -alphabet => 'dna'); |
|
225
|
4
|
|
|
|
|
9
|
my ($enc) = $self->_rearrange([qw(ENCODING)], @args); |
|
226
|
|
|
|
|
|
|
# set the real encoding: |
|
227
|
4
|
50
|
|
|
|
8
|
if ($enc) { |
|
228
|
0
|
|
|
|
|
0
|
$self->encoding($enc); |
|
229
|
|
|
|
|
|
|
} else { |
|
230
|
4
|
|
|
|
|
7
|
$self->_recheck_encoding; |
|
231
|
|
|
|
|
|
|
} |
|
232
|
4
|
|
|
|
|
22
|
return $self; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head2 encoding |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Title : encoding |
|
239
|
|
|
|
|
|
|
Usage : $obj->encoding("CCCCCC"); |
|
240
|
|
|
|
|
|
|
$obj->encoding( -encoding => { I => $location } ); |
|
241
|
|
|
|
|
|
|
$enc = $obj->encoding(-explicit => 1); |
|
242
|
|
|
|
|
|
|
$enc = $obj->encoding("CCCCCC", -explicit => 1); |
|
243
|
|
|
|
|
|
|
$enc = $obj->encoding(-location => $location, |
|
244
|
|
|
|
|
|
|
-explicit => 1, |
|
245
|
|
|
|
|
|
|
-absolute => 1 ); |
|
246
|
|
|
|
|
|
|
Function: get/set the objects encoding, either globally or by location(s). |
|
247
|
|
|
|
|
|
|
Returns : the (possibly new) encoding string. |
|
248
|
|
|
|
|
|
|
Args : encoding - see the encoding argument to the new() function. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
explicit - whether or not to return explicit phase |
|
251
|
|
|
|
|
|
|
information in the coding (i.e. "CCC" becomes |
|
252
|
|
|
|
|
|
|
"CDE", "III" becomes "IJK", etc); defaults to 0. |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
location - optional; location to get/set the encoding. |
|
255
|
|
|
|
|
|
|
Defaults to the entire sequence. |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
absolute - whether or not the locational elements (either |
|
258
|
|
|
|
|
|
|
in the encoding hashref or the location |
|
259
|
|
|
|
|
|
|
argument) are relative to the absolute start/end |
|
260
|
|
|
|
|
|
|
of the Bio::LocatableSeq, or to the internal, |
|
261
|
|
|
|
|
|
|
relative coordinate system (beginning at 1); |
|
262
|
|
|
|
|
|
|
defaults to 0 (i.e. relative) |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=cut |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub encoding { |
|
267
|
8
|
|
|
8
|
1
|
13
|
my ($self, @args) = @_; |
|
268
|
8
|
|
|
|
|
24
|
my ($enc, $loc, $exp, $abs) = $self->_rearrange([qw(ENCODING LOCATION EXPLICIT ABSOLUTE)], @args); |
|
269
|
|
|
|
|
|
|
|
|
270
|
8
|
100
|
|
|
|
23
|
if (!$enc) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# do nothing; _recheck_encoding will fix for us, if necessary |
|
272
|
|
|
|
|
|
|
} elsif (ref $enc eq 'HASH') { |
|
273
|
0
|
|
|
|
|
0
|
$self->throw( -class => 'Bio::Root::NotImplemented', |
|
274
|
|
|
|
|
|
|
-text => "Hashref functionality not yet implemented;\nplease email me if you really need this."); |
|
275
|
|
|
|
|
|
|
#TODO: finish all this |
|
276
|
0
|
|
|
|
|
0
|
while (my ($char, $locs) = each %$enc) { |
|
277
|
0
|
0
|
|
|
|
0
|
if (ref $locs eq 'ARRAY') { |
|
|
|
0
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
} elsif (UNIVERSAL::isa($locs, "Bio::LocationI")) { |
|
279
|
|
|
|
|
|
|
} else { |
|
280
|
0
|
|
|
|
|
0
|
$self->throw("Only a scalar or a ref to a hash; not a ref to a @{{lc ref $enc}}"); |
|
|
0
|
|
|
|
|
0
|
|
|
281
|
|
|
|
|
|
|
} |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} elsif (! ref $enc) { |
|
284
|
4
|
|
|
|
|
7
|
$enc = uc $enc; |
|
285
|
4
|
100
|
66
|
|
|
19
|
$exp = 1 if (!defined $exp && $enc =~ m/[DEJKV]/o); |
|
286
|
|
|
|
|
|
|
|
|
287
|
4
|
100
|
|
|
|
7
|
if ($enc =~ m/\d/o) { # numerically "enhanced" encoding |
|
288
|
1
|
|
|
|
|
2
|
my $numenc = $enc; |
|
289
|
1
|
|
|
|
|
2
|
$enc = ''; |
|
290
|
1
|
|
|
|
|
5
|
while ($numenc =~ m/\G(\d*)([CDEIJKUVGFB])/g) { |
|
291
|
4
|
|
|
|
|
7
|
my ($num, $char) = ($1, $2); |
|
292
|
4
|
100
|
|
|
|
7
|
$num = 1 unless $num; |
|
293
|
4
|
|
|
|
|
10
|
$enc .= $char x $num; |
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
4
|
50
|
66
|
|
|
21
|
if (defined $exp && $exp == 0 && $enc =~ m/([^CIUGFB])/) { |
|
|
|
50
|
33
|
|
|
|
|
|
298
|
0
|
|
|
|
|
0
|
$self->throw("Unrecognized character '$1' in implicit encoding"); |
|
299
|
|
|
|
|
|
|
} elsif ($enc =~ m/[^CDEIJKUVGFB]/) { |
|
300
|
0
|
|
|
|
|
0
|
$self->throw("Unrecognized character '$1' in explicit encoding"); |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
4
|
100
|
|
|
|
5
|
if ($loc) { # a global location, over which to apply the specified encoding. |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# balk if too many non-gap characters present in encoding: |
|
306
|
1
|
|
|
|
|
3
|
my ($ct) = $enc =~ tr/GB/GB/; |
|
307
|
1
|
|
|
|
|
2
|
$ct = length($enc) - $ct; |
|
308
|
1
|
50
|
33
|
|
|
3
|
$self->throw("Location length doesn't match number of coding chars in encoding!") |
|
309
|
|
|
|
|
|
|
if ($loc->location_type eq 'EXACT' && $loc->length != $ct); |
|
310
|
|
|
|
|
|
|
|
|
311
|
1
|
|
|
|
|
2
|
my $start = $loc->start; |
|
312
|
1
|
|
|
|
|
13
|
my $end = $loc->end; |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# strip any encoding that hangs off the ends of the sequence: |
|
315
|
1
|
50
|
|
|
|
3
|
if ($start < $self->start) { |
|
316
|
0
|
|
|
|
|
0
|
my $diff = $self->start - $start; |
|
317
|
0
|
|
|
|
|
0
|
$start = $self->start; |
|
318
|
0
|
|
|
|
|
0
|
$enc = substr($enc, $diff); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
1
|
50
|
|
|
|
3
|
if ($end > $self->end) { |
|
321
|
0
|
|
|
|
|
0
|
my $diff = $end - $self->end; |
|
322
|
0
|
|
|
|
|
0
|
$end = $self->end; |
|
323
|
0
|
|
|
|
|
0
|
$enc = substr($enc, -$diff); |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
|
|
326
|
1
|
|
|
|
|
1
|
my $currenc = $self->{_encoding}; |
|
327
|
1
|
|
|
|
|
3
|
my $currseq = $self->seq; |
|
328
|
|
|
|
|
|
|
|
|
329
|
1
|
|
|
|
|
2
|
my ($spanstart, $spanend) = ($self->column_from_residue_number($start), |
|
330
|
|
|
|
|
|
|
$self->column_from_residue_number($end) ); |
|
331
|
|
|
|
|
|
|
|
|
332
|
1
|
50
|
|
|
|
4
|
if ($currseq) { |
|
333
|
|
|
|
|
|
|
# strip any gaps in sequence spanned by this location: |
|
334
|
1
|
50
|
|
|
|
2
|
($spanstart, $spanend) = ($spanend, $spanstart) if $self->strand < 0; |
|
335
|
1
|
50
|
|
|
|
2
|
my ($before, $in, $after) = $currseq =~ m/(.{@{[ $spanstart - ($loc->location_type eq 'IN-BETWEEN' ? 0 : 1) ]}}) |
|
|
1
|
|
|
|
|
2
|
|
|
336
|
1
|
50
|
|
|
|
3
|
(.{@{[ $spanend - $spanstart + ($loc->location_type eq 'IN-BETWEEN' ? -1 : 1) ]}}) |
|
337
|
|
|
|
|
|
|
(.*) |
|
338
|
|
|
|
|
|
|
/x; |
|
339
|
1
|
|
50
|
|
|
5
|
$in ||= ''; |
|
340
|
1
|
|
|
|
|
1
|
$in =~ s/[\.\-]+//g; |
|
341
|
1
|
|
50
|
|
|
4
|
$currseq = ($before||'') . $in. ($after||''); |
|
|
|
|
50
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# change seq without changing the alphabet |
|
343
|
1
|
|
|
|
|
3
|
$self->seq($currseq,$self->alphabet()); |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
|
|
346
|
1
|
50
|
|
|
|
3
|
$currenc = reverse $currenc if $self->strand < 0; |
|
347
|
1
|
50
|
|
|
|
3
|
substr($currenc, $spanstart, $spanend - $spanstart + ($loc->location_type eq 'IN-BETWEEN' ? -1 : 1), |
|
|
|
50
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
$self->strand >= 0 ? $enc : reverse $enc); |
|
349
|
1
|
50
|
|
|
|
2
|
$currenc = reverse $currenc if $self->strand < 0; |
|
350
|
|
|
|
|
|
|
|
|
351
|
1
|
|
|
|
|
2
|
$self->{_encoding} = $currenc; |
|
352
|
1
|
|
|
|
|
2
|
$self->_recheck_encoding; |
|
353
|
|
|
|
|
|
|
|
|
354
|
1
|
|
|
|
|
2
|
$currenc = $self->{_encoding}; |
|
355
|
1
|
50
|
|
|
|
2
|
$currenc = reverse $currenc if $self->strand < 0; |
|
356
|
1
|
|
|
|
|
2
|
$enc = substr($currenc, $spanstart, length $enc); |
|
357
|
1
|
50
|
|
|
|
2
|
$enc = reverse $enc if $self->strand < 0; |
|
358
|
|
|
|
|
|
|
|
|
359
|
1
|
50
|
|
|
|
3
|
return $exp ? $enc: $self->_convert2implicit($enc); |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
} else { |
|
362
|
|
|
|
|
|
|
# presume a global redefinition; strip any current gap |
|
363
|
|
|
|
|
|
|
# characters in the sequence so they don't corrupt the |
|
364
|
|
|
|
|
|
|
# encoding |
|
365
|
3
|
|
|
|
|
7
|
my $dna = $self->seq; |
|
366
|
3
|
|
|
|
|
8
|
$dna =~ s/[\.\-]//g; |
|
367
|
3
|
|
|
|
|
5
|
$self->seq($dna, 'dna'); |
|
368
|
3
|
|
|
|
|
2
|
$self->{_encoding} = $enc; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
} else { |
|
371
|
0
|
|
|
|
|
0
|
$self->throw("Only a scalar or a ref to a hash; not a ref to a @{{lc ref $enc}}"); |
|
|
0
|
|
|
|
|
0
|
|
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
|
|
374
|
7
|
|
|
|
|
12
|
$self->_recheck_encoding(); |
|
375
|
|
|
|
|
|
|
|
|
376
|
7
|
100
|
|
|
|
15
|
return $exp ? $self->{_encoding} : $self->_convert2implicit($self->{_encoding}); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub _convert2implicit { |
|
381
|
8
|
|
|
8
|
|
9
|
my ($self, $enc) = @_; |
|
382
|
|
|
|
|
|
|
|
|
383
|
8
|
|
|
|
|
23
|
$enc =~ s/[DE]/C/g; |
|
384
|
8
|
|
|
|
|
9
|
$enc =~ s/[JK]/I/g; |
|
385
|
8
|
|
|
|
|
9
|
$enc =~ s/V/U/g; |
|
386
|
|
|
|
|
|
|
|
|
387
|
8
|
|
|
|
|
25
|
return $enc; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub _recheck_encoding { |
|
392
|
|
|
|
|
|
|
|
|
393
|
12
|
|
|
12
|
|
13
|
my $self = shift; |
|
394
|
|
|
|
|
|
|
|
|
395
|
12
|
|
100
|
|
|
40
|
my @enc = split //, ($self->{_encoding} || ''); |
|
396
|
|
|
|
|
|
|
|
|
397
|
12
|
|
|
|
|
24
|
my @nt = split(//, $self->SUPER::seq); |
|
398
|
12
|
100
|
66
|
|
|
22
|
@nt = reverse @nt if $self->strand && $self->strand < 0; |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# make sure an encoding exists! |
|
401
|
12
|
100
|
|
|
|
23
|
@enc = ('C') x scalar grep { !/[\.\-]/o } @nt |
|
|
31
|
|
|
|
|
37
|
|
|
402
|
|
|
|
|
|
|
unless @enc; |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# check for gaps to be truly present in the sequence |
|
405
|
|
|
|
|
|
|
# and vice versa |
|
406
|
12
|
|
|
|
|
10
|
my $i; |
|
407
|
12
|
|
100
|
|
|
39
|
for ($i = 0 ; $i < @nt && $i < @enc ; $i++) { |
|
408
|
101
|
100
|
100
|
|
|
541
|
if ($nt[$i] =~ /[\.\-]/o && $enc[$i] !~ m/[GB]/o) { |
|
|
|
100
|
100
|
|
|
|
|
|
409
|
5
|
|
|
|
|
18
|
splice(@enc, $i, 0, 'G'); |
|
410
|
|
|
|
|
|
|
} elsif ($nt[$i] !~ /[\.\-]/o && $enc[$i] =~ m/[GB]/o) { |
|
411
|
10
|
|
|
|
|
32
|
splice(@nt, $i, 0, '-'); |
|
412
|
|
|
|
|
|
|
} |
|
413
|
|
|
|
|
|
|
} |
|
414
|
12
|
50
|
|
|
|
26
|
if ($i < @enc) { |
|
|
|
100
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# extra encoding; presumably all gaps? |
|
416
|
0
|
|
|
|
|
0
|
for ( ; $i < @enc ; $i++) { |
|
417
|
0
|
0
|
|
|
|
0
|
if ($enc[$i] =~ m/[GB]/o) { |
|
418
|
0
|
|
|
|
|
0
|
push @nt, '-'; |
|
419
|
|
|
|
|
|
|
} else { |
|
420
|
0
|
|
|
|
|
0
|
$self->throw("Extraneous encoding info: " . join('', @enc[$i..$#enc])); |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
} elsif ($i < @nt) { |
|
424
|
3
|
|
|
|
|
5
|
for ( ; $i < @nt ; $i++) { |
|
425
|
8
|
100
|
|
|
|
11
|
if ($nt[$i] =~ m/[\.\-]/o) { |
|
426
|
2
|
|
|
|
|
6
|
push @enc, 'G'; |
|
427
|
|
|
|
|
|
|
} else { |
|
428
|
6
|
|
|
|
|
10
|
push @enc, 'C'; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
} |
|
431
|
|
|
|
|
|
|
} |
|
432
|
|
|
|
|
|
|
|
|
433
|
12
|
|
|
|
|
16
|
my @cde_array = qw(C D E); |
|
434
|
12
|
|
|
|
|
13
|
my @ijk_array = qw(I J K); |
|
435
|
|
|
|
|
|
|
# convert any leftover implicit coding into explicit coding |
|
436
|
12
|
|
|
|
|
13
|
my ($Cct, $Ict, $Uct, $Vct, $Vwarned) = (0, 0, 0, 0); |
|
437
|
12
|
|
|
|
|
20
|
for ($i = 0 ; $i < @enc ; $i++) { |
|
438
|
109
|
100
|
|
|
|
225
|
if ($enc[$i] =~ m/[CDE]/o) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
439
|
68
|
|
|
|
|
53
|
my $temp_index = $Cct %3; |
|
440
|
68
|
|
|
|
|
34
|
$enc[$i] = $cde_array[$temp_index]; |
|
441
|
68
|
|
|
|
|
47
|
$Cct++; $Ict = 0; $Uct = 1; |
|
|
68
|
|
|
|
|
33
|
|
|
|
68
|
|
|
|
|
44
|
|
|
442
|
68
|
50
|
33
|
|
|
149
|
$self->warn("3' untranslated encoding (V) seen prior to other coding symbols") |
|
443
|
|
|
|
|
|
|
if ($Vct && !$Vwarned++); |
|
444
|
|
|
|
|
|
|
} elsif ($enc[$i] =~ m/[IJK]/o) { |
|
445
|
0
|
|
|
|
|
0
|
$enc[$i] = $ijk_array[$Ict % 3]; |
|
446
|
0
|
|
|
|
|
0
|
$Ict++; $Uct = 1; |
|
|
0
|
|
|
|
|
0
|
|
|
447
|
0
|
0
|
0
|
|
|
0
|
$self->warn("3' untranslated encoding (V) seen before other coding symbols") |
|
448
|
|
|
|
|
|
|
if ($Vct && !$Vwarned++); |
|
449
|
|
|
|
|
|
|
} elsif ($enc[$i] =~ m/[UV]/o) { |
|
450
|
0
|
0
|
|
|
|
0
|
if ($Uct == 1) { |
|
451
|
0
|
|
|
|
|
0
|
$enc[$i] = 'V'; |
|
452
|
0
|
|
|
|
|
0
|
$Vct = 1; |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
} elsif ($enc[$i] eq 'B') { |
|
455
|
4
|
|
|
|
|
3
|
$Cct++; $Ict++ |
|
|
4
|
|
|
|
|
6
|
|
|
456
|
|
|
|
|
|
|
} elsif ($enc[$i] eq 'G') { |
|
457
|
|
|
|
|
|
|
# gap; leave alone |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
12
|
100
|
66
|
|
|
17
|
@nt = reverse @nt if $self->strand && $self->strand < 0; |
|
462
|
12
|
|
|
|
|
31
|
$self->seq(join('', @nt), 'dna'); |
|
463
|
|
|
|
|
|
|
|
|
464
|
12
|
|
|
|
|
40
|
$self->{_encoding} = join '', @enc; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head2 cds |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Title : cds |
|
471
|
|
|
|
|
|
|
Usage : $cds = $obj->cds(-nogaps => 1); |
|
472
|
|
|
|
|
|
|
Function: obtain the "spliced" DNA sequence, by removing any |
|
473
|
|
|
|
|
|
|
nucleotides that participate in an UTR, forward frameshift |
|
474
|
|
|
|
|
|
|
or intron, and replacing any unknown nucleotide implied by |
|
475
|
|
|
|
|
|
|
a backward frameshift or gap with N's. |
|
476
|
|
|
|
|
|
|
Returns : a Bio::Seq::EncodedSeq object, with an encoding consisting only |
|
477
|
|
|
|
|
|
|
of "CCCC..". |
|
478
|
|
|
|
|
|
|
Args : nogaps - strip any gap characters (resulting from 'G' or 'B' |
|
479
|
|
|
|
|
|
|
encodings), rather than replacing them with N's. |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=cut |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
sub cds { |
|
484
|
2
|
|
|
2
|
1
|
4
|
my ($self, @args) = @_; |
|
485
|
|
|
|
|
|
|
|
|
486
|
2
|
|
|
|
|
7
|
my ($nogaps, $loc) = $self->_rearrange([qw(NOGAPS LOCATION)], @args); |
|
487
|
2
|
50
|
|
|
|
6
|
$nogaps = 0 unless defined $nogaps; |
|
488
|
|
|
|
|
|
|
|
|
489
|
2
|
50
|
|
|
|
5
|
my @nt = split //, $self->strand < 0 ? $self->revcom->seq : $self->seq; |
|
490
|
2
|
|
|
|
|
4
|
my @enc = split //, $self->_convert2implicit($self->{_encoding}); |
|
491
|
|
|
|
|
|
|
|
|
492
|
2
|
|
|
|
|
2
|
my ($start, $end) = (0, scalar @nt); |
|
493
|
|
|
|
|
|
|
|
|
494
|
2
|
50
|
|
|
|
5
|
if ($loc) { |
|
495
|
0
|
|
|
|
|
0
|
$start = $loc->start; |
|
496
|
0
|
0
|
|
|
|
0
|
$start++ if $loc->location_type eq 'IN-BETWEEN'; |
|
497
|
0
|
|
|
|
|
0
|
$start = $self->column_from_residue_number($start); |
|
498
|
0
|
|
|
|
|
0
|
$start--; |
|
499
|
|
|
|
|
|
|
|
|
500
|
0
|
|
|
|
|
0
|
$end = $loc->end; |
|
501
|
0
|
|
|
|
|
0
|
$end = $self->column_from_residue_number($end); |
|
502
|
|
|
|
|
|
|
|
|
503
|
0
|
0
|
|
|
|
0
|
($start, $end) = ($end, $start) if $self->strand < 0; |
|
504
|
0
|
|
|
|
|
0
|
$start--; |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
2
|
|
|
|
|
5
|
for (my $i = $start ; $i < $end ; $i++) { |
|
508
|
20
|
50
|
33
|
|
|
112
|
if ($enc[$i] eq 'I' || $enc[$i] eq 'U' || $enc[$i] eq 'F') { |
|
|
|
100
|
33
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# remove introns, untranslated and forward frameshift nucleotides |
|
510
|
0
|
|
|
|
|
0
|
$nt[$i] = undef; |
|
511
|
|
|
|
|
|
|
} elsif ($enc[$i] eq 'G' || $enc[$i] eq 'B') { |
|
512
|
|
|
|
|
|
|
# replace gaps and backward frameshifts with N's, unless asked not to. |
|
513
|
8
|
50
|
|
|
|
15
|
$nt[$i] = $nogaps ? undef : 'N'; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
return ($self->can_call_new ? ref($self) : __PACKAGE__)->new( |
|
518
|
2
|
50
|
|
|
|
7
|
-seq => join('', grep { defined } @nt[$start..--$end]), |
|
|
20
|
|
|
|
|
22
|
|
|
519
|
|
|
|
|
|
|
-start => $self->start, |
|
520
|
|
|
|
|
|
|
-end => $self->end, |
|
521
|
|
|
|
|
|
|
-strand => 1, |
|
522
|
|
|
|
|
|
|
-alphabet => 'dna' ); |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head2 translate |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Title : translate |
|
529
|
|
|
|
|
|
|
Usage : $prot = $obj->translate(@args); |
|
530
|
|
|
|
|
|
|
Function: obtain the protein sequence encoded by the underlying DNA |
|
531
|
|
|
|
|
|
|
sequence; same as $obj->cds()->translate(@args). |
|
532
|
|
|
|
|
|
|
Returns : a Bio::PrimarySeq object. |
|
533
|
|
|
|
|
|
|
Args : same as the translate() function of Bio::PrimarySeqI |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
|
536
|
|
|
|
|
|
|
|
|
537
|
1
|
|
|
1
|
1
|
4
|
sub translate { shift->cds(-nogaps => 1, @_)->SUPER::translate(@_) }; |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head2 protseq |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Title : seq |
|
543
|
|
|
|
|
|
|
Usage : $protseq = $obj->protseq(); |
|
544
|
|
|
|
|
|
|
Function: obtain the raw protein sequence encoded by the underlying |
|
545
|
|
|
|
|
|
|
DNA sequence; This is the same as calling |
|
546
|
|
|
|
|
|
|
$obj->translate()->seq(); |
|
547
|
|
|
|
|
|
|
Returns : a string of single-letter amino acid codes |
|
548
|
|
|
|
|
|
|
Args : same as the seq() function of Bio::PrimarySeq; note that this |
|
549
|
|
|
|
|
|
|
function may not be used to set the protein sequence; see |
|
550
|
|
|
|
|
|
|
the dnaseq() function for that. |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=cut |
|
553
|
|
|
|
|
|
|
|
|
554
|
0
|
|
|
0
|
1
|
0
|
sub protseq { shift->cds(-nogaps => 1, @_)->SUPER::translate(@_)->seq }; |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=head2 dnaseq |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Title : dnaseq |
|
560
|
|
|
|
|
|
|
Usage : $dnaseq = $obj->dnaseq(); |
|
561
|
|
|
|
|
|
|
$obj->dnaseq("ACGTGTCGT", "CCCCCCCCC"); |
|
562
|
|
|
|
|
|
|
$obj->dnaseq(-seq => "ATG", |
|
563
|
|
|
|
|
|
|
-encoding => "CCC", |
|
564
|
|
|
|
|
|
|
-location => $loc ); |
|
565
|
|
|
|
|
|
|
@introns = $obj->$dnaseq(-encoding => 'I') |
|
566
|
|
|
|
|
|
|
Function: get/set the underlying DNA sequence; will overwrite any |
|
567
|
|
|
|
|
|
|
current DNA and/or encoding information present. |
|
568
|
|
|
|
|
|
|
Returns : a string of single-letter nucleotide codes, including any |
|
569
|
|
|
|
|
|
|
gaps implied by the encoding. |
|
570
|
|
|
|
|
|
|
Args : seq - the DNA sequence to be used as a replacement |
|
571
|
|
|
|
|
|
|
encoding - the encoding of the DNA sequence (see the new() |
|
572
|
|
|
|
|
|
|
constructor); defaults to all 'C' if setting a |
|
573
|
|
|
|
|
|
|
new DNA sequence. If no new DNA sequence is |
|
574
|
|
|
|
|
|
|
being provided, then the encoding is used as a |
|
575
|
|
|
|
|
|
|
"filter" for which to return fragments of |
|
576
|
|
|
|
|
|
|
non-overlapping DNA that match the encoding. |
|
577
|
|
|
|
|
|
|
location - optional, the location of the DNA sequence to |
|
578
|
|
|
|
|
|
|
get/set; defaults to the entire sequence. |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=cut |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
sub dnaseq { |
|
583
|
0
|
|
|
0
|
1
|
0
|
my ($self, @args) = @_; |
|
584
|
0
|
|
|
|
|
0
|
my ($seq, $enc, $loc) = $self->_rearrange([qw(DNASEQ ENCODING LOCATION)], @args); |
|
585
|
0
|
|
|
|
|
0
|
return $self; |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
# need to overload this so that we truncate both the seq and the encoding! |
|
590
|
|
|
|
|
|
|
sub trunc { |
|
591
|
1
|
|
|
1
|
1
|
1
|
my ($self, $start, $end) = @_; |
|
592
|
1
|
|
|
|
|
6
|
my $new = $self->SUPER::trunc($start, $end); |
|
593
|
1
|
|
|
|
|
2
|
$start--; |
|
594
|
1
|
|
|
|
|
1
|
my $enc = $self->{_encoding}; |
|
595
|
1
|
50
|
|
|
|
2
|
$enc = reverse $enc if $self->strand < 0; |
|
596
|
1
|
|
|
|
|
2
|
$enc = substr($enc, $start, $end - $start); |
|
597
|
1
|
50
|
|
|
|
2
|
$enc = reverse $enc if $self->strand < 0; |
|
598
|
1
|
|
|
|
|
2
|
$new->encoding($enc); |
|
599
|
1
|
|
|
|
|
4
|
return $new; |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
1; |