line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqUtils |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Please direct questions and support issues to |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Cared for by Heikki Lehvaslaiho |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright Heikki Lehvaslaiho |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Bio::SeqUtils - Additional methods for PrimarySeq objects |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Bio::SeqUtils; |
20
|
|
|
|
|
|
|
# get a Bio::PrimarySeqI compliant object, $seq, somehow |
21
|
|
|
|
|
|
|
$util = Bio::SeqUtils->new(); |
22
|
|
|
|
|
|
|
$polypeptide_3char = $util->seq3($seq); |
23
|
|
|
|
|
|
|
# or |
24
|
|
|
|
|
|
|
$polypeptide_3char = Bio::SeqUtils->seq3($seq); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# set the sequence string (stored in one char code in the object) |
27
|
|
|
|
|
|
|
Bio::SeqUtils->seq3($seq, $polypeptide_3char); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# translate a sequence in all six frames |
30
|
|
|
|
|
|
|
@seqs = Bio::SeqUtils->translate_6frames($seq); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# inplace editing of the sequence |
33
|
|
|
|
|
|
|
Bio::SeqUtils->mutate($seq, |
34
|
|
|
|
|
|
|
Bio::LiveSeq::Mutation->new(-seq => 'c', |
35
|
|
|
|
|
|
|
-pos => 3 |
36
|
|
|
|
|
|
|
)); |
37
|
|
|
|
|
|
|
# mutate a sequence to desired similarity% |
38
|
|
|
|
|
|
|
$newseq = Bio::SeqUtils-> evolve |
39
|
|
|
|
|
|
|
($seq, $similarity, $transition_transversion_rate); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# concatenate two or more sequences with annotations and features, |
42
|
|
|
|
|
|
|
# the first sequence will be modified |
43
|
|
|
|
|
|
|
Bio::SeqUtils->cat(@seqs); |
44
|
|
|
|
|
|
|
my $catseq=$seqs[0]; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# truncate a sequence, retaining features and adjusting their |
47
|
|
|
|
|
|
|
# coordinates if necessary |
48
|
|
|
|
|
|
|
my $truncseq = Bio::SeqUtils->trunc_with_features($seq, 100, 200); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# reverse complement a sequence and its features |
51
|
|
|
|
|
|
|
my $revcomseq = Bio::SeqUtils->revcom_with_features($seq); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# simulate cloning of a fragment into a vector. Cut the vector at |
54
|
|
|
|
|
|
|
# positions 1000 and 1100 (deleting postions 1001 to 1099) and |
55
|
|
|
|
|
|
|
# "ligate" a fragment into the sites. The fragment is |
56
|
|
|
|
|
|
|
# reverse-complemented in this example (option "flip"). |
57
|
|
|
|
|
|
|
# All features of the vector and fragment are preserved and |
58
|
|
|
|
|
|
|
# features that are affected by the deletion/insertion are |
59
|
|
|
|
|
|
|
# modified accordingly. |
60
|
|
|
|
|
|
|
# $vector and $fragment must be Bio::SeqI compliant objects |
61
|
|
|
|
|
|
|
my $new_molecule = Bio::Sequtils->ligate( |
62
|
|
|
|
|
|
|
-vector => $vector, |
63
|
|
|
|
|
|
|
-fragment => $fragment, |
64
|
|
|
|
|
|
|
-left => 1000, |
65
|
|
|
|
|
|
|
-right => 1100, |
66
|
|
|
|
|
|
|
-flip => 1 |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# delete a segment of a sequence (from pos 1000 to 1100, inclusive), |
70
|
|
|
|
|
|
|
# again preserving features and annotations |
71
|
|
|
|
|
|
|
my $new_molecule = Bio::SeqUtils->cut( $seq, 1000, 1100 ); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# insert a fragment into a recipient between positions 1000 and |
74
|
|
|
|
|
|
|
# 1001. $recipient is a Bio::SeqI compliant object |
75
|
|
|
|
|
|
|
my $new_molecule = Bio::SeqUtils::PbrTools->insert( |
76
|
|
|
|
|
|
|
$recipient_seq, |
77
|
|
|
|
|
|
|
$fragment_seq, |
78
|
|
|
|
|
|
|
1000 |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 DESCRIPTION |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
This class is a holder of methods that work on Bio::PrimarySeqI- |
84
|
|
|
|
|
|
|
compliant sequence objects, e.g. Bio::PrimarySeq and |
85
|
|
|
|
|
|
|
Bio::Seq. These methods are not part of the Bio::PrimarySeqI |
86
|
|
|
|
|
|
|
interface and should in general not be essential to the primary function |
87
|
|
|
|
|
|
|
of sequence objects. If you are thinking of adding essential |
88
|
|
|
|
|
|
|
functions, it might be better to create your own sequence class. |
89
|
|
|
|
|
|
|
See L, L, and L for more. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The methods take as their first argument a sequence object. It is |
92
|
|
|
|
|
|
|
possible to use methods without first creating a SeqUtils object, |
93
|
|
|
|
|
|
|
i.e. use it as an anonymous hash. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
The first two methods, seq3() and seq3in(), give out or read in protein |
96
|
|
|
|
|
|
|
sequences coded in three letter IUPAC amino acid codes. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
The next two methods, translate_3frames() and translate_6frames(), wrap |
99
|
|
|
|
|
|
|
around the standard translate method to give back an array of three |
100
|
|
|
|
|
|
|
forward or all six frame translations. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The mutate() method mutates the sequence string with a mutation |
103
|
|
|
|
|
|
|
description object. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The cat() method concatenates two or more sequences. The first sequence |
106
|
|
|
|
|
|
|
is modified by addition of the remaining sequences. All annotations and |
107
|
|
|
|
|
|
|
sequence features will be transferred. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The revcom_with_features() and trunc_with_features() methods are similar |
110
|
|
|
|
|
|
|
to the revcom() and trunc() methods from Bio::Seq, but also adjust any |
111
|
|
|
|
|
|
|
features associated with the sequence as appropriate. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
There are also methods that simulate molecular cloning with rich |
114
|
|
|
|
|
|
|
sequence objects. |
115
|
|
|
|
|
|
|
The delete() method cuts a segment out of a sequence and re-joins the |
116
|
|
|
|
|
|
|
left and right fragments (like splicing or digesting and re-ligating a |
117
|
|
|
|
|
|
|
molecule). Positions (and types) of sequence features are adjusted |
118
|
|
|
|
|
|
|
accordingly: |
119
|
|
|
|
|
|
|
Features that span the deleted segment are converted to split featuress |
120
|
|
|
|
|
|
|
to indicate the disruption. (Sub)Features that extend into the deleted |
121
|
|
|
|
|
|
|
segment are truncated. |
122
|
|
|
|
|
|
|
A new molecule is created and returned. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
The insert() method inserts a fragment (which can be a rich Bio::Seq |
125
|
|
|
|
|
|
|
object) into another sequence object adding all annotations and |
126
|
|
|
|
|
|
|
features to the final product. |
127
|
|
|
|
|
|
|
Features that span the insertion site are converted to split features |
128
|
|
|
|
|
|
|
to indicate the disruption. |
129
|
|
|
|
|
|
|
A new feature is added to indicate the inserted fragment itself. |
130
|
|
|
|
|
|
|
A new molecule is created and returned. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The ligate() method simulates digesting a recipient (vector) and |
133
|
|
|
|
|
|
|
ligating a fragment into it, which can also be flipped if needed. It |
134
|
|
|
|
|
|
|
is simply a combination of a deletion and an insertion step and |
135
|
|
|
|
|
|
|
returns a new molecule. The rules for modifying feature locations |
136
|
|
|
|
|
|
|
outlined above are also used here, e.g. features that span the cut |
137
|
|
|
|
|
|
|
sites are converted to split features with truncated sub-locations. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 FEEDBACK |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 Mailing Lists |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
145
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
146
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
149
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 Support |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
I |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
158
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
159
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
160
|
|
|
|
|
|
|
with code and data examples if at all possible. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head2 Reporting Bugs |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
165
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
166
|
|
|
|
|
|
|
web: |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 AUTHOR - Heikki Lehvaslaiho |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Email: heikki-at-bioperl-dot-org |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Roy R. Chaudhuri - roy.chaudhuri at gmail.com |
177
|
|
|
|
|
|
|
Frank Schwach - frank.schwach@sanger.ac.uk |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 APPENDIX |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
182
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Let the code begin... |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
package Bio::SeqUtils; |
189
|
204
|
|
|
204
|
|
755
|
use strict; |
|
204
|
|
|
|
|
217
|
|
|
204
|
|
|
|
|
5131
|
|
190
|
204
|
|
|
204
|
|
636
|
use warnings; |
|
204
|
|
|
|
|
269
|
|
|
204
|
|
|
|
|
5518
|
|
191
|
204
|
|
|
204
|
|
890
|
use Scalar::Util qw(blessed); |
|
204
|
|
|
|
|
234
|
|
|
204
|
|
|
|
|
9191
|
|
192
|
204
|
|
|
204
|
|
712
|
use parent qw(Bio::Root::Root); |
|
204
|
|
|
|
|
219
|
|
|
204
|
|
|
|
|
1300
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# new inherited from RootI |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
our %ONECODE = ( |
197
|
|
|
|
|
|
|
'Ala' => 'A', |
198
|
|
|
|
|
|
|
'Asx' => 'B', |
199
|
|
|
|
|
|
|
'Cys' => 'C', |
200
|
|
|
|
|
|
|
'Asp' => 'D', |
201
|
|
|
|
|
|
|
'Glu' => 'E', |
202
|
|
|
|
|
|
|
'Phe' => 'F', |
203
|
|
|
|
|
|
|
'Gly' => 'G', |
204
|
|
|
|
|
|
|
'His' => 'H', |
205
|
|
|
|
|
|
|
'Ile' => 'I', |
206
|
|
|
|
|
|
|
'Lys' => 'K', |
207
|
|
|
|
|
|
|
'Leu' => 'L', |
208
|
|
|
|
|
|
|
'Met' => 'M', |
209
|
|
|
|
|
|
|
'Asn' => 'N', |
210
|
|
|
|
|
|
|
'Pro' => 'P', |
211
|
|
|
|
|
|
|
'Gln' => 'Q', |
212
|
|
|
|
|
|
|
'Arg' => 'R', |
213
|
|
|
|
|
|
|
'Ser' => 'S', |
214
|
|
|
|
|
|
|
'Thr' => 'T', |
215
|
|
|
|
|
|
|
'Val' => 'V', |
216
|
|
|
|
|
|
|
'Trp' => 'W', |
217
|
|
|
|
|
|
|
'Xaa' => 'X', |
218
|
|
|
|
|
|
|
'Tyr' => 'Y', |
219
|
|
|
|
|
|
|
'Glx' => 'Z', |
220
|
|
|
|
|
|
|
'Ter' => '*', |
221
|
|
|
|
|
|
|
'Sec' => 'U', |
222
|
|
|
|
|
|
|
'Pyl' => 'O', |
223
|
|
|
|
|
|
|
'Xle' => 'J' |
224
|
|
|
|
|
|
|
); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
our %THREECODE = ( |
227
|
|
|
|
|
|
|
'A' => 'Ala', |
228
|
|
|
|
|
|
|
'B' => 'Asx', |
229
|
|
|
|
|
|
|
'C' => 'Cys', |
230
|
|
|
|
|
|
|
'D' => 'Asp', |
231
|
|
|
|
|
|
|
'E' => 'Glu', |
232
|
|
|
|
|
|
|
'F' => 'Phe', |
233
|
|
|
|
|
|
|
'G' => 'Gly', |
234
|
|
|
|
|
|
|
'H' => 'His', |
235
|
|
|
|
|
|
|
'I' => 'Ile', |
236
|
|
|
|
|
|
|
'K' => 'Lys', |
237
|
|
|
|
|
|
|
'L' => 'Leu', |
238
|
|
|
|
|
|
|
'M' => 'Met', |
239
|
|
|
|
|
|
|
'N' => 'Asn', |
240
|
|
|
|
|
|
|
'P' => 'Pro', |
241
|
|
|
|
|
|
|
'Q' => 'Gln', |
242
|
|
|
|
|
|
|
'R' => 'Arg', |
243
|
|
|
|
|
|
|
'S' => 'Ser', |
244
|
|
|
|
|
|
|
'T' => 'Thr', |
245
|
|
|
|
|
|
|
'V' => 'Val', |
246
|
|
|
|
|
|
|
'W' => 'Trp', |
247
|
|
|
|
|
|
|
'Y' => 'Tyr', |
248
|
|
|
|
|
|
|
'Z' => 'Glx', |
249
|
|
|
|
|
|
|
'X' => 'Xaa', |
250
|
|
|
|
|
|
|
'*' => 'Ter', |
251
|
|
|
|
|
|
|
'U' => 'Sec', |
252
|
|
|
|
|
|
|
'O' => 'Pyl', |
253
|
|
|
|
|
|
|
'J' => 'Xle' |
254
|
|
|
|
|
|
|
); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 seq3 |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Title : seq3 |
259
|
|
|
|
|
|
|
Usage : $string = Bio::SeqUtils->seq3($seq) |
260
|
|
|
|
|
|
|
Function: Read only method that returns the amino acid sequence as a |
261
|
|
|
|
|
|
|
string of three letter codes. alphabet has to be |
262
|
|
|
|
|
|
|
'protein'. Output follows the IUPAC standard plus 'Ter' for |
263
|
|
|
|
|
|
|
terminator. Any unknown character, including the default |
264
|
|
|
|
|
|
|
unknown character 'X', is changed into 'Xaa'. A noncoded |
265
|
|
|
|
|
|
|
aminoacid selenocystein is recognized (Sec, U). |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Returns : A scalar |
268
|
|
|
|
|
|
|
Args : character used for stop in the protein sequence optional, |
269
|
|
|
|
|
|
|
defaults to '*' string used to separate the output amino |
270
|
|
|
|
|
|
|
acid codes, optional, defaults to '' |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub seq3 { |
275
|
4
|
|
|
4
|
1
|
6
|
my ( $self, $seq, $stop, $sep ) = @_; |
276
|
|
|
|
|
|
|
|
277
|
4
|
50
|
|
|
|
19
|
$seq->isa('Bio::PrimarySeqI') |
278
|
|
|
|
|
|
|
|| $self->throw('Not a Bio::PrimarySeqI object but [$self]'); |
279
|
4
|
50
|
|
|
|
9
|
$seq->alphabet eq 'protein' |
280
|
|
|
|
|
|
|
|| $self->throw('Not a protein sequence'); |
281
|
|
|
|
|
|
|
|
282
|
4
|
100
|
|
|
|
8
|
if ( defined $stop ) { |
283
|
1
|
50
|
|
|
|
3
|
length $stop != 1 |
284
|
|
|
|
|
|
|
and $self->throw('One character stop needed, not [$stop]'); |
285
|
1
|
|
|
|
|
2
|
$THREECODE{$stop} = "Ter"; |
286
|
|
|
|
|
|
|
} |
287
|
4
|
|
100
|
|
|
9
|
$sep ||= ''; |
288
|
|
|
|
|
|
|
|
289
|
4
|
|
|
|
|
3
|
my $aa3s; |
290
|
4
|
|
|
|
|
6
|
foreach my $aa ( split //, uc $seq->seq ) { |
291
|
87
|
50
|
|
|
|
133
|
$THREECODE{$aa} and $aa3s .= $THREECODE{$aa} . $sep, next; |
292
|
0
|
|
|
|
|
0
|
$aa3s .= 'Xaa' . $sep; |
293
|
|
|
|
|
|
|
} |
294
|
4
|
100
|
|
|
|
13
|
$sep and substr( $aa3s, -( length $sep ), length $sep ) = ''; |
295
|
4
|
|
|
|
|
13
|
return $aa3s; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 seq3in |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
Title : seq3in |
301
|
|
|
|
|
|
|
Usage : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer') |
302
|
|
|
|
|
|
|
Function: Method for changing of the sequence of a |
303
|
|
|
|
|
|
|
Bio::PrimarySeqI sequence object. The three letter amino |
304
|
|
|
|
|
|
|
acid input string is converted into one letter code. Any |
305
|
|
|
|
|
|
|
unknown character triplet, including the default 'Xaa', is |
306
|
|
|
|
|
|
|
converted into 'X'. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Returns : Bio::PrimarySeq object |
309
|
|
|
|
|
|
|
Args : sequence string |
310
|
|
|
|
|
|
|
optional character to be used for stop in the protein sequence, |
311
|
|
|
|
|
|
|
defaults to '*' |
312
|
|
|
|
|
|
|
optional character to be used for unknown in the protein sequence, |
313
|
|
|
|
|
|
|
defaults to 'X' |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=cut |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub seq3in { |
318
|
3
|
|
|
3
|
1
|
6
|
my ( $self, $seq, $string, $stop, $unknown ) = @_; |
319
|
|
|
|
|
|
|
|
320
|
3
|
50
|
|
|
|
16
|
$seq->isa('Bio::PrimarySeqI') |
321
|
|
|
|
|
|
|
|| $self->throw("Not a Bio::PrimarySeqI object but [$self]"); |
322
|
3
|
50
|
|
|
|
10
|
$seq->alphabet eq 'protein' |
323
|
|
|
|
|
|
|
|| $self->throw('Not a protein sequence'); |
324
|
|
|
|
|
|
|
|
325
|
3
|
50
|
|
|
|
13
|
if ( defined $stop ) { |
326
|
0
|
0
|
|
|
|
0
|
length $stop != 1 |
327
|
|
|
|
|
|
|
and $self->throw("One character stop needed, not [$stop]"); |
328
|
0
|
|
|
|
|
0
|
$ONECODE{'Ter'} = $stop; |
329
|
|
|
|
|
|
|
} |
330
|
3
|
50
|
|
|
|
9
|
if ( defined $unknown ) { |
331
|
0
|
0
|
|
|
|
0
|
length $unknown != 1 |
332
|
|
|
|
|
|
|
and $self->throw("One character stop needed, not [$unknown]"); |
333
|
0
|
|
|
|
|
0
|
$ONECODE{'Xaa'} = $unknown; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
3
|
|
|
|
|
4
|
my ( $aas, $aa3 ); |
337
|
3
|
|
|
|
|
6
|
my $length = ( length $string ) - 2; |
338
|
3
|
|
|
|
|
10
|
for ( my $i = 0 ; $i < $length ; $i += 3 ) { |
339
|
89
|
|
|
|
|
81
|
$aa3 = substr( $string, $i, 3 ); |
340
|
89
|
|
|
|
|
67
|
$aa3 = ucfirst( lc($aa3) ); |
341
|
89
|
100
|
|
|
|
228
|
$ONECODE{$aa3} and $aas .= $ONECODE{$aa3}, next; |
342
|
1
|
|
|
|
|
3
|
$aas .= $ONECODE{'Xaa'}; |
343
|
|
|
|
|
|
|
} |
344
|
3
|
|
|
|
|
11
|
$seq->seq($aas); |
345
|
3
|
|
|
|
|
9
|
return $seq; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=head2 translate_3frames |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Title : translate_3frames |
351
|
|
|
|
|
|
|
Usage : @prots = Bio::SeqUtils->translate_3frames($seq) |
352
|
|
|
|
|
|
|
Function: Translate a nucleotide sequence in three forward frames. |
353
|
|
|
|
|
|
|
The IDs of the sequences are appended with '-0F', '-1F', '-2F'. |
354
|
|
|
|
|
|
|
Returns : An array of seq objects |
355
|
|
|
|
|
|
|
Args : sequence object |
356
|
|
|
|
|
|
|
same arguments as to Bio::PrimarySeqI::translate |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub translate_3frames { |
361
|
3
|
|
|
3
|
1
|
4
|
my ( $self, $seq, @args ) = @_; |
362
|
|
|
|
|
|
|
|
363
|
3
|
50
|
|
|
|
18
|
$self->throw( 'Object [$seq] ' |
364
|
|
|
|
|
|
|
. 'of class [' |
365
|
|
|
|
|
|
|
. ref($seq) |
366
|
|
|
|
|
|
|
. '] can not be translated.' ) |
367
|
|
|
|
|
|
|
unless $seq->can('translate'); |
368
|
|
|
|
|
|
|
|
369
|
3
|
|
|
|
|
4
|
my ( $stop, $unknown, $frame, $tableid, $fullCDS, $throw ) = @args; |
370
|
3
|
|
|
|
|
1
|
my @seqs; |
371
|
3
|
|
|
|
|
4
|
my $f = 0; |
372
|
3
|
|
|
|
|
7
|
while ( $f != 3 ) { |
373
|
9
|
|
|
|
|
17
|
my $translation = |
374
|
|
|
|
|
|
|
$seq->translate( $stop, $unknown, $f, $tableid, $fullCDS, $throw ); |
375
|
9
|
|
|
|
|
15
|
$translation->id( $seq->id . "-" . $f . "F" ); |
376
|
9
|
|
|
|
|
10
|
push @seqs, $translation; |
377
|
9
|
|
|
|
|
15
|
$f++; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
3
|
|
|
|
|
8
|
return @seqs; |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 translate_6frames |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Title : translate_6frames |
386
|
|
|
|
|
|
|
Usage : @prots = Bio::SeqUtils->translate_6frames($seq) |
387
|
|
|
|
|
|
|
Function: translate a nucleotide sequence in all six frames |
388
|
|
|
|
|
|
|
The IDs of the sequences are appended with '-0F', '-1F', '-2F', |
389
|
|
|
|
|
|
|
'-0R', '-1R', '-2R'. |
390
|
|
|
|
|
|
|
Returns : An array of seq objects |
391
|
|
|
|
|
|
|
Args : sequence object |
392
|
|
|
|
|
|
|
same arguments as to Bio::PrimarySeqI::translate |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=cut |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
sub translate_6frames { |
397
|
1
|
|
|
1
|
1
|
351
|
my ( $self, $seq, @args ) = @_; |
398
|
|
|
|
|
|
|
|
399
|
1
|
|
|
|
|
2
|
my @seqs = $self->translate_3frames( $seq, @args ); |
400
|
1
|
|
|
|
|
8
|
my @seqs2 = $self->translate_3frames( $seq->revcom, @args ); |
401
|
1
|
|
|
|
|
3
|
foreach my $seq2 (@seqs2) { |
402
|
3
|
|
|
|
|
5
|
my ($tmp) = $seq2->id; |
403
|
3
|
|
|
|
|
7
|
$tmp =~ s/F$/R/g; |
404
|
3
|
|
|
|
|
4
|
$seq2->id($tmp); |
405
|
|
|
|
|
|
|
} |
406
|
1
|
|
|
|
|
4
|
return @seqs, @seqs2; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 valid_aa |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Title : valid_aa |
412
|
|
|
|
|
|
|
Usage : my @aa = $table->valid_aa |
413
|
|
|
|
|
|
|
Function: Retrieves a list of the valid amino acid codes. |
414
|
|
|
|
|
|
|
The list is ordered so that first 21 codes are for unique |
415
|
|
|
|
|
|
|
amino acids. The rest are ['B', 'Z', 'X', '*']. |
416
|
|
|
|
|
|
|
Returns : array of all the valid amino acid codes |
417
|
|
|
|
|
|
|
Args : [optional] $code => [0 -> return list of 1 letter aa codes, |
418
|
|
|
|
|
|
|
1 -> return list of 3 letter aa codes, |
419
|
|
|
|
|
|
|
2 -> return associative array of both ] |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=cut |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub valid_aa { |
424
|
410
|
|
|
410
|
1
|
2194
|
my ( $self, $code ) = @_; |
425
|
|
|
|
|
|
|
|
426
|
410
|
100
|
|
|
|
1528
|
if ( !$code ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
427
|
204
|
|
|
|
|
267
|
my @codes; |
428
|
204
|
|
|
|
|
2080
|
foreach my $c ( sort values %ONECODE ) { |
429
|
5508
|
100
|
|
|
|
9030
|
push @codes, $c unless ( $c =~ /[BZX\*]/ ); |
430
|
|
|
|
|
|
|
} |
431
|
204
|
|
|
|
|
402
|
push @codes, qw(B Z X *); # so they are in correct order ? |
432
|
204
|
|
|
|
|
1345
|
return @codes; |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
elsif ( $code == 1 ) { |
435
|
1
|
|
|
|
|
2
|
my @codes; |
436
|
1
|
|
|
|
|
10
|
foreach my $c ( sort keys %ONECODE ) { |
437
|
27
|
100
|
|
|
|
52
|
push @codes, $c unless ( $c =~ /(Asx|Glx|Xaa|Ter)/ ); |
438
|
|
|
|
|
|
|
} |
439
|
1
|
|
|
|
|
3
|
push @codes, ( 'Asx', 'Glx', 'Xaa', 'Ter' ); |
440
|
1
|
|
|
|
|
11
|
return @codes; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
elsif ( $code == 2 ) { |
443
|
205
|
|
|
|
|
2186
|
my %codes = %ONECODE; |
444
|
205
|
|
|
|
|
813
|
foreach my $c ( keys %ONECODE ) { |
445
|
5535
|
|
|
|
|
3556
|
my $aa = $ONECODE{$c}; |
446
|
5535
|
|
|
|
|
5153
|
$codes{$aa} = $c; |
447
|
|
|
|
|
|
|
} |
448
|
205
|
|
|
|
|
4761
|
return %codes; |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
else { |
451
|
0
|
|
|
|
|
0
|
$self->warn( |
452
|
|
|
|
|
|
|
"unrecognized code in " . ref($self) . " method valid_aa()" ); |
453
|
0
|
|
|
|
|
0
|
return (); |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 mutate |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Title : mutate |
460
|
|
|
|
|
|
|
Usage : Bio::SeqUtils->mutate($seq,$mutation1, $mutation2); |
461
|
|
|
|
|
|
|
Function: Inplace editing of the sequence. |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
The second argument can be a Bio::LiveSeq::Mutation object |
464
|
|
|
|
|
|
|
or an array of them. The mutations are applied sequentially |
465
|
|
|
|
|
|
|
checking only that their position is within the current |
466
|
|
|
|
|
|
|
sequence. Insertions are inserted before the given |
467
|
|
|
|
|
|
|
position. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Returns : boolean |
470
|
|
|
|
|
|
|
Args : sequence object |
471
|
|
|
|
|
|
|
mutation, a Bio::LiveSeq::Mutation object, or an array of them |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
See L. |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=cut |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub mutate { |
478
|
3
|
|
|
3
|
1
|
7
|
my ( $self, $seq, @mutations ) = @_; |
479
|
|
|
|
|
|
|
|
480
|
3
|
50
|
|
|
|
8
|
$self->throw( 'Object [$seq] ' |
481
|
|
|
|
|
|
|
. 'of class [' |
482
|
|
|
|
|
|
|
. ref($seq) |
483
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
484
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
485
|
3
|
50
|
|
|
|
9
|
$self->throw( 'Object [$mutations[0]] ' |
486
|
|
|
|
|
|
|
. 'of class [' |
487
|
|
|
|
|
|
|
. ref( $mutations[0] ) |
488
|
|
|
|
|
|
|
. '] should be a Bio::LiveSeq::Mutation' ) |
489
|
|
|
|
|
|
|
unless $mutations[0]->isa('Bio::LiveSeq::Mutation'); |
490
|
|
|
|
|
|
|
|
491
|
3
|
|
|
|
|
4
|
foreach my $mutation (@mutations) { |
492
|
4
|
50
|
|
|
|
8
|
$self->throw('Attempting to mutate sequence beyond its length') |
493
|
|
|
|
|
|
|
unless $mutation->pos - 1 <= $seq->length; |
494
|
|
|
|
|
|
|
|
495
|
4
|
|
|
|
|
8
|
my $string = $seq->seq; |
496
|
4
|
|
|
|
|
5
|
substr $string, $mutation->pos - 1, $mutation->len, $mutation->seq; |
497
|
4
|
|
|
|
|
5
|
$seq->seq($string); |
498
|
|
|
|
|
|
|
} |
499
|
3
|
|
|
|
|
6
|
1; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head2 cat |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Title : cat |
505
|
|
|
|
|
|
|
Usage : Bio::SeqUtils->cat(@seqs); |
506
|
|
|
|
|
|
|
my $catseq=$seqs[0]; |
507
|
|
|
|
|
|
|
Function: Concatenates a list of Bio::Seq objects, adding them all on to the |
508
|
|
|
|
|
|
|
end of the first sequence. Annotations and sequence features are |
509
|
|
|
|
|
|
|
copied over from any additional objects, and the coordinates of any |
510
|
|
|
|
|
|
|
copied features are adjusted appropriately. |
511
|
|
|
|
|
|
|
Returns : a boolean |
512
|
|
|
|
|
|
|
Args : array of sequence objects |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Note that annotations have no sequence locations. If you concatenate |
515
|
|
|
|
|
|
|
sequences with the same annotations they will all be added. |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub cat { |
520
|
4
|
|
|
4
|
1
|
42
|
my ( $self, $seq, @seqs ) = @_; |
521
|
4
|
50
|
|
|
|
17
|
$self->throw( 'Object [$seq] ' |
522
|
|
|
|
|
|
|
. 'of class [' |
523
|
|
|
|
|
|
|
. ref($seq) |
524
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
525
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
526
|
|
|
|
|
|
|
|
527
|
4
|
|
|
|
|
5
|
for my $catseq (@seqs) { |
528
|
5
|
50
|
|
|
|
12
|
$self->throw( 'Object [$catseq] ' |
529
|
|
|
|
|
|
|
. 'of class [' |
530
|
|
|
|
|
|
|
. ref($catseq) |
531
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
532
|
|
|
|
|
|
|
unless $catseq->isa('Bio::PrimarySeqI'); |
533
|
|
|
|
|
|
|
|
534
|
5
|
100
|
|
|
|
11
|
$self->throw( |
535
|
|
|
|
|
|
|
'Trying to concatenate sequences with different alphabets: ' |
536
|
|
|
|
|
|
|
. $seq->display_id . '(' |
537
|
|
|
|
|
|
|
. $seq->alphabet |
538
|
|
|
|
|
|
|
. ') and ' |
539
|
|
|
|
|
|
|
. $catseq->display_id . '(' |
540
|
|
|
|
|
|
|
. $catseq->alphabet |
541
|
|
|
|
|
|
|
. ')' ) |
542
|
|
|
|
|
|
|
unless $catseq->alphabet eq $seq->alphabet; |
543
|
|
|
|
|
|
|
|
544
|
4
|
|
|
|
|
10
|
my $length = $seq->length; |
545
|
4
|
|
|
|
|
6
|
$seq->seq( $seq->seq . $catseq->seq ); |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
# move annotations |
548
|
4
|
100
|
66
|
|
|
25
|
if ( $seq->isa("Bio::AnnotatableI") |
549
|
|
|
|
|
|
|
and $catseq->isa("Bio::AnnotatableI") ) |
550
|
|
|
|
|
|
|
{ |
551
|
3
|
|
|
|
|
5
|
foreach my $key ( $catseq->annotation->get_all_annotation_keys() ) { |
552
|
|
|
|
|
|
|
|
553
|
3
|
|
|
|
|
4
|
foreach my $value ( $catseq->annotation->get_Annotations($key) ) |
554
|
|
|
|
|
|
|
{ |
555
|
5
|
|
|
|
|
7
|
$seq->annotation->add_Annotation( $key, $value ); |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# move SeqFeatures |
561
|
4
|
100
|
66
|
|
|
21
|
if ( $seq->isa('Bio::SeqI') and $catseq->isa('Bio::SeqI') ) { |
562
|
3
|
|
|
|
|
6
|
for my $feat ( $catseq->get_SeqFeatures ) { |
563
|
2
|
|
|
|
|
6
|
$seq->add_SeqFeature( $self->_coord_adjust( $feat, $length ) ); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
} |
568
|
3
|
|
|
|
|
8
|
1; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 trunc_with_features |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Title : trunc_with_features |
574
|
|
|
|
|
|
|
Usage : $trunc=Bio::SeqUtils->trunc_with_features($seq, $start, $end); |
575
|
|
|
|
|
|
|
Function: Like Bio::Seq::trunc, but keeps features (adjusting coordinates |
576
|
|
|
|
|
|
|
where necessary. Features that partially overlap the region have |
577
|
|
|
|
|
|
|
their location changed to a Bio::Location::Fuzzy. |
578
|
|
|
|
|
|
|
Returns : A new sequence object |
579
|
|
|
|
|
|
|
Args : A sequence object, start coordinate, end coordinate (inclusive) |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=cut |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub trunc_with_features { |
585
|
204
|
|
|
204
|
|
283835
|
use Bio::Range; |
|
204
|
|
|
|
|
306
|
|
|
204
|
|
|
|
|
652135
|
|
586
|
1
|
|
|
1
|
1
|
2
|
my ( $self, $seq, $start, $end ) = @_; |
587
|
1
|
50
|
|
|
|
5
|
$self->throw( 'Object [$seq] ' |
588
|
|
|
|
|
|
|
. 'of class [' |
589
|
|
|
|
|
|
|
. ref($seq) |
590
|
|
|
|
|
|
|
. '] should be a Bio::SeqI ' ) |
591
|
|
|
|
|
|
|
unless $seq->isa('Bio::SeqI'); |
592
|
1
|
|
|
|
|
9
|
my $trunc = $seq->trunc( $start, $end ); |
593
|
1
|
|
|
|
|
9
|
my $truncrange = |
594
|
|
|
|
|
|
|
Bio::Range->new( -start => $start, -end => $end, -strand => 0 ); |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
# make sure that there is no annotation or features in $trunc |
597
|
|
|
|
|
|
|
# (->trunc() now clone objects except for Bio::Seq::LargePrimarySeq) |
598
|
1
|
|
|
|
|
4
|
$trunc->annotation->remove_Annotations; |
599
|
1
|
|
|
|
|
4
|
$trunc->remove_SeqFeatures; |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
# move annotations |
602
|
1
|
|
|
|
|
3
|
foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { |
603
|
1
|
|
|
|
|
3
|
foreach my $value ( $seq->annotation->get_Annotations($key) ) { |
604
|
1
|
|
|
|
|
3
|
$trunc->annotation->add_Annotation( $key, $value ); |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
# move features |
609
|
1
|
|
|
|
|
3
|
foreach ( |
610
|
|
|
|
|
|
|
grep { |
611
|
3
|
50
|
|
|
|
10
|
$_ = $self->_coord_adjust( $_, 1 - $start, $end + 1 - $start ) |
612
|
|
|
|
|
|
|
if $_->overlaps($truncrange) |
613
|
|
|
|
|
|
|
} $seq->get_SeqFeatures |
614
|
|
|
|
|
|
|
) |
615
|
|
|
|
|
|
|
{ |
616
|
3
|
|
|
|
|
6
|
$trunc->add_SeqFeature($_); |
617
|
|
|
|
|
|
|
} |
618
|
1
|
|
|
|
|
5
|
return $trunc; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=head2 delete |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
Title : delete |
624
|
|
|
|
|
|
|
Function: cuts a segment out of a sequence and re-joins the left and right fragments |
625
|
|
|
|
|
|
|
(like splicing or digesting and re-ligating a molecule). |
626
|
|
|
|
|
|
|
Positions (and types) of sequence features are adjusted accordingly: |
627
|
|
|
|
|
|
|
Features that span the cut site are converted to split featuress to |
628
|
|
|
|
|
|
|
indicate the disruption. |
629
|
|
|
|
|
|
|
Features that extend into the cut-out fragment are truncated. |
630
|
|
|
|
|
|
|
A new molecule is created and returned. |
631
|
|
|
|
|
|
|
Usage : my $cutseq = Bio::SeqUtils::PbrTools->cut( $seq, 1000, 1100 ); |
632
|
|
|
|
|
|
|
Args : a Bio::PrimarySeqI compliant object to cut, |
633
|
|
|
|
|
|
|
first nt of the segment to be deleted |
634
|
|
|
|
|
|
|
last nt of the segment to be deleted |
635
|
|
|
|
|
|
|
optional: |
636
|
|
|
|
|
|
|
hash-ref of options: |
637
|
|
|
|
|
|
|
clone_obj: if true, clone the input sequence object rather |
638
|
|
|
|
|
|
|
than calling "new" on the object's class |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=cut |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
sub delete { |
645
|
6
|
|
|
6
|
1
|
267
|
my $self = shift; |
646
|
6
|
|
|
|
|
9
|
my ( $seq, $left, $right, $opts_ref ) = @_; |
647
|
6
|
50
|
66
|
|
|
22
|
$self->throw( 'was expecting 3-4 paramters but got ' . @_ ) |
648
|
|
|
|
|
|
|
unless @_ == 3 || @_ == 4; |
649
|
|
|
|
|
|
|
|
650
|
6
|
50
|
33
|
|
|
49
|
$self->throw( |
651
|
|
|
|
|
|
|
'Object of class [' . ref($seq) . '] should be a Bio::PrimarySeqI ' ) |
652
|
|
|
|
|
|
|
unless blessed($seq) && $seq->isa('Bio::PrimarySeqI'); |
653
|
|
|
|
|
|
|
|
654
|
6
|
50
|
|
|
|
12
|
$self->throw("Left coordinate ($left) must be >= 1") if $left < 1; |
655
|
6
|
50
|
|
|
|
15
|
if ( $right > $seq->length ) { |
656
|
0
|
|
|
|
|
0
|
$self->throw( "Right coordinate ($right) must be less than " |
657
|
|
|
|
|
|
|
. 'sequence length (' |
658
|
|
|
|
|
|
|
. $seq->length |
659
|
|
|
|
|
|
|
. ')' ); |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# piece together the sequence string of the remaining fragments |
663
|
6
|
|
|
|
|
19
|
my $left_seq = $seq->subseq( 1, $left - 1 ); |
664
|
6
|
|
|
|
|
14
|
my $right_seq = $seq->subseq( $right + 1, $seq->length ); |
665
|
6
|
50
|
33
|
|
|
20
|
if ( !$left_seq || !$right_seq ) { |
666
|
0
|
|
|
|
|
0
|
$self->throw( |
667
|
|
|
|
|
|
|
'could not assemble sequences. At least one of the fragments is empty' |
668
|
|
|
|
|
|
|
); |
669
|
|
|
|
|
|
|
} |
670
|
6
|
|
|
|
|
9
|
my $seq_str = $left_seq . $right_seq; |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# create the new seq object with the same class as the recipient |
673
|
|
|
|
|
|
|
# or (if requested), make a clone of the existing object. In the |
674
|
|
|
|
|
|
|
# latter case we need to remove sequence features from the cloned |
675
|
|
|
|
|
|
|
# object instead of copying them |
676
|
6
|
|
|
|
|
14
|
my $product; |
677
|
6
|
100
|
|
|
|
11
|
if ( $opts_ref->{clone_obj} ) { |
678
|
2
|
|
|
|
|
7
|
$product = $self->_new_seq_via_clone( $seq, $seq_str ); |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
else { |
681
|
4
|
|
|
|
|
19
|
$product = $self->_new_seq_from_old( $seq, { seq => $seq_str } ); |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# move sequence features |
685
|
4
|
50
|
33
|
|
|
25
|
if ( $product->isa('Bio::SeqI') && $seq->isa('Bio::SeqI') ) { |
686
|
4
|
|
|
|
|
10
|
for my $feat ( $seq->get_SeqFeatures ) { |
687
|
26
|
|
|
|
|
45
|
my $adjfeat = $self->_coord_adjust_deletion( $feat, $left, $right ); |
688
|
26
|
100
|
|
|
|
79
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# add a feature to annotatde the deletion |
693
|
4
|
|
|
|
|
38
|
my $deletion_feature = Bio::SeqFeature::Generic->new( |
694
|
|
|
|
|
|
|
-primary_tag => 'misc_feature', |
695
|
|
|
|
|
|
|
-tag => { note => 'deletion of ' . ( $right - $left + 1 ) . 'bp' }, |
696
|
|
|
|
|
|
|
-location => Bio::Location::Simple->new( |
697
|
|
|
|
|
|
|
-start => $left - 1, |
698
|
|
|
|
|
|
|
-end => $left, |
699
|
|
|
|
|
|
|
-location_type => 'IN-BETWEEN' |
700
|
|
|
|
|
|
|
) |
701
|
|
|
|
|
|
|
); |
702
|
4
|
|
|
|
|
16
|
$product->add_SeqFeature($deletion_feature); |
703
|
4
|
|
|
|
|
19
|
return $product; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=head2 insert |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Title : insert |
709
|
|
|
|
|
|
|
Function: inserts a fragment (a Bio::Seq object) into a nother sequence object |
710
|
|
|
|
|
|
|
adding all annotations and features to the final product. |
711
|
|
|
|
|
|
|
Features that span the insertion site are converted to split |
712
|
|
|
|
|
|
|
features to indicate the disruption. |
713
|
|
|
|
|
|
|
A new feature is added to indicate the inserted fragment itself. |
714
|
|
|
|
|
|
|
A new molecule is created and returned. |
715
|
|
|
|
|
|
|
Usage : # insert a fragment after pos 1000 |
716
|
|
|
|
|
|
|
my $insert_seq = Bio::SeqUtils::PbrTools->insert( |
717
|
|
|
|
|
|
|
$recipient_seq, |
718
|
|
|
|
|
|
|
$fragment_seq, |
719
|
|
|
|
|
|
|
1000 |
720
|
|
|
|
|
|
|
); |
721
|
|
|
|
|
|
|
Args : recipient sequence (a Bio::PrimarySeqI compliant object), |
722
|
|
|
|
|
|
|
a fragmetn to insert (Bio::PrimarySeqI compliant object), |
723
|
|
|
|
|
|
|
insertion position (fragment is inserted to the right of this pos) |
724
|
|
|
|
|
|
|
pos=0 will prepend the fragment to the recipient |
725
|
|
|
|
|
|
|
optional: |
726
|
|
|
|
|
|
|
hash-ref of options: |
727
|
|
|
|
|
|
|
clone_obj: if true, clone the input sequence object rather |
728
|
|
|
|
|
|
|
than calling "new" on the object's class |
729
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=cut |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
sub insert { |
734
|
3
|
|
|
3
|
1
|
37
|
my $self = shift; |
735
|
3
|
|
|
|
|
5
|
my ( $recipient, $fragment, $insert_pos, $opts_ref ) = @_; |
736
|
3
|
50
|
66
|
|
|
48
|
$self->throw( 'was expecting 3-4 paramters but got ' . @_ ) |
737
|
|
|
|
|
|
|
unless @_ == 3 || @_ == 4; |
738
|
|
|
|
|
|
|
|
739
|
3
|
50
|
33
|
|
|
28
|
$self->throw( 'Recipient object of class [' |
740
|
|
|
|
|
|
|
. ref($recipient) |
741
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
742
|
|
|
|
|
|
|
unless blessed($recipient) && $recipient->isa('Bio::PrimarySeqI'); |
743
|
|
|
|
|
|
|
|
744
|
3
|
50
|
33
|
|
|
19
|
$self->throw( 'Fragment object of class [' |
745
|
|
|
|
|
|
|
. ref($fragment) |
746
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
747
|
|
|
|
|
|
|
unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); |
748
|
|
|
|
|
|
|
|
749
|
3
|
50
|
|
|
|
15
|
$self->throw( 'Can\'t concatenate sequences with different alphabets: ' |
750
|
|
|
|
|
|
|
. 'recipient is ' |
751
|
|
|
|
|
|
|
. $recipient->alphabet |
752
|
|
|
|
|
|
|
. ' and fragment is ' |
753
|
|
|
|
|
|
|
. $fragment->alphabet ) |
754
|
|
|
|
|
|
|
unless $recipient->alphabet eq $fragment->alphabet; |
755
|
|
|
|
|
|
|
|
756
|
3
|
50
|
33
|
|
|
16
|
if ( $insert_pos < 0 or $insert_pos > $recipient->length ) { |
757
|
0
|
|
|
|
|
0
|
$self->throw( "insertion position ($insert_pos) must be between 0 and " |
758
|
|
|
|
|
|
|
. 'recipient sequence length (' |
759
|
|
|
|
|
|
|
. $recipient->length |
760
|
|
|
|
|
|
|
. ')' ); |
761
|
|
|
|
|
|
|
} |
762
|
|
|
|
|
|
|
|
763
|
3
|
50
|
33
|
|
|
30
|
if ( $fragment->can('is_circular') && $fragment->is_circular ) { |
764
|
0
|
|
|
|
|
0
|
$self->throw('Can\'t insert circular fragments'); |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
3
|
50
|
|
|
|
8
|
if ( !$recipient->seq ) { |
768
|
0
|
|
|
|
|
0
|
$self->throw( |
769
|
|
|
|
|
|
|
'Recipient has no sequence, can not insert into this object'); |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# construct raw sequence of the new molecule |
773
|
3
|
50
|
|
|
|
11
|
my $left_seq = |
774
|
|
|
|
|
|
|
$insert_pos > 0 |
775
|
|
|
|
|
|
|
? $recipient->subseq( 1, $insert_pos ) |
776
|
|
|
|
|
|
|
: ''; |
777
|
3
|
|
|
|
|
7
|
my $mid_seq = $fragment->seq; |
778
|
3
|
50
|
|
|
|
7
|
my $right_seq = |
779
|
|
|
|
|
|
|
$insert_pos < $recipient->length |
780
|
|
|
|
|
|
|
? $recipient->subseq( $insert_pos + 1, $recipient->length ) |
781
|
|
|
|
|
|
|
: ''; |
782
|
3
|
|
|
|
|
7
|
my $seq_str = $left_seq . $mid_seq . $right_seq; |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
# create the new seq object with the same class as the recipient |
785
|
|
|
|
|
|
|
# or (if requested), make a clone of the existing object. In the |
786
|
|
|
|
|
|
|
# latter case we need to remove sequence features from the cloned |
787
|
|
|
|
|
|
|
# object instead of copying them |
788
|
3
|
|
|
|
|
3
|
my $product; |
789
|
3
|
100
|
|
|
|
6
|
if ( $opts_ref->{clone_obj} ) { |
790
|
1
|
|
|
|
|
5
|
$product = $self->_new_seq_via_clone( $recipient, $seq_str ); |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
else { |
793
|
2
|
|
|
|
|
2
|
my @desc; |
794
|
2
|
50
|
|
|
|
6
|
push @desc, 'Inserted fragment: ' . $fragment->desc |
795
|
|
|
|
|
|
|
if defined $fragment->desc; |
796
|
2
|
50
|
|
|
|
5
|
push @desc, 'Recipient: ' . $recipient->desc |
797
|
|
|
|
|
|
|
if defined $recipient->desc; |
798
|
2
|
|
50
|
|
|
7
|
$product = $self->_new_seq_from_old( |
|
|
|
33
|
|
|
|
|
|
|
|
50
|
|
|
|
|
799
|
|
|
|
|
|
|
$recipient, |
800
|
|
|
|
|
|
|
{ |
801
|
|
|
|
|
|
|
seq => $seq_str, |
802
|
|
|
|
|
|
|
display_id => $recipient->display_id, |
803
|
|
|
|
|
|
|
accession_number => $recipient->accession_number || '', |
804
|
|
|
|
|
|
|
alphabet => $recipient->alphabet, |
805
|
|
|
|
|
|
|
desc => join( '; ', @desc ), |
806
|
|
|
|
|
|
|
verbose => $recipient->verbose || $fragment->verbose, |
807
|
|
|
|
|
|
|
is_circular => $recipient->is_circular || 0, |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
); |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
} # if clone_obj |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# move annotations from fragment to product |
814
|
3
|
50
|
33
|
|
|
24
|
if ( $product->isa("Bio::AnnotatableI") |
815
|
|
|
|
|
|
|
&& $fragment->isa("Bio::AnnotatableI") ) |
816
|
|
|
|
|
|
|
{ |
817
|
3
|
|
|
|
|
12
|
foreach my $key ( $fragment->annotation->get_all_annotation_keys ) { |
818
|
3
|
|
|
|
|
7
|
foreach my $value ( $fragment->annotation->get_Annotations($key) ) { |
819
|
3
|
|
|
|
|
10
|
$product->annotation->add_Annotation( $key, $value ); |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
# move sequence features to product with adjusted coordinates |
825
|
3
|
50
|
|
|
|
13
|
if ( $product->isa('Bio::SeqI') ) { |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# for the fragment, just shift the features to new position |
828
|
3
|
50
|
|
|
|
11
|
if ( $fragment->isa('Bio::SeqI') ) { |
829
|
3
|
|
|
|
|
5
|
for my $feat ( $fragment->get_SeqFeatures ) { |
830
|
3
|
|
|
|
|
9
|
my $adjfeat = $self->_coord_adjust( $feat, $insert_pos ); |
831
|
3
|
50
|
|
|
|
14
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
832
|
|
|
|
|
|
|
} |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
# for recipient, shift and modify features according to insertion. |
836
|
3
|
50
|
|
|
|
15
|
if ( $recipient->isa('Bio::SeqI') ) { |
837
|
3
|
|
|
|
|
8
|
for my $feat ( $recipient->get_SeqFeatures ) { |
838
|
17
|
|
|
|
|
25
|
my $adjfeat = |
839
|
|
|
|
|
|
|
$self->_coord_adjust_insertion( $feat, $insert_pos, |
840
|
|
|
|
|
|
|
$fragment->length ); |
841
|
17
|
100
|
|
|
|
46
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
# add a feature to annotate the insertion |
847
|
3
|
|
|
|
|
14
|
my $insertion_feature = Bio::SeqFeature::Generic->new( |
848
|
|
|
|
|
|
|
-start => $insert_pos + 1, |
849
|
|
|
|
|
|
|
-end => $insert_pos + $fragment->length, |
850
|
|
|
|
|
|
|
-primary_tag => 'misc_feature', |
851
|
|
|
|
|
|
|
-tag => { note => 'inserted fragment' }, |
852
|
|
|
|
|
|
|
); |
853
|
3
|
|
|
|
|
12
|
$product->add_SeqFeature($insertion_feature); |
854
|
|
|
|
|
|
|
|
855
|
3
|
|
|
|
|
11
|
return $product; |
856
|
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
=head2 ligate |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
title : ligate |
861
|
|
|
|
|
|
|
function: pastes a fragment (which can also have features) into a recipient |
862
|
|
|
|
|
|
|
sequence between two "cut" sites, preserving features and adjusting |
863
|
|
|
|
|
|
|
their locations. |
864
|
|
|
|
|
|
|
This is a shortcut for deleting a segment from a sequence object followed |
865
|
|
|
|
|
|
|
by an insertion of a fragmnet and is supposed to be used to simulate |
866
|
|
|
|
|
|
|
in-vitro cloning where a recipient (a vector) is digested and a fragment |
867
|
|
|
|
|
|
|
is then ligated into the recipient molecule. The fragment can be flipped |
868
|
|
|
|
|
|
|
(reverse-complemented with all its features). |
869
|
|
|
|
|
|
|
A new sequence object is returned to represent the product of the reaction. |
870
|
|
|
|
|
|
|
Features and annotations are transferred from the insert to the product |
871
|
|
|
|
|
|
|
and features on the recipient are adjusted according to the methods |
872
|
|
|
|
|
|
|
L"delete"> amd L"insert">: |
873
|
|
|
|
|
|
|
Features spanning the insertion site will be split up into two sub-locations. |
874
|
|
|
|
|
|
|
(Sub-)features in the deleted region are themselves deleted. |
875
|
|
|
|
|
|
|
(Sub-)features that extend into the deleted region are truncated. |
876
|
|
|
|
|
|
|
The class of the product object depends on the class of the recipient (vector) |
877
|
|
|
|
|
|
|
sequence object. if it is not possible to instantiate a new |
878
|
|
|
|
|
|
|
object of that class, a Bio::Primaryseq object is created instead. |
879
|
|
|
|
|
|
|
usage : # insert the flipped fragment between positions 1000 and 1100 of the |
880
|
|
|
|
|
|
|
# vector, i.e. everything between these two positions is deleted and |
881
|
|
|
|
|
|
|
# replaced by the fragment |
882
|
|
|
|
|
|
|
my $new_molecule = Bio::Sequtils::Pbrtools->ligate( |
883
|
|
|
|
|
|
|
-recipient => $vector, |
884
|
|
|
|
|
|
|
-fragment => $fragment, |
885
|
|
|
|
|
|
|
-left => 1000, |
886
|
|
|
|
|
|
|
-right => 1100, |
887
|
|
|
|
|
|
|
-flip => 1, |
888
|
|
|
|
|
|
|
-clone_obj => 1 |
889
|
|
|
|
|
|
|
); |
890
|
|
|
|
|
|
|
args : recipient: the recipient/vector molecule |
891
|
|
|
|
|
|
|
fragment: molecule that is to be ligated into the vector |
892
|
|
|
|
|
|
|
left: left cut site (fragment will be inserted to the right of |
893
|
|
|
|
|
|
|
this position) |
894
|
|
|
|
|
|
|
optional: |
895
|
|
|
|
|
|
|
right: right cut site (fragment will be inseterted to the |
896
|
|
|
|
|
|
|
left of this position). defaults to left+1 |
897
|
|
|
|
|
|
|
flip: boolean, if true, the fragment is reverse-complemented |
898
|
|
|
|
|
|
|
(including features) before inserting |
899
|
|
|
|
|
|
|
clone_obj: if true, clone the recipient object to create the product |
900
|
|
|
|
|
|
|
instead of calling "new" on its class |
901
|
|
|
|
|
|
|
returns : a new Bio::Seq object of the ligated fragments |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
=cut |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
sub ligate { |
906
|
3
|
|
|
3
|
1
|
82
|
my $self = shift; |
907
|
3
|
|
|
|
|
14
|
my ( $recipient, $fragment, $left, $right, $flip, $clone_obj ) = |
908
|
|
|
|
|
|
|
$self->_rearrange( [qw(RECIPIENT FRAGMENT LEFT RIGHT FLIP CLONE_OBJ )], |
909
|
|
|
|
|
|
|
@_ ); |
910
|
3
|
50
|
|
|
|
12
|
$self->throw("missing required parameter 'recipient'") unless $recipient; |
911
|
3
|
50
|
|
|
|
4
|
$self->throw("missing required parameter 'fragment'") unless $fragment; |
912
|
3
|
50
|
|
|
|
7
|
$self->throw("missing required parameter 'left'") unless defined $left; |
913
|
|
|
|
|
|
|
|
914
|
3
|
|
33
|
|
|
4
|
$right ||= $left + 1; |
915
|
|
|
|
|
|
|
|
916
|
3
|
50
|
33
|
|
|
24
|
$self->throw( |
917
|
|
|
|
|
|
|
"Fragment must be a Bio::PrimarySeqI compliant object but it is a " |
918
|
|
|
|
|
|
|
. ref($fragment) ) |
919
|
|
|
|
|
|
|
unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); |
920
|
|
|
|
|
|
|
|
921
|
3
|
50
|
|
|
|
12
|
$fragment = $self->revcom_with_features($fragment) if $flip; |
922
|
|
|
|
|
|
|
|
923
|
3
|
|
|
|
|
6
|
my $opts_ref = {}; |
924
|
3
|
100
|
|
|
|
8
|
$opts_ref->{clone_obj} = 1 if $clone_obj; |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
# clone in two steps: first delete between the insertion sites, |
927
|
|
|
|
|
|
|
# then insert the fragment. Step 1 is skipped if insert positions |
928
|
|
|
|
|
|
|
# are adjacent (no deletion) |
929
|
3
|
|
|
|
|
3
|
my ( $product1, $product2 ); |
930
|
3
|
|
|
|
|
4
|
eval { |
931
|
3
|
50
|
|
|
|
8
|
if ( $right == $left + 1 ) { |
932
|
0
|
|
|
|
|
0
|
$product1 = $recipient; |
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
else { |
935
|
3
|
|
|
|
|
11
|
$product1 = |
936
|
|
|
|
|
|
|
$self->delete( $recipient, $left + 1, $right - 1, $opts_ref ); |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
}; |
939
|
3
|
100
|
|
|
|
13
|
$self->throw( "Failed in step 1 (cut recipient): " . $@ ) if $@; |
940
|
2
|
|
|
|
|
3
|
eval { $product2 = $self->insert( $product1, $fragment, $left, $opts_ref ) }; |
|
2
|
|
|
|
|
5
|
|
941
|
2
|
50
|
|
|
|
5
|
$self->throw( "Failed in step 2 (insert fragment): " . $@ ) if $@; |
942
|
|
|
|
|
|
|
|
943
|
2
|
|
|
|
|
11
|
return $product2; |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=head2 _coord_adjust_deletion |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
title : _coord_adjust_deletion |
950
|
|
|
|
|
|
|
function: recursively adjusts coordinates of seqfeatures on a molecule |
951
|
|
|
|
|
|
|
where a segment has been deleted. |
952
|
|
|
|
|
|
|
(sub)features that span the deletion site become split features. |
953
|
|
|
|
|
|
|
(sub)features that extend into the deletion site are truncated. |
954
|
|
|
|
|
|
|
A note is added to the feature to inform about the size and |
955
|
|
|
|
|
|
|
position of the deletion. |
956
|
|
|
|
|
|
|
usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_deletion( |
957
|
|
|
|
|
|
|
$feature, |
958
|
|
|
|
|
|
|
$start, |
959
|
|
|
|
|
|
|
$end |
960
|
|
|
|
|
|
|
); |
961
|
|
|
|
|
|
|
args : a Bio::SeqFeatureI compliant object, |
962
|
|
|
|
|
|
|
start (inclusive) position of the deletion site, |
963
|
|
|
|
|
|
|
end (inclusive) position of the deletion site |
964
|
|
|
|
|
|
|
returns : a Bio::SeqFeatureI compliant object |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
=cut |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
sub _coord_adjust_deletion { |
969
|
38
|
|
|
38
|
|
40
|
my ( $self, $feat, $left, $right ) = @_; |
970
|
|
|
|
|
|
|
|
971
|
38
|
50
|
|
|
|
79
|
$self->throw( 'object [$feat] ' |
972
|
|
|
|
|
|
|
. 'of class [' |
973
|
|
|
|
|
|
|
. ref($feat) |
974
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
975
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
976
|
38
|
50
|
33
|
|
|
102
|
$self->throw('missing coordinates: need a left and a right position') |
977
|
|
|
|
|
|
|
unless defined $left && defined $right; |
978
|
|
|
|
|
|
|
|
979
|
38
|
50
|
|
|
|
58
|
if ( $left > $right ) { |
980
|
0
|
0
|
0
|
|
|
0
|
if ( $feat->can('is_circular') && $feat->is_circular ) { |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
# todo handle circular molecules |
983
|
0
|
|
|
|
|
0
|
$self->throw( |
984
|
|
|
|
|
|
|
'can not yet handle deletions in circular molecules if deletion spans origin' |
985
|
|
|
|
|
|
|
); |
986
|
|
|
|
|
|
|
} |
987
|
|
|
|
|
|
|
else { |
988
|
0
|
|
|
|
|
0
|
$self->throw( |
989
|
|
|
|
|
|
|
"left coordinate ($left) must be less than right ($right)" |
990
|
|
|
|
|
|
|
. " but it was greater" ); |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
} |
993
|
38
|
|
|
|
|
109
|
my $deletion = Bio::Location::Simple->new( |
994
|
|
|
|
|
|
|
-start => $left, |
995
|
|
|
|
|
|
|
-end => $right, |
996
|
|
|
|
|
|
|
); |
997
|
38
|
|
|
|
|
54
|
my $del_length = $right - $left + 1; |
998
|
|
|
|
|
|
|
|
999
|
38
|
|
|
|
|
25
|
my @adjsubfeat; |
1000
|
38
|
|
|
|
|
59
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
1001
|
12
|
|
|
|
|
31
|
my $adjsubfeat = |
1002
|
|
|
|
|
|
|
$self->_coord_adjust_deletion( $subfeat, $left, $right ); |
1003
|
12
|
100
|
|
|
|
26
|
push @adjsubfeat, $adjsubfeat if $adjsubfeat; |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
38
|
|
|
|
|
40
|
my @loc; |
1007
|
|
|
|
|
|
|
my $note; |
1008
|
38
|
|
|
|
|
60
|
for ( $feat->location->each_Location ) { |
1009
|
38
|
100
|
|
|
|
80
|
next if $deletion->contains($_); # this location will be deleted; |
1010
|
25
|
|
|
|
|
46
|
my $strand = $_->strand; |
1011
|
25
|
|
|
|
|
45
|
my $type = $_->location_type; |
1012
|
25
|
|
|
|
|
34
|
my $start = $_->start; |
1013
|
25
|
50
|
|
|
|
85
|
my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; |
1014
|
25
|
|
|
|
|
36
|
my $end = $_->end; |
1015
|
25
|
50
|
|
|
|
69
|
my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; |
1016
|
25
|
|
|
|
|
29
|
my @newcoords = (); |
1017
|
25
|
100
|
100
|
|
|
31
|
if ( $start < $deletion->start && $end > $deletion->end ) |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
{ # split the feature |
1019
|
4
|
|
|
|
|
10
|
@newcoords = ( |
1020
|
|
|
|
|
|
|
[ $start, ( $deletion->start - 1 ), $start_type, $end_type ], |
1021
|
|
|
|
|
|
|
[ |
1022
|
|
|
|
|
|
|
( $deletion->start ), $end - $del_length, |
1023
|
|
|
|
|
|
|
$start_type, $end_type |
1024
|
|
|
|
|
|
|
] |
1025
|
|
|
|
|
|
|
); |
1026
|
4
|
|
|
|
|
12
|
$note = |
1027
|
|
|
|
|
|
|
$del_length |
1028
|
|
|
|
|
|
|
. 'bp internal deletion between pos ' |
1029
|
|
|
|
|
|
|
. ( $deletion->start - 1 ) . ' and ' |
1030
|
|
|
|
|
|
|
. $deletion->start; |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
elsif ( $_->start < $deletion->start && $_->end >= $deletion->start ) |
1033
|
|
|
|
|
|
|
{ # truncate feature end |
1034
|
8
|
|
|
|
|
13
|
@newcoords = |
1035
|
|
|
|
|
|
|
( [ $start, ( $deletion->start - 1 ), $start_type, $end_type ] ); |
1036
|
8
|
|
|
|
|
16
|
$note = |
1037
|
|
|
|
|
|
|
( $end - $deletion->start + 1 ) . 'bp deleted from feature '; |
1038
|
8
|
50
|
|
|
|
18
|
if ( $feat->strand ) { |
1039
|
0
|
0
|
|
|
|
0
|
$note .= $feat->strand == 1 ? "3' " : "5' "; |
1040
|
|
|
|
|
|
|
} |
1041
|
8
|
|
|
|
|
12
|
$note .= 'end'; |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
elsif ( $_->start <= $deletion->end && $_->end > $deletion->end ) |
1044
|
|
|
|
|
|
|
{ # truncate feature start and shift end |
1045
|
5
|
|
|
|
|
11
|
@newcoords = ( |
1046
|
|
|
|
|
|
|
[ |
1047
|
|
|
|
|
|
|
( $deletion->start ), $end - $del_length, |
1048
|
|
|
|
|
|
|
$start_type, $end_type |
1049
|
|
|
|
|
|
|
] |
1050
|
|
|
|
|
|
|
); |
1051
|
5
|
|
|
|
|
12
|
$note = |
1052
|
|
|
|
|
|
|
( $deletion->end - $start + 1 ) . 'bp deleted from feature '; |
1053
|
5
|
100
|
|
|
|
14
|
if ( $feat->strand ) { |
1054
|
2
|
50
|
|
|
|
4
|
$note .= $feat->strand == 1 ? "5' end" : "3' end"; |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
else { |
1057
|
3
|
|
|
|
|
6
|
$note .= 'start'; |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
elsif ( $start >= $deletion->end ) { # just shift entire location |
1061
|
4
|
|
|
|
|
11
|
@newcoords = ( |
1062
|
|
|
|
|
|
|
[ |
1063
|
|
|
|
|
|
|
$start - $del_length, $end - $del_length, |
1064
|
|
|
|
|
|
|
$start_type, $end_type |
1065
|
|
|
|
|
|
|
] |
1066
|
|
|
|
|
|
|
); |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
else { # not affected by deletion |
1069
|
4
|
|
|
|
|
9
|
@newcoords = ( [ $start, $end, $start_type, $end_type ] ); |
1070
|
|
|
|
|
|
|
} |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
# if we have no coordinates, we return nothing |
1073
|
|
|
|
|
|
|
# the feature is deleted |
1074
|
25
|
50
|
|
|
|
50
|
return unless @newcoords; |
1075
|
|
|
|
|
|
|
|
1076
|
25
|
|
|
|
|
50
|
my @subloc = |
1077
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( \@newcoords, $strand, |
1078
|
|
|
|
|
|
|
$type ); |
1079
|
25
|
|
|
|
|
51
|
push @loc, $self->_single_loc_object_from_collection(@subloc); |
1080
|
|
|
|
|
|
|
} # each location |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
# create new feature based on original one and move annotation across |
1083
|
38
|
|
|
|
|
91
|
my $newfeat = |
1084
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
1085
|
38
|
|
|
|
|
74
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
1086
|
4
|
|
|
|
|
8
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
1087
|
4
|
|
|
|
|
9
|
$newfeat->annotation->add_Annotation( $key, $value ); |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
} |
1090
|
38
|
|
|
|
|
74
|
foreach my $key ( $feat->get_all_tags() ) { |
1091
|
0
|
|
|
|
|
0
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
1092
|
|
|
|
|
|
|
} |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
# If we have a note about the deleted bases, add it |
1095
|
38
|
100
|
|
|
|
58
|
if ($note) { |
1096
|
17
|
|
|
|
|
29
|
$newfeat->add_tag_value( 'note', $note ); |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
# set modified location(s) for the new feature and |
1100
|
|
|
|
|
|
|
# add its subfeatures if any |
1101
|
38
|
|
|
|
|
71
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
1102
|
38
|
100
|
|
|
|
87
|
$loc ? $newfeat->location($loc) : return; |
1103
|
25
|
|
|
|
|
48
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
1104
|
|
|
|
|
|
|
|
1105
|
25
|
|
|
|
|
63
|
return $newfeat; |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
} |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=head2 _coord_adjust_insertion |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
title : _coord_adjust_insertion |
1112
|
|
|
|
|
|
|
function: recursively adjusts coordinates of seqfeatures on a molecule |
1113
|
|
|
|
|
|
|
where another sequence has been inserted. |
1114
|
|
|
|
|
|
|
(sub)features that span the insertion site become split features |
1115
|
|
|
|
|
|
|
and a note is added about the size and positin of the insertion. |
1116
|
|
|
|
|
|
|
Features with an IN-BETWEEN location at the insertion site |
1117
|
|
|
|
|
|
|
are lost (such features can only exist between adjacent bases) |
1118
|
|
|
|
|
|
|
usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_insertion( |
1119
|
|
|
|
|
|
|
$feature, |
1120
|
|
|
|
|
|
|
$insert_pos, |
1121
|
|
|
|
|
|
|
$insert_length |
1122
|
|
|
|
|
|
|
); |
1123
|
|
|
|
|
|
|
args : a Bio::SeqFeatureI compliant object, |
1124
|
|
|
|
|
|
|
insertion position (insert to the right of this position) |
1125
|
|
|
|
|
|
|
length of inserted fragment |
1126
|
|
|
|
|
|
|
returns : a Bio::SeqFeatureI compliant object |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=cut |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
sub _coord_adjust_insertion { |
1131
|
22
|
|
|
22
|
|
29
|
my ( $self, $feat, $insert_pos, $insert_len ) = @_; |
1132
|
|
|
|
|
|
|
|
1133
|
22
|
50
|
|
|
|
48
|
$self->throw( 'object [$feat] ' |
1134
|
|
|
|
|
|
|
. 'of class [' |
1135
|
|
|
|
|
|
|
. ref($feat) |
1136
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
1137
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
1138
|
22
|
50
|
|
|
|
30
|
$self->throw('missing insert position') unless defined $insert_pos; |
1139
|
22
|
50
|
|
|
|
30
|
$self->throw('missing insert length') unless defined $insert_len; |
1140
|
|
|
|
|
|
|
|
1141
|
22
|
|
|
|
|
15
|
my @adjsubfeat; |
1142
|
22
|
|
|
|
|
32
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
1143
|
5
|
|
|
|
|
12
|
push @adjsubfeat, |
1144
|
|
|
|
|
|
|
$self->_coord_adjust_insertion( $subfeat, $insert_pos, $insert_len ); |
1145
|
|
|
|
|
|
|
} |
1146
|
|
|
|
|
|
|
|
1147
|
22
|
|
|
|
|
23
|
my @loc; |
1148
|
|
|
|
|
|
|
my $note; |
1149
|
22
|
|
|
|
|
32
|
for ( $feat->location->each_Location ) { |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# loose IN-BETWEEN features at the insertion site |
1152
|
|
|
|
|
|
|
next |
1153
|
22
|
100
|
66
|
|
|
308
|
if ( $_->location_type eq 'IN-BETWEEN' && $_->start == $insert_pos ); |
1154
|
20
|
|
|
|
|
37
|
my $strand = $_->strand; |
1155
|
20
|
|
|
|
|
27
|
my $type = $_->location_type; |
1156
|
20
|
|
|
|
|
33
|
my $start = $_->start; |
1157
|
20
|
50
|
|
|
|
69
|
my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; |
1158
|
20
|
|
|
|
|
29
|
my $end = $_->end; |
1159
|
20
|
50
|
|
|
|
57
|
my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; |
1160
|
20
|
|
|
|
|
22
|
my @newcoords = (); |
1161
|
20
|
100
|
100
|
|
|
66
|
if ( $start <= $insert_pos && $end > $insert_pos ) { # split the feature |
|
|
100
|
|
|
|
|
|
1162
|
3
|
|
|
|
|
9
|
@newcoords = ( |
1163
|
|
|
|
|
|
|
[ $start, $insert_pos, $start_type, $end_type ], |
1164
|
|
|
|
|
|
|
[ |
1165
|
|
|
|
|
|
|
( $insert_pos + 1 + $insert_len ), $end + $insert_len, |
1166
|
|
|
|
|
|
|
$start_type, $end_type |
1167
|
|
|
|
|
|
|
] |
1168
|
|
|
|
|
|
|
); |
1169
|
3
|
|
|
|
|
10
|
$note = |
1170
|
|
|
|
|
|
|
$insert_len |
1171
|
|
|
|
|
|
|
. 'bp internal insertion between pos ' |
1172
|
|
|
|
|
|
|
. $insert_pos . ' and ' |
1173
|
|
|
|
|
|
|
. ( $insert_pos + $insert_len + 1 ); |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
elsif ( $start > $insert_pos ) { # just shift entire location |
1177
|
8
|
|
|
|
|
18
|
@newcoords = ( |
1178
|
|
|
|
|
|
|
[ |
1179
|
|
|
|
|
|
|
$start + $insert_len, $end + $insert_len, |
1180
|
|
|
|
|
|
|
$start_type, $end_type |
1181
|
|
|
|
|
|
|
] |
1182
|
|
|
|
|
|
|
); |
1183
|
|
|
|
|
|
|
} |
1184
|
|
|
|
|
|
|
else { # not affected |
1185
|
9
|
|
|
|
|
20
|
@newcoords = ( [ $start, $end, $start_type, $end_type ] ); |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# if we have deleted all coordinates, return nothing |
1189
|
|
|
|
|
|
|
# (possible if all locations are IN-BETWEEN) |
1190
|
20
|
50
|
|
|
|
32
|
return unless @newcoords; |
1191
|
|
|
|
|
|
|
|
1192
|
20
|
|
|
|
|
33
|
my @subloc = |
1193
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( \@newcoords, $strand, |
1194
|
|
|
|
|
|
|
$type ); |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# put together final location which could be a split now |
1197
|
20
|
|
|
|
|
42
|
push @loc, $self->_single_loc_object_from_collection(@subloc); |
1198
|
|
|
|
|
|
|
} # each location |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
# create new feature based on original one and move annotation across |
1201
|
22
|
|
|
|
|
56
|
my $newfeat = |
1202
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
1203
|
22
|
|
|
|
|
48
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
1204
|
3
|
|
|
|
|
6
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
1205
|
3
|
|
|
|
|
7
|
$newfeat->annotation->add_Annotation( $key, $value ); |
1206
|
|
|
|
|
|
|
} |
1207
|
|
|
|
|
|
|
} |
1208
|
22
|
|
|
|
|
38
|
foreach my $key ( $feat->get_all_tags() ) { |
1209
|
10
|
|
|
|
|
13
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
1210
|
|
|
|
|
|
|
} |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
# If we have a note about the inserted bases, add it |
1213
|
22
|
100
|
|
|
|
29
|
if ($note) { |
1214
|
3
|
|
|
|
|
4
|
$newfeat->add_tag_value( 'note', $note ); |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
# set modified location(s) for the new feature and |
1218
|
|
|
|
|
|
|
# add its subfeatures if any |
1219
|
22
|
|
|
|
|
39
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
1220
|
22
|
100
|
|
|
|
52
|
$loc ? $newfeat->location($loc) : return; |
1221
|
20
|
|
|
|
|
33
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
1222
|
|
|
|
|
|
|
|
1223
|
20
|
|
|
|
|
37
|
return $newfeat; |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
=head2 _single_loc_object_from_collection |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
Title : _single_loc_object_from_collection |
1230
|
|
|
|
|
|
|
Function: takes an array of location objects. Returns either a split |
1231
|
|
|
|
|
|
|
location object if there are more than one locations in the |
1232
|
|
|
|
|
|
|
array or returns the single location if there is only one |
1233
|
|
|
|
|
|
|
Usage : my $loc = _single_loc_object_from_collection( @sublocs ); |
1234
|
|
|
|
|
|
|
Args : array of Bio::Location objects |
1235
|
|
|
|
|
|
|
Returns : a single Bio:;Location object containing all locations |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub _single_loc_object_from_collection { |
1240
|
119
|
|
|
119
|
|
117
|
my ( $self, @locs ) = @_; |
1241
|
119
|
|
|
|
|
76
|
my $loc; |
1242
|
119
|
100
|
|
|
|
239
|
if ( @locs > 1 ) { |
|
|
100
|
|
|
|
|
|
1243
|
7
|
|
|
|
|
26
|
$loc = Bio::Location::Split->new; |
1244
|
7
|
|
|
|
|
11
|
$loc->add_sub_Location(@locs); |
1245
|
|
|
|
|
|
|
} |
1246
|
|
|
|
|
|
|
elsif ( @locs == 1 ) { |
1247
|
97
|
|
|
|
|
77
|
$loc = shift @locs; |
1248
|
|
|
|
|
|
|
} |
1249
|
119
|
|
|
|
|
221
|
return $loc; |
1250
|
|
|
|
|
|
|
} # _single_loc_object_from_collection |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=head2 _location_objects_from_coordinate_list |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
Title : _location_objects_from_coordinate_list |
1255
|
|
|
|
|
|
|
Function: takes an array-ref of start/end coordinates, a strand and a |
1256
|
|
|
|
|
|
|
type and returns a list of Bio::Location objects (Fuzzy by |
1257
|
|
|
|
|
|
|
default, Simple in case of in-between coordinates). |
1258
|
|
|
|
|
|
|
If location type is not "IN-BETWEEN", individual types may be |
1259
|
|
|
|
|
|
|
passed in for start and end location as per Bio::Location::Fuzzy |
1260
|
|
|
|
|
|
|
documentation. |
1261
|
|
|
|
|
|
|
Usage : my @loc_objs = $self->_location_objects_from_coordinate_list( |
1262
|
|
|
|
|
|
|
\@coords, |
1263
|
|
|
|
|
|
|
$strand, |
1264
|
|
|
|
|
|
|
$type |
1265
|
|
|
|
|
|
|
); |
1266
|
|
|
|
|
|
|
Args : array-ref of array-refs each containing: |
1267
|
|
|
|
|
|
|
start, end [, start-type, end-type] |
1268
|
|
|
|
|
|
|
where types are optional. If given, must be |
1269
|
|
|
|
|
|
|
a one of ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
1270
|
|
|
|
|
|
|
strand (all locations must be on same strand) |
1271
|
|
|
|
|
|
|
location-type (EXACT, IN-BETWEEN etc) |
1272
|
|
|
|
|
|
|
Returns : list of Bio::Location objects |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=cut |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
sub _location_objects_from_coordinate_list { |
1277
|
59
|
|
|
59
|
|
46
|
my $self = shift; |
1278
|
59
|
|
|
|
|
61
|
my ( $coords_ref, $strand, $type ) = @_; |
1279
|
59
|
50
|
|
|
|
85
|
$self->throw( 'expected 3 parameters but got ' . @_ ) unless @_ == 3; |
1280
|
59
|
50
|
|
|
|
89
|
$self->throw('first argument must be an ARRAY reference#') |
1281
|
|
|
|
|
|
|
unless ref($coords_ref) eq 'ARRAY'; |
1282
|
|
|
|
|
|
|
|
1283
|
59
|
|
|
|
|
40
|
my @loc; |
1284
|
59
|
|
|
|
|
70
|
foreach my $coords_set (@$coords_ref) { |
1285
|
66
|
|
|
|
|
95
|
my ( $start, $end, $start_type, $end_type ) = @$coords_set; |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
# taken from Bio::SeqUtils::_coord_adjust |
1288
|
66
|
50
|
|
|
|
91
|
if ( $type ne 'IN-BETWEEN' ) { |
1289
|
66
|
|
|
|
|
196
|
my $loc = Bio::Location::Fuzzy->new( |
1290
|
|
|
|
|
|
|
-start => $start, |
1291
|
|
|
|
|
|
|
-end => $end, |
1292
|
|
|
|
|
|
|
-strand => $strand, |
1293
|
|
|
|
|
|
|
-location_type => $type |
1294
|
|
|
|
|
|
|
); |
1295
|
66
|
100
|
|
|
|
159
|
$loc->start_pos_type($start_type) if $start_type; |
1296
|
66
|
100
|
|
|
|
122
|
$loc->end_pos_type($end_type) if $end_type; |
1297
|
66
|
|
|
|
|
118
|
push @loc, $loc; |
1298
|
|
|
|
|
|
|
} |
1299
|
|
|
|
|
|
|
else { |
1300
|
0
|
|
|
|
|
0
|
push @loc, |
1301
|
|
|
|
|
|
|
Bio::Location::Simple->new( |
1302
|
|
|
|
|
|
|
-start => $start, |
1303
|
|
|
|
|
|
|
-end => $end, |
1304
|
|
|
|
|
|
|
-strand => $strand, |
1305
|
|
|
|
|
|
|
-location_type => $type |
1306
|
|
|
|
|
|
|
); |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
} # each coords_set |
1309
|
59
|
|
|
|
|
110
|
return @loc; |
1310
|
|
|
|
|
|
|
} # _location_objects_from_coordinate_list |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=head2 _new_seq_via_clone |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
Title : _new_seq_via_clone |
1315
|
|
|
|
|
|
|
Function: clone a sequence object using Bio::Root::Root::clone and set the new sequence string |
1316
|
|
|
|
|
|
|
sequence features are removed. |
1317
|
|
|
|
|
|
|
Usage : my $new_seq = $self->_new_seq_via_clone( $seq_obj, $seq_str ); |
1318
|
|
|
|
|
|
|
Args : original seq object [, new sequence string] |
1319
|
|
|
|
|
|
|
Returns : a clone of the original sequence object, optionally with new sequence string |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=cut |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
sub _new_seq_via_clone { |
1324
|
3
|
|
|
3
|
|
4
|
my ( $self, $in_seq_obj, $seq_str ) = @_; |
1325
|
3
|
|
|
|
|
12
|
my $out_seq_obj = $in_seq_obj->clone; |
1326
|
3
|
50
|
|
|
|
25
|
$out_seq_obj->remove_SeqFeatures if $out_seq_obj->can('remove_SeqFeatures'); |
1327
|
3
|
50
|
33
|
|
|
10
|
if ( blessed $out_seq_obj->seq |
1328
|
|
|
|
|
|
|
&& $out_seq_obj->seq->isa('Bio::PrimarySeq') ) |
1329
|
|
|
|
|
|
|
{ |
1330
|
0
|
|
|
|
|
0
|
$out_seq_obj->seq->seq($seq_str); |
1331
|
|
|
|
|
|
|
} |
1332
|
|
|
|
|
|
|
else { |
1333
|
3
|
|
|
|
|
6
|
$out_seq_obj->seq($seq_str); |
1334
|
|
|
|
|
|
|
} |
1335
|
3
|
|
|
|
|
8
|
return $out_seq_obj; |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
} # _new_seq_via_clone |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=head2 _new_seq_from_old |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
Title : _new_seq_from_old |
1342
|
|
|
|
|
|
|
Function: creates a new sequence obejct, if possible of the same class as the old and adds |
1343
|
|
|
|
|
|
|
attributes to it. Also copies annotation across to the new object. |
1344
|
|
|
|
|
|
|
Usage : my $new_seq = $self->_new_seq_from_old( $seq_obj, { seq => $seq_str, display_id => 'some_ID'}); |
1345
|
|
|
|
|
|
|
Args : old sequence object |
1346
|
|
|
|
|
|
|
hashref of attributes for the new sequence (sequence string etc.) |
1347
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=cut |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
sub _new_seq_from_old { |
1352
|
6
|
|
|
6
|
|
19
|
my ( $self, $in_seq_obj, $attr ) = @_; |
1353
|
6
|
50
|
33
|
|
|
30
|
$self->throw('attributes must be a hashref') |
1354
|
|
|
|
|
|
|
if $attr && ref($attr) ne 'HASH'; |
1355
|
|
|
|
|
|
|
|
1356
|
6
|
|
|
|
|
6
|
my $seqclass; |
1357
|
6
|
100
|
|
|
|
14
|
if ( $in_seq_obj->can_call_new ) { |
1358
|
4
|
|
|
|
|
6
|
$seqclass = ref($in_seq_obj); |
1359
|
|
|
|
|
|
|
} |
1360
|
|
|
|
|
|
|
else { |
1361
|
2
|
|
|
|
|
9
|
$seqclass = 'Bio::Primaryseq'; |
1362
|
2
|
|
|
|
|
29
|
$self->_attempt_to_load_seq; |
1363
|
|
|
|
|
|
|
} |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
my $out_seq_obj = $seqclass->new( |
1366
|
|
|
|
|
|
|
-seq => $attr->{seq} || $in_seq_obj->seq, |
1367
|
|
|
|
|
|
|
-display_id => $attr->{display_id} || $in_seq_obj->display_id, |
1368
|
|
|
|
|
|
|
-accession_number => $attr->{accession_number} |
1369
|
|
|
|
|
|
|
|| $in_seq_obj->accession_number |
1370
|
|
|
|
|
|
|
|| '', |
1371
|
|
|
|
|
|
|
-alphabet => $in_seq_obj->alphabet, |
1372
|
|
|
|
|
|
|
-desc => $attr->{desc} || $in_seq_obj->desc, |
1373
|
|
|
|
|
|
|
-verbose => $attr->{verbose} || $in_seq_obj->verbose, |
1374
|
4
|
|
33
|
|
|
33
|
-is_circular => $attr->{is_circular} || $in_seq_obj->is_circular || 0, |
|
|
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
50
|
|
|
|
|
1375
|
|
|
|
|
|
|
); |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
# move the annotation across to the product |
1378
|
4
|
50
|
33
|
|
|
28
|
if ( $out_seq_obj->isa("Bio::AnnotatableI") |
1379
|
|
|
|
|
|
|
&& $in_seq_obj->isa("Bio::AnnotatableI") ) |
1380
|
|
|
|
|
|
|
{ |
1381
|
4
|
|
|
|
|
10
|
foreach my $key ( $in_seq_obj->annotation->get_all_annotation_keys ) { |
1382
|
4
|
|
|
|
|
14
|
foreach my $value ( $in_seq_obj->annotation->get_Annotations($key) ) |
1383
|
|
|
|
|
|
|
{ |
1384
|
4
|
|
|
|
|
6
|
$out_seq_obj->annotation->add_Annotation( $key, $value ); |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
} |
1387
|
|
|
|
|
|
|
} |
1388
|
4
|
|
|
|
|
11
|
return $out_seq_obj; |
1389
|
|
|
|
|
|
|
} # _new_seq_from_old |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=head2 _coord_adjust |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
Title : _coord_adjust |
1394
|
|
|
|
|
|
|
Usage : my $newfeat=Bio::SeqUtils->_coord_adjust($feature, 100, $seq->length); |
1395
|
|
|
|
|
|
|
Function: Recursive subroutine to adjust the coordinates of a feature |
1396
|
|
|
|
|
|
|
and all its subfeatures. If a sequence length is specified, then |
1397
|
|
|
|
|
|
|
any adjusted features that have locations beyond the boundaries |
1398
|
|
|
|
|
|
|
of the sequence are converted to Bio::Location::Fuzzy objects. |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
Returns : A Bio::SeqFeatureI compliant object. |
1401
|
|
|
|
|
|
|
Args : A Bio::SeqFeatureI compliant object, |
1402
|
|
|
|
|
|
|
the number of bases to add to the coordinates |
1403
|
|
|
|
|
|
|
(optional) the length of the parent sequence |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=cut |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
sub _coord_adjust { |
1409
|
8
|
|
|
8
|
|
11
|
my ( $self, $feat, $add, $length ) = @_; |
1410
|
8
|
50
|
|
|
|
21
|
$self->throw( 'Object [$feat] ' |
1411
|
|
|
|
|
|
|
. 'of class [' |
1412
|
|
|
|
|
|
|
. ref($feat) |
1413
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
1414
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
1415
|
8
|
|
|
|
|
7
|
my @adjsubfeat; |
1416
|
8
|
|
|
|
|
15
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
1417
|
0
|
|
|
|
|
0
|
push @adjsubfeat, $self->_coord_adjust( $subfeat, $add, $length ); |
1418
|
|
|
|
|
|
|
} |
1419
|
8
|
|
|
|
|
5
|
my @loc; |
1420
|
8
|
|
|
|
|
19
|
for ( $feat->location->each_Location ) { |
1421
|
8
|
|
|
|
|
15
|
my @coords = ( $_->start, $_->end ); |
1422
|
8
|
|
|
|
|
17
|
my $strand = $_->strand; |
1423
|
8
|
|
|
|
|
12
|
my $type = $_->location_type; |
1424
|
8
|
|
|
|
|
16
|
foreach (@coords) { |
1425
|
16
|
50
|
|
|
|
23
|
$self->throw("can not handle negative feature positions (got: $_)") |
1426
|
|
|
|
|
|
|
if $_ < 0; |
1427
|
16
|
100
|
100
|
|
|
46
|
if ( $add + $_ < 1 ) { |
|
|
100
|
|
|
|
|
|
1428
|
2
|
|
|
|
|
3
|
$_ = '<1'; |
1429
|
|
|
|
|
|
|
} |
1430
|
|
|
|
|
|
|
elsif ( defined $length and $add + $_ > $length ) { |
1431
|
1
|
|
|
|
|
3
|
$_ = ">$length"; |
1432
|
|
|
|
|
|
|
} |
1433
|
|
|
|
|
|
|
else { |
1434
|
13
|
|
|
|
|
15
|
$_ = $add + $_; |
1435
|
|
|
|
|
|
|
} |
1436
|
|
|
|
|
|
|
} |
1437
|
8
|
|
|
|
|
22
|
push @loc, |
1438
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( [ \@coords ], |
1439
|
|
|
|
|
|
|
$strand, $type ); |
1440
|
|
|
|
|
|
|
} |
1441
|
8
|
|
|
|
|
19
|
my $newfeat = |
1442
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
1443
|
8
|
|
|
|
|
20
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
1444
|
0
|
|
|
|
|
0
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
1445
|
0
|
|
|
|
|
0
|
$newfeat->annotation->add_Annotation( $key, $value ); |
1446
|
|
|
|
|
|
|
} |
1447
|
|
|
|
|
|
|
} |
1448
|
8
|
|
|
|
|
16
|
foreach my $key ( $feat->get_all_tags() ) { |
1449
|
6
|
|
|
|
|
11
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
1450
|
|
|
|
|
|
|
} |
1451
|
8
|
|
|
|
|
20
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
1452
|
8
|
50
|
|
|
|
21
|
$loc ? $newfeat->location($loc) : return; |
1453
|
8
|
|
|
|
|
12
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
1454
|
8
|
|
|
|
|
24
|
return $newfeat; |
1455
|
|
|
|
|
|
|
} |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
=head2 revcom_with_features |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
Title : revcom_with_features |
1460
|
|
|
|
|
|
|
Usage : $revcom=Bio::SeqUtils->revcom_with_features($seq); |
1461
|
|
|
|
|
|
|
Function: Like Bio::Seq::revcom, but keeps features (adjusting coordinates |
1462
|
|
|
|
|
|
|
as appropriate. |
1463
|
|
|
|
|
|
|
Returns : A new sequence object |
1464
|
|
|
|
|
|
|
Args : A sequence object |
1465
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
=cut |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
sub revcom_with_features { |
1470
|
5
|
|
|
5
|
1
|
8
|
my ( $self, $seq ) = @_; |
1471
|
5
|
50
|
|
|
|
14
|
$self->throw( 'Object [$seq] ' |
1472
|
|
|
|
|
|
|
. 'of class [' |
1473
|
|
|
|
|
|
|
. ref($seq) |
1474
|
|
|
|
|
|
|
. '] should be a Bio::SeqI ' ) |
1475
|
|
|
|
|
|
|
unless $seq->isa('Bio::SeqI'); |
1476
|
5
|
|
|
|
|
24
|
my $revcom = $seq->revcom; |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
# make sure that there is no annotation or features in $trunc |
1479
|
|
|
|
|
|
|
# (->revcom() now clone objects except for Bio::Seq::LargePrimarySeq) |
1480
|
5
|
|
|
|
|
11
|
$revcom->annotation->remove_Annotations; |
1481
|
5
|
|
|
|
|
15
|
$revcom->remove_SeqFeatures; |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
#move annotations |
1484
|
5
|
|
|
|
|
11
|
foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { |
1485
|
4
|
|
|
|
|
9
|
foreach my $value ( $seq->annotation->get_Annotations($key) ) { |
1486
|
4
|
|
|
|
|
7
|
$revcom->annotation->add_Annotation( $key, $value ); |
1487
|
|
|
|
|
|
|
} |
1488
|
|
|
|
|
|
|
} |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
#move features |
1491
|
5
|
|
|
|
|
14
|
for ( map { $self->_feature_revcom( $_, $seq->length ) } |
|
6
|
|
|
|
|
13
|
|
1492
|
|
|
|
|
|
|
reverse $seq->get_SeqFeatures ) |
1493
|
|
|
|
|
|
|
{ |
1494
|
6
|
|
|
|
|
14
|
$revcom->add_SeqFeature($_); |
1495
|
|
|
|
|
|
|
} |
1496
|
5
|
|
|
|
|
9
|
return $revcom; |
1497
|
|
|
|
|
|
|
} |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=head2 _feature_revcom |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
Title : _feature_revcom |
1502
|
|
|
|
|
|
|
Usage : my $newfeat=Bio::SeqUtils->_feature_revcom($feature, $seq->length); |
1503
|
|
|
|
|
|
|
Function: Recursive subroutine to reverse complement a feature and |
1504
|
|
|
|
|
|
|
all its subfeatures. The length of the parent sequence must be |
1505
|
|
|
|
|
|
|
specified. |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
Returns : A Bio::SeqFeatureI compliant object. |
1508
|
|
|
|
|
|
|
Args : A Bio::SeqFeatureI compliant object, |
1509
|
|
|
|
|
|
|
the length of the parent sequence |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
=cut |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
sub _feature_revcom { |
1515
|
6
|
|
|
6
|
|
7
|
my ( $self, $feat, $length ) = @_; |
1516
|
6
|
50
|
|
|
|
18
|
$self->throw( 'Object [$feat] ' |
1517
|
|
|
|
|
|
|
. 'of class [' |
1518
|
|
|
|
|
|
|
. ref($feat) |
1519
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
1520
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
1521
|
6
|
|
|
|
|
5
|
my @adjsubfeat; |
1522
|
6
|
|
|
|
|
14
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
1523
|
0
|
|
|
|
|
0
|
push @adjsubfeat, $self->_feature_revcom( $subfeat, $length ); |
1524
|
|
|
|
|
|
|
} |
1525
|
6
|
|
|
|
|
7
|
my @loc; |
1526
|
6
|
|
|
|
|
12
|
for ( $feat->location->each_Location ) { |
1527
|
6
|
|
|
|
|
12
|
my $type = $_->location_type; |
1528
|
6
|
|
|
|
|
7
|
my $strand; |
1529
|
6
|
100
|
|
|
|
13
|
if ( $_->strand == -1 ) { $strand = 1 } |
|
5
|
50
|
|
|
|
8
|
|
1530
|
1
|
|
|
|
|
2
|
elsif ( $_->strand == 1 ) { $strand = -1 } |
1531
|
0
|
|
|
|
|
0
|
else { $strand = $_->strand } |
1532
|
6
|
|
|
|
|
12
|
my $newend = |
1533
|
|
|
|
|
|
|
$self->_coord_revcom( $_->start, $_->start_pos_type, $length ); |
1534
|
6
|
|
|
|
|
16
|
my $newstart = |
1535
|
|
|
|
|
|
|
$self->_coord_revcom( $_->end, $_->end_pos_type, $length ); |
1536
|
6
|
|
|
|
|
9
|
my $newstart_type = $_->end_pos_type; |
1537
|
6
|
50
|
|
|
|
11
|
$newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER'; |
1538
|
6
|
50
|
|
|
|
10
|
$newstart_type = 'AFTER' if $_->end_pos_type eq 'BEFORE'; |
1539
|
6
|
|
|
|
|
12
|
my $newend_type = $_->start_pos_type; |
1540
|
6
|
50
|
|
|
|
9
|
$newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER'; |
1541
|
6
|
100
|
|
|
|
8
|
$newend_type = 'AFTER' if $_->start_pos_type eq 'BEFORE'; |
1542
|
6
|
|
|
|
|
19
|
push @loc, |
1543
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( |
1544
|
|
|
|
|
|
|
[ [ $newstart, $newend, $newstart_type, $newend_type ] ], |
1545
|
|
|
|
|
|
|
$strand, $type ); |
1546
|
|
|
|
|
|
|
} |
1547
|
6
|
|
|
|
|
20
|
my $newfeat = |
1548
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
1549
|
6
|
|
|
|
|
12
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
1550
|
0
|
|
|
|
|
0
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
1551
|
0
|
|
|
|
|
0
|
$newfeat->annotation->add_Annotation( $key, $value ); |
1552
|
|
|
|
|
|
|
} |
1553
|
|
|
|
|
|
|
} |
1554
|
6
|
|
|
|
|
15
|
foreach my $key ( $feat->get_all_tags() ) { |
1555
|
3
|
|
|
|
|
7
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
1556
|
|
|
|
|
|
|
} |
1557
|
|
|
|
|
|
|
|
1558
|
6
|
|
|
|
|
16
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
1559
|
6
|
50
|
|
|
|
19
|
$loc ? $newfeat->location($loc) : return; |
1560
|
|
|
|
|
|
|
|
1561
|
6
|
|
|
|
|
9
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
1562
|
6
|
|
|
|
|
16
|
return $newfeat; |
1563
|
|
|
|
|
|
|
} |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
sub _coord_revcom { |
1566
|
12
|
|
|
12
|
|
12
|
my ( $self, $coord, $type, $length ) = @_; |
1567
|
12
|
50
|
33
|
|
|
42
|
if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) { |
1568
|
0
|
|
|
|
|
0
|
$coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge; |
|
0
|
|
|
|
|
0
|
|
1569
|
|
|
|
|
|
|
} |
1570
|
|
|
|
|
|
|
else { |
1571
|
12
|
|
|
|
|
62
|
$coord =~ s/(\d+)/$length+1-$1/ge; |
|
12
|
|
|
|
|
40
|
|
1572
|
12
|
|
|
|
|
14
|
$coord =~ tr/<>/>; |
1573
|
12
|
100
|
66
|
|
|
31
|
$coord = '>' . $coord |
1574
|
|
|
|
|
|
|
if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>'; |
1575
|
12
|
50
|
33
|
|
|
23
|
$coord = '<' . $coord |
1576
|
|
|
|
|
|
|
if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<'; |
1577
|
|
|
|
|
|
|
} |
1578
|
12
|
|
|
|
|
24
|
return $coord; |
1579
|
|
|
|
|
|
|
} |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=head2 evolve |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
Title : evolve |
1584
|
|
|
|
|
|
|
Usage : my $newseq = Bio::SeqUtils-> |
1585
|
|
|
|
|
|
|
evolve($seq, $similarity, $transition_transversion_rate); |
1586
|
|
|
|
|
|
|
Function: Mutates the sequence by point mutations until the similarity of |
1587
|
|
|
|
|
|
|
the new sequence has decreased to the required level. |
1588
|
|
|
|
|
|
|
Transition/transversion rate is adjustable. |
1589
|
|
|
|
|
|
|
Returns : A new Bio::PrimarySeq object |
1590
|
|
|
|
|
|
|
Args : sequence object |
1591
|
|
|
|
|
|
|
percentage similarity (e.g. 80) |
1592
|
|
|
|
|
|
|
tr/tv rate, optional, defaults to 1 (= 1:1) |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
Set the verbosity of the Bio::SeqUtils object to positive integer to |
1595
|
|
|
|
|
|
|
see the mutations as they happen. |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
This method works only on nucleotide sequences. It prints a warning if |
1598
|
|
|
|
|
|
|
you set the target similarity to be less than 25%. |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Transition/transversion ratio is an observed attribute of an sequence |
1601
|
|
|
|
|
|
|
comparison. We are dealing here with the transition/transversion rate |
1602
|
|
|
|
|
|
|
that we set for our model of sequence evolution. |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
=cut |
1605
|
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
|
sub evolve { |
1607
|
1
|
|
|
1
|
1
|
3
|
my ( $self, $seq, $sim, $rate ) = @_; |
1608
|
1
|
|
50
|
|
|
2
|
$rate ||= 1; |
1609
|
|
|
|
|
|
|
|
1610
|
1
|
50
|
|
|
|
8
|
$self->throw( 'Object [$seq] ' |
1611
|
|
|
|
|
|
|
. 'of class [' |
1612
|
|
|
|
|
|
|
. ref($seq) |
1613
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
1614
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
1615
|
|
|
|
|
|
|
|
1616
|
1
|
50
|
33
|
|
|
9
|
$self->throw( |
1617
|
|
|
|
|
|
|
"[$sim] " . ' should be a positive integer or float under 100' ) |
1618
|
|
|
|
|
|
|
unless $sim =~ /^[+\d.]+$/ and $sim <= 100; |
1619
|
|
|
|
|
|
|
|
1620
|
1
|
50
|
|
|
|
2
|
$self->warn( |
1621
|
|
|
|
|
|
|
"Nucleotide sequences are 25% similar by chance. |
1622
|
|
|
|
|
|
|
Do you really want to set similarity to [$sim]%?\n" |
1623
|
|
|
|
|
|
|
) unless $sim > 25; |
1624
|
|
|
|
|
|
|
|
1625
|
1
|
50
|
|
|
|
2
|
$self->throw('Only nucleotide sequences are supported') |
1626
|
|
|
|
|
|
|
if $seq->alphabet eq 'protein'; |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
# arrays of possible changes have transitions as first items |
1629
|
1
|
|
|
|
|
1
|
my %changes; |
1630
|
1
|
|
|
|
|
4
|
$changes{'a'} = [ 't', 'c', 'g' ]; |
1631
|
1
|
|
|
|
|
3
|
$changes{'t'} = [ 'a', 'c', 'g' ]; |
1632
|
1
|
|
|
|
|
3
|
$changes{'c'} = [ 'g', 'a', 't' ]; |
1633
|
1
|
|
|
|
|
2
|
$changes{'g'} = [ 'c', 'a', 't' ]; |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
# given the desired rate, find out where cut off points need to be |
1636
|
|
|
|
|
|
|
# when random numbers are generated from 0 to 100 |
1637
|
|
|
|
|
|
|
# we are ignoring identical mutations (e.g. A->A) to speed things up |
1638
|
1
|
|
|
|
|
2
|
my $bin_size = 100 / ( $rate + 2 ); |
1639
|
1
|
|
|
|
|
3
|
my $transition = 100 - ( 2 * $bin_size ); |
1640
|
1
|
|
|
|
|
2
|
my $first_transversion = $transition + $bin_size; |
1641
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
# unify the look of sequence strings |
1643
|
1
|
|
|
|
|
2
|
my $string = lc $seq->seq; # lower case |
1644
|
1
|
|
|
|
|
2
|
$string =~ |
1645
|
|
|
|
|
|
|
s/u/t/; # simplyfy our life; modules should deal with the change anyway |
1646
|
|
|
|
|
|
|
# store the original sequence string |
1647
|
1
|
|
|
|
|
1
|
my $oristring = $string; |
1648
|
1
|
|
|
|
|
2
|
my $length = $seq->length; |
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
# stop evolving if the limit has been reached |
1651
|
1
|
|
|
|
|
3
|
until ( $self->_get_similarity( $oristring, $string ) <= $sim ) { |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
# find the location in the string to change |
1654
|
5
|
|
|
|
|
86
|
my $loc = int( rand $length ) + 1; |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
# nucleotide to change |
1657
|
5
|
|
|
|
|
6
|
my $oldnuc = substr $string, $loc - 1, 1; |
1658
|
5
|
|
|
|
|
4
|
my $newnuc; |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
# nucleotide it is changed to |
1661
|
5
|
|
|
|
|
5
|
my $choose = rand(100); |
1662
|
5
|
100
|
|
|
|
10
|
if ( $choose < $transition ) { |
|
|
50
|
|
|
|
|
|
1663
|
3
|
|
|
|
|
4
|
$newnuc = $changes{$oldnuc}[0]; |
1664
|
|
|
|
|
|
|
} |
1665
|
|
|
|
|
|
|
elsif ( $choose < $first_transversion ) { |
1666
|
0
|
|
|
|
|
0
|
$newnuc = $changes{$oldnuc}[1]; |
1667
|
|
|
|
|
|
|
} |
1668
|
|
|
|
|
|
|
else { |
1669
|
2
|
|
|
|
|
3
|
$newnuc = $changes{$oldnuc}[2]; |
1670
|
|
|
|
|
|
|
} |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
# do the change |
1673
|
5
|
|
|
|
|
5
|
substr $string, $loc - 1, 1, $newnuc; |
1674
|
|
|
|
|
|
|
|
1675
|
5
|
|
|
|
|
15
|
$self->debug("$loc$oldnuc>$newnuc\n"); |
1676
|
|
|
|
|
|
|
} |
1677
|
|
|
|
|
|
|
|
1678
|
1
|
|
|
|
|
3
|
return new Bio::PrimarySeq( |
1679
|
|
|
|
|
|
|
-id => $seq->id . "-$sim", |
1680
|
|
|
|
|
|
|
-description => $seq->description, |
1681
|
|
|
|
|
|
|
-seq => $string |
1682
|
|
|
|
|
|
|
); |
1683
|
|
|
|
|
|
|
} |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
sub _get_similarity { |
1686
|
6
|
|
|
6
|
|
6
|
my ( $self, $oriseq, $seq ) = @_; |
1687
|
|
|
|
|
|
|
|
1688
|
6
|
|
|
|
|
4
|
my $len = length($oriseq); |
1689
|
6
|
|
|
|
|
4
|
my $c; |
1690
|
|
|
|
|
|
|
|
1691
|
6
|
|
|
|
|
11
|
for ( my $i = 0 ; $i < $len ; $i++ ) { |
1692
|
60
|
100
|
|
|
|
106
|
$c++ if substr( $oriseq, $i, 1 ) eq substr( $seq, $i, 1 ); |
1693
|
|
|
|
|
|
|
} |
1694
|
6
|
|
|
|
|
20
|
return 100 * $c / $len; |
1695
|
|
|
|
|
|
|
} |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
1; |