| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqUtils |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Cared for by Heikki Lehvaslaiho |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Copyright Heikki Lehvaslaiho |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Bio::SeqUtils - Additional methods for PrimarySeq objects |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Bio::SeqUtils; |
|
20
|
|
|
|
|
|
|
# get a Bio::PrimarySeqI compliant object, $seq, somehow |
|
21
|
|
|
|
|
|
|
$util = Bio::SeqUtils->new(); |
|
22
|
|
|
|
|
|
|
$polypeptide_3char = $util->seq3($seq); |
|
23
|
|
|
|
|
|
|
# or |
|
24
|
|
|
|
|
|
|
$polypeptide_3char = Bio::SeqUtils->seq3($seq); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# set the sequence string (stored in one char code in the object) |
|
27
|
|
|
|
|
|
|
Bio::SeqUtils->seq3($seq, $polypeptide_3char); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# translate a sequence in all six frames |
|
30
|
|
|
|
|
|
|
@seqs = Bio::SeqUtils->translate_6frames($seq); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# inplace editing of the sequence |
|
33
|
|
|
|
|
|
|
Bio::SeqUtils->mutate($seq, |
|
34
|
|
|
|
|
|
|
Bio::LiveSeq::Mutation->new(-seq => 'c', |
|
35
|
|
|
|
|
|
|
-pos => 3 |
|
36
|
|
|
|
|
|
|
)); |
|
37
|
|
|
|
|
|
|
# mutate a sequence to desired similarity% |
|
38
|
|
|
|
|
|
|
$newseq = Bio::SeqUtils-> evolve |
|
39
|
|
|
|
|
|
|
($seq, $similarity, $transition_transversion_rate); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# concatenate two or more sequences with annotations and features, |
|
42
|
|
|
|
|
|
|
# the first sequence will be modified |
|
43
|
|
|
|
|
|
|
Bio::SeqUtils->cat(@seqs); |
|
44
|
|
|
|
|
|
|
my $catseq=$seqs[0]; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# truncate a sequence, retaining features and adjusting their |
|
47
|
|
|
|
|
|
|
# coordinates if necessary |
|
48
|
|
|
|
|
|
|
my $truncseq = Bio::SeqUtils->trunc_with_features($seq, 100, 200); |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# reverse complement a sequence and its features |
|
51
|
|
|
|
|
|
|
my $revcomseq = Bio::SeqUtils->revcom_with_features($seq); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# simulate cloning of a fragment into a vector. Cut the vector at |
|
54
|
|
|
|
|
|
|
# positions 1000 and 1100 (deleting postions 1001 to 1099) and |
|
55
|
|
|
|
|
|
|
# "ligate" a fragment into the sites. The fragment is |
|
56
|
|
|
|
|
|
|
# reverse-complemented in this example (option "flip"). |
|
57
|
|
|
|
|
|
|
# All features of the vector and fragment are preserved and |
|
58
|
|
|
|
|
|
|
# features that are affected by the deletion/insertion are |
|
59
|
|
|
|
|
|
|
# modified accordingly. |
|
60
|
|
|
|
|
|
|
# $vector and $fragment must be Bio::SeqI compliant objects |
|
61
|
|
|
|
|
|
|
my $new_molecule = Bio::Sequtils->ligate( |
|
62
|
|
|
|
|
|
|
-vector => $vector, |
|
63
|
|
|
|
|
|
|
-fragment => $fragment, |
|
64
|
|
|
|
|
|
|
-left => 1000, |
|
65
|
|
|
|
|
|
|
-right => 1100, |
|
66
|
|
|
|
|
|
|
-flip => 1 |
|
67
|
|
|
|
|
|
|
); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# delete a segment of a sequence (from pos 1000 to 1100, inclusive), |
|
70
|
|
|
|
|
|
|
# again preserving features and annotations |
|
71
|
|
|
|
|
|
|
my $new_molecule = Bio::SeqUtils->cut( $seq, 1000, 1100 ); |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# insert a fragment into a recipient between positions 1000 and |
|
74
|
|
|
|
|
|
|
# 1001. $recipient is a Bio::SeqI compliant object |
|
75
|
|
|
|
|
|
|
my $new_molecule = Bio::SeqUtils::PbrTools->insert( |
|
76
|
|
|
|
|
|
|
$recipient_seq, |
|
77
|
|
|
|
|
|
|
$fragment_seq, |
|
78
|
|
|
|
|
|
|
1000 |
|
79
|
|
|
|
|
|
|
); |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
This class is a holder of methods that work on Bio::PrimarySeqI- |
|
84
|
|
|
|
|
|
|
compliant sequence objects, e.g. Bio::PrimarySeq and |
|
85
|
|
|
|
|
|
|
Bio::Seq. These methods are not part of the Bio::PrimarySeqI |
|
86
|
|
|
|
|
|
|
interface and should in general not be essential to the primary function |
|
87
|
|
|
|
|
|
|
of sequence objects. If you are thinking of adding essential |
|
88
|
|
|
|
|
|
|
functions, it might be better to create your own sequence class. |
|
89
|
|
|
|
|
|
|
See L, L, and L for more. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The methods take as their first argument a sequence object. It is |
|
92
|
|
|
|
|
|
|
possible to use methods without first creating a SeqUtils object, |
|
93
|
|
|
|
|
|
|
i.e. use it as an anonymous hash. |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
The first two methods, seq3() and seq3in(), give out or read in protein |
|
96
|
|
|
|
|
|
|
sequences coded in three letter IUPAC amino acid codes. |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
The next two methods, translate_3frames() and translate_6frames(), wrap |
|
99
|
|
|
|
|
|
|
around the standard translate method to give back an array of three |
|
100
|
|
|
|
|
|
|
forward or all six frame translations. |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The mutate() method mutates the sequence string with a mutation |
|
103
|
|
|
|
|
|
|
description object. |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The cat() method concatenates two or more sequences. The first sequence |
|
106
|
|
|
|
|
|
|
is modified by addition of the remaining sequences. All annotations and |
|
107
|
|
|
|
|
|
|
sequence features will be transferred. |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The revcom_with_features() and trunc_with_features() methods are similar |
|
110
|
|
|
|
|
|
|
to the revcom() and trunc() methods from Bio::Seq, but also adjust any |
|
111
|
|
|
|
|
|
|
features associated with the sequence as appropriate. |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
There are also methods that simulate molecular cloning with rich |
|
114
|
|
|
|
|
|
|
sequence objects. |
|
115
|
|
|
|
|
|
|
The delete() method cuts a segment out of a sequence and re-joins the |
|
116
|
|
|
|
|
|
|
left and right fragments (like splicing or digesting and re-ligating a |
|
117
|
|
|
|
|
|
|
molecule). Positions (and types) of sequence features are adjusted |
|
118
|
|
|
|
|
|
|
accordingly: |
|
119
|
|
|
|
|
|
|
Features that span the deleted segment are converted to split featuress |
|
120
|
|
|
|
|
|
|
to indicate the disruption. (Sub)Features that extend into the deleted |
|
121
|
|
|
|
|
|
|
segment are truncated. |
|
122
|
|
|
|
|
|
|
A new molecule is created and returned. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
The insert() method inserts a fragment (which can be a rich Bio::Seq |
|
125
|
|
|
|
|
|
|
object) into another sequence object adding all annotations and |
|
126
|
|
|
|
|
|
|
features to the final product. |
|
127
|
|
|
|
|
|
|
Features that span the insertion site are converted to split features |
|
128
|
|
|
|
|
|
|
to indicate the disruption. |
|
129
|
|
|
|
|
|
|
A new feature is added to indicate the inserted fragment itself. |
|
130
|
|
|
|
|
|
|
A new molecule is created and returned. |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The ligate() method simulates digesting a recipient (vector) and |
|
133
|
|
|
|
|
|
|
ligating a fragment into it, which can also be flipped if needed. It |
|
134
|
|
|
|
|
|
|
is simply a combination of a deletion and an insertion step and |
|
135
|
|
|
|
|
|
|
returns a new molecule. The rules for modifying feature locations |
|
136
|
|
|
|
|
|
|
outlined above are also used here, e.g. features that span the cut |
|
137
|
|
|
|
|
|
|
sites are converted to split features with truncated sub-locations. |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
145
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
146
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
149
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 Support |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
I |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
158
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
159
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
160
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
165
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
166
|
|
|
|
|
|
|
web: |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 AUTHOR - Heikki Lehvaslaiho |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Email: heikki-at-bioperl-dot-org |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Roy R. Chaudhuri - roy.chaudhuri at gmail.com |
|
177
|
|
|
|
|
|
|
Frank Schwach - frank.schwach@sanger.ac.uk |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 APPENDIX |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
182
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Let the code begin... |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
package Bio::SeqUtils; |
|
189
|
204
|
|
|
204
|
|
755
|
use strict; |
|
|
204
|
|
|
|
|
217
|
|
|
|
204
|
|
|
|
|
5131
|
|
|
190
|
204
|
|
|
204
|
|
636
|
use warnings; |
|
|
204
|
|
|
|
|
269
|
|
|
|
204
|
|
|
|
|
5518
|
|
|
191
|
204
|
|
|
204
|
|
890
|
use Scalar::Util qw(blessed); |
|
|
204
|
|
|
|
|
234
|
|
|
|
204
|
|
|
|
|
9191
|
|
|
192
|
204
|
|
|
204
|
|
712
|
use parent qw(Bio::Root::Root); |
|
|
204
|
|
|
|
|
219
|
|
|
|
204
|
|
|
|
|
1300
|
|
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# new inherited from RootI |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
our %ONECODE = ( |
|
197
|
|
|
|
|
|
|
'Ala' => 'A', |
|
198
|
|
|
|
|
|
|
'Asx' => 'B', |
|
199
|
|
|
|
|
|
|
'Cys' => 'C', |
|
200
|
|
|
|
|
|
|
'Asp' => 'D', |
|
201
|
|
|
|
|
|
|
'Glu' => 'E', |
|
202
|
|
|
|
|
|
|
'Phe' => 'F', |
|
203
|
|
|
|
|
|
|
'Gly' => 'G', |
|
204
|
|
|
|
|
|
|
'His' => 'H', |
|
205
|
|
|
|
|
|
|
'Ile' => 'I', |
|
206
|
|
|
|
|
|
|
'Lys' => 'K', |
|
207
|
|
|
|
|
|
|
'Leu' => 'L', |
|
208
|
|
|
|
|
|
|
'Met' => 'M', |
|
209
|
|
|
|
|
|
|
'Asn' => 'N', |
|
210
|
|
|
|
|
|
|
'Pro' => 'P', |
|
211
|
|
|
|
|
|
|
'Gln' => 'Q', |
|
212
|
|
|
|
|
|
|
'Arg' => 'R', |
|
213
|
|
|
|
|
|
|
'Ser' => 'S', |
|
214
|
|
|
|
|
|
|
'Thr' => 'T', |
|
215
|
|
|
|
|
|
|
'Val' => 'V', |
|
216
|
|
|
|
|
|
|
'Trp' => 'W', |
|
217
|
|
|
|
|
|
|
'Xaa' => 'X', |
|
218
|
|
|
|
|
|
|
'Tyr' => 'Y', |
|
219
|
|
|
|
|
|
|
'Glx' => 'Z', |
|
220
|
|
|
|
|
|
|
'Ter' => '*', |
|
221
|
|
|
|
|
|
|
'Sec' => 'U', |
|
222
|
|
|
|
|
|
|
'Pyl' => 'O', |
|
223
|
|
|
|
|
|
|
'Xle' => 'J' |
|
224
|
|
|
|
|
|
|
); |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
our %THREECODE = ( |
|
227
|
|
|
|
|
|
|
'A' => 'Ala', |
|
228
|
|
|
|
|
|
|
'B' => 'Asx', |
|
229
|
|
|
|
|
|
|
'C' => 'Cys', |
|
230
|
|
|
|
|
|
|
'D' => 'Asp', |
|
231
|
|
|
|
|
|
|
'E' => 'Glu', |
|
232
|
|
|
|
|
|
|
'F' => 'Phe', |
|
233
|
|
|
|
|
|
|
'G' => 'Gly', |
|
234
|
|
|
|
|
|
|
'H' => 'His', |
|
235
|
|
|
|
|
|
|
'I' => 'Ile', |
|
236
|
|
|
|
|
|
|
'K' => 'Lys', |
|
237
|
|
|
|
|
|
|
'L' => 'Leu', |
|
238
|
|
|
|
|
|
|
'M' => 'Met', |
|
239
|
|
|
|
|
|
|
'N' => 'Asn', |
|
240
|
|
|
|
|
|
|
'P' => 'Pro', |
|
241
|
|
|
|
|
|
|
'Q' => 'Gln', |
|
242
|
|
|
|
|
|
|
'R' => 'Arg', |
|
243
|
|
|
|
|
|
|
'S' => 'Ser', |
|
244
|
|
|
|
|
|
|
'T' => 'Thr', |
|
245
|
|
|
|
|
|
|
'V' => 'Val', |
|
246
|
|
|
|
|
|
|
'W' => 'Trp', |
|
247
|
|
|
|
|
|
|
'Y' => 'Tyr', |
|
248
|
|
|
|
|
|
|
'Z' => 'Glx', |
|
249
|
|
|
|
|
|
|
'X' => 'Xaa', |
|
250
|
|
|
|
|
|
|
'*' => 'Ter', |
|
251
|
|
|
|
|
|
|
'U' => 'Sec', |
|
252
|
|
|
|
|
|
|
'O' => 'Pyl', |
|
253
|
|
|
|
|
|
|
'J' => 'Xle' |
|
254
|
|
|
|
|
|
|
); |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 seq3 |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Title : seq3 |
|
259
|
|
|
|
|
|
|
Usage : $string = Bio::SeqUtils->seq3($seq) |
|
260
|
|
|
|
|
|
|
Function: Read only method that returns the amino acid sequence as a |
|
261
|
|
|
|
|
|
|
string of three letter codes. alphabet has to be |
|
262
|
|
|
|
|
|
|
'protein'. Output follows the IUPAC standard plus 'Ter' for |
|
263
|
|
|
|
|
|
|
terminator. Any unknown character, including the default |
|
264
|
|
|
|
|
|
|
unknown character 'X', is changed into 'Xaa'. A noncoded |
|
265
|
|
|
|
|
|
|
aminoacid selenocystein is recognized (Sec, U). |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Returns : A scalar |
|
268
|
|
|
|
|
|
|
Args : character used for stop in the protein sequence optional, |
|
269
|
|
|
|
|
|
|
defaults to '*' string used to separate the output amino |
|
270
|
|
|
|
|
|
|
acid codes, optional, defaults to '' |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub seq3 { |
|
275
|
4
|
|
|
4
|
1
|
6
|
my ( $self, $seq, $stop, $sep ) = @_; |
|
276
|
|
|
|
|
|
|
|
|
277
|
4
|
50
|
|
|
|
19
|
$seq->isa('Bio::PrimarySeqI') |
|
278
|
|
|
|
|
|
|
|| $self->throw('Not a Bio::PrimarySeqI object but [$self]'); |
|
279
|
4
|
50
|
|
|
|
9
|
$seq->alphabet eq 'protein' |
|
280
|
|
|
|
|
|
|
|| $self->throw('Not a protein sequence'); |
|
281
|
|
|
|
|
|
|
|
|
282
|
4
|
100
|
|
|
|
8
|
if ( defined $stop ) { |
|
283
|
1
|
50
|
|
|
|
3
|
length $stop != 1 |
|
284
|
|
|
|
|
|
|
and $self->throw('One character stop needed, not [$stop]'); |
|
285
|
1
|
|
|
|
|
2
|
$THREECODE{$stop} = "Ter"; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
4
|
|
100
|
|
|
9
|
$sep ||= ''; |
|
288
|
|
|
|
|
|
|
|
|
289
|
4
|
|
|
|
|
3
|
my $aa3s; |
|
290
|
4
|
|
|
|
|
6
|
foreach my $aa ( split //, uc $seq->seq ) { |
|
291
|
87
|
50
|
|
|
|
133
|
$THREECODE{$aa} and $aa3s .= $THREECODE{$aa} . $sep, next; |
|
292
|
0
|
|
|
|
|
0
|
$aa3s .= 'Xaa' . $sep; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
4
|
100
|
|
|
|
13
|
$sep and substr( $aa3s, -( length $sep ), length $sep ) = ''; |
|
295
|
4
|
|
|
|
|
13
|
return $aa3s; |
|
296
|
|
|
|
|
|
|
} |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 seq3in |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
Title : seq3in |
|
301
|
|
|
|
|
|
|
Usage : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer') |
|
302
|
|
|
|
|
|
|
Function: Method for changing of the sequence of a |
|
303
|
|
|
|
|
|
|
Bio::PrimarySeqI sequence object. The three letter amino |
|
304
|
|
|
|
|
|
|
acid input string is converted into one letter code. Any |
|
305
|
|
|
|
|
|
|
unknown character triplet, including the default 'Xaa', is |
|
306
|
|
|
|
|
|
|
converted into 'X'. |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Returns : Bio::PrimarySeq object |
|
309
|
|
|
|
|
|
|
Args : sequence string |
|
310
|
|
|
|
|
|
|
optional character to be used for stop in the protein sequence, |
|
311
|
|
|
|
|
|
|
defaults to '*' |
|
312
|
|
|
|
|
|
|
optional character to be used for unknown in the protein sequence, |
|
313
|
|
|
|
|
|
|
defaults to 'X' |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=cut |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub seq3in { |
|
318
|
3
|
|
|
3
|
1
|
6
|
my ( $self, $seq, $string, $stop, $unknown ) = @_; |
|
319
|
|
|
|
|
|
|
|
|
320
|
3
|
50
|
|
|
|
16
|
$seq->isa('Bio::PrimarySeqI') |
|
321
|
|
|
|
|
|
|
|| $self->throw("Not a Bio::PrimarySeqI object but [$self]"); |
|
322
|
3
|
50
|
|
|
|
10
|
$seq->alphabet eq 'protein' |
|
323
|
|
|
|
|
|
|
|| $self->throw('Not a protein sequence'); |
|
324
|
|
|
|
|
|
|
|
|
325
|
3
|
50
|
|
|
|
13
|
if ( defined $stop ) { |
|
326
|
0
|
0
|
|
|
|
0
|
length $stop != 1 |
|
327
|
|
|
|
|
|
|
and $self->throw("One character stop needed, not [$stop]"); |
|
328
|
0
|
|
|
|
|
0
|
$ONECODE{'Ter'} = $stop; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
3
|
50
|
|
|
|
9
|
if ( defined $unknown ) { |
|
331
|
0
|
0
|
|
|
|
0
|
length $unknown != 1 |
|
332
|
|
|
|
|
|
|
and $self->throw("One character stop needed, not [$unknown]"); |
|
333
|
0
|
|
|
|
|
0
|
$ONECODE{'Xaa'} = $unknown; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
|
|
336
|
3
|
|
|
|
|
4
|
my ( $aas, $aa3 ); |
|
337
|
3
|
|
|
|
|
6
|
my $length = ( length $string ) - 2; |
|
338
|
3
|
|
|
|
|
10
|
for ( my $i = 0 ; $i < $length ; $i += 3 ) { |
|
339
|
89
|
|
|
|
|
81
|
$aa3 = substr( $string, $i, 3 ); |
|
340
|
89
|
|
|
|
|
67
|
$aa3 = ucfirst( lc($aa3) ); |
|
341
|
89
|
100
|
|
|
|
228
|
$ONECODE{$aa3} and $aas .= $ONECODE{$aa3}, next; |
|
342
|
1
|
|
|
|
|
3
|
$aas .= $ONECODE{'Xaa'}; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
3
|
|
|
|
|
11
|
$seq->seq($aas); |
|
345
|
3
|
|
|
|
|
9
|
return $seq; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=head2 translate_3frames |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Title : translate_3frames |
|
351
|
|
|
|
|
|
|
Usage : @prots = Bio::SeqUtils->translate_3frames($seq) |
|
352
|
|
|
|
|
|
|
Function: Translate a nucleotide sequence in three forward frames. |
|
353
|
|
|
|
|
|
|
The IDs of the sequences are appended with '-0F', '-1F', '-2F'. |
|
354
|
|
|
|
|
|
|
Returns : An array of seq objects |
|
355
|
|
|
|
|
|
|
Args : sequence object |
|
356
|
|
|
|
|
|
|
same arguments as to Bio::PrimarySeqI::translate |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub translate_3frames { |
|
361
|
3
|
|
|
3
|
1
|
4
|
my ( $self, $seq, @args ) = @_; |
|
362
|
|
|
|
|
|
|
|
|
363
|
3
|
50
|
|
|
|
18
|
$self->throw( 'Object [$seq] ' |
|
364
|
|
|
|
|
|
|
. 'of class [' |
|
365
|
|
|
|
|
|
|
. ref($seq) |
|
366
|
|
|
|
|
|
|
. '] can not be translated.' ) |
|
367
|
|
|
|
|
|
|
unless $seq->can('translate'); |
|
368
|
|
|
|
|
|
|
|
|
369
|
3
|
|
|
|
|
4
|
my ( $stop, $unknown, $frame, $tableid, $fullCDS, $throw ) = @args; |
|
370
|
3
|
|
|
|
|
1
|
my @seqs; |
|
371
|
3
|
|
|
|
|
4
|
my $f = 0; |
|
372
|
3
|
|
|
|
|
7
|
while ( $f != 3 ) { |
|
373
|
9
|
|
|
|
|
17
|
my $translation = |
|
374
|
|
|
|
|
|
|
$seq->translate( $stop, $unknown, $f, $tableid, $fullCDS, $throw ); |
|
375
|
9
|
|
|
|
|
15
|
$translation->id( $seq->id . "-" . $f . "F" ); |
|
376
|
9
|
|
|
|
|
10
|
push @seqs, $translation; |
|
377
|
9
|
|
|
|
|
15
|
$f++; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
3
|
|
|
|
|
8
|
return @seqs; |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 translate_6frames |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Title : translate_6frames |
|
386
|
|
|
|
|
|
|
Usage : @prots = Bio::SeqUtils->translate_6frames($seq) |
|
387
|
|
|
|
|
|
|
Function: translate a nucleotide sequence in all six frames |
|
388
|
|
|
|
|
|
|
The IDs of the sequences are appended with '-0F', '-1F', '-2F', |
|
389
|
|
|
|
|
|
|
'-0R', '-1R', '-2R'. |
|
390
|
|
|
|
|
|
|
Returns : An array of seq objects |
|
391
|
|
|
|
|
|
|
Args : sequence object |
|
392
|
|
|
|
|
|
|
same arguments as to Bio::PrimarySeqI::translate |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=cut |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
sub translate_6frames { |
|
397
|
1
|
|
|
1
|
1
|
351
|
my ( $self, $seq, @args ) = @_; |
|
398
|
|
|
|
|
|
|
|
|
399
|
1
|
|
|
|
|
2
|
my @seqs = $self->translate_3frames( $seq, @args ); |
|
400
|
1
|
|
|
|
|
8
|
my @seqs2 = $self->translate_3frames( $seq->revcom, @args ); |
|
401
|
1
|
|
|
|
|
3
|
foreach my $seq2 (@seqs2) { |
|
402
|
3
|
|
|
|
|
5
|
my ($tmp) = $seq2->id; |
|
403
|
3
|
|
|
|
|
7
|
$tmp =~ s/F$/R/g; |
|
404
|
3
|
|
|
|
|
4
|
$seq2->id($tmp); |
|
405
|
|
|
|
|
|
|
} |
|
406
|
1
|
|
|
|
|
4
|
return @seqs, @seqs2; |
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 valid_aa |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Title : valid_aa |
|
412
|
|
|
|
|
|
|
Usage : my @aa = $table->valid_aa |
|
413
|
|
|
|
|
|
|
Function: Retrieves a list of the valid amino acid codes. |
|
414
|
|
|
|
|
|
|
The list is ordered so that first 21 codes are for unique |
|
415
|
|
|
|
|
|
|
amino acids. The rest are ['B', 'Z', 'X', '*']. |
|
416
|
|
|
|
|
|
|
Returns : array of all the valid amino acid codes |
|
417
|
|
|
|
|
|
|
Args : [optional] $code => [0 -> return list of 1 letter aa codes, |
|
418
|
|
|
|
|
|
|
1 -> return list of 3 letter aa codes, |
|
419
|
|
|
|
|
|
|
2 -> return associative array of both ] |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=cut |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub valid_aa { |
|
424
|
410
|
|
|
410
|
1
|
2194
|
my ( $self, $code ) = @_; |
|
425
|
|
|
|
|
|
|
|
|
426
|
410
|
100
|
|
|
|
1528
|
if ( !$code ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
427
|
204
|
|
|
|
|
267
|
my @codes; |
|
428
|
204
|
|
|
|
|
2080
|
foreach my $c ( sort values %ONECODE ) { |
|
429
|
5508
|
100
|
|
|
|
9030
|
push @codes, $c unless ( $c =~ /[BZX\*]/ ); |
|
430
|
|
|
|
|
|
|
} |
|
431
|
204
|
|
|
|
|
402
|
push @codes, qw(B Z X *); # so they are in correct order ? |
|
432
|
204
|
|
|
|
|
1345
|
return @codes; |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
elsif ( $code == 1 ) { |
|
435
|
1
|
|
|
|
|
2
|
my @codes; |
|
436
|
1
|
|
|
|
|
10
|
foreach my $c ( sort keys %ONECODE ) { |
|
437
|
27
|
100
|
|
|
|
52
|
push @codes, $c unless ( $c =~ /(Asx|Glx|Xaa|Ter)/ ); |
|
438
|
|
|
|
|
|
|
} |
|
439
|
1
|
|
|
|
|
3
|
push @codes, ( 'Asx', 'Glx', 'Xaa', 'Ter' ); |
|
440
|
1
|
|
|
|
|
11
|
return @codes; |
|
441
|
|
|
|
|
|
|
} |
|
442
|
|
|
|
|
|
|
elsif ( $code == 2 ) { |
|
443
|
205
|
|
|
|
|
2186
|
my %codes = %ONECODE; |
|
444
|
205
|
|
|
|
|
813
|
foreach my $c ( keys %ONECODE ) { |
|
445
|
5535
|
|
|
|
|
3556
|
my $aa = $ONECODE{$c}; |
|
446
|
5535
|
|
|
|
|
5153
|
$codes{$aa} = $c; |
|
447
|
|
|
|
|
|
|
} |
|
448
|
205
|
|
|
|
|
4761
|
return %codes; |
|
449
|
|
|
|
|
|
|
} |
|
450
|
|
|
|
|
|
|
else { |
|
451
|
0
|
|
|
|
|
0
|
$self->warn( |
|
452
|
|
|
|
|
|
|
"unrecognized code in " . ref($self) . " method valid_aa()" ); |
|
453
|
0
|
|
|
|
|
0
|
return (); |
|
454
|
|
|
|
|
|
|
} |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 mutate |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Title : mutate |
|
460
|
|
|
|
|
|
|
Usage : Bio::SeqUtils->mutate($seq,$mutation1, $mutation2); |
|
461
|
|
|
|
|
|
|
Function: Inplace editing of the sequence. |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
The second argument can be a Bio::LiveSeq::Mutation object |
|
464
|
|
|
|
|
|
|
or an array of them. The mutations are applied sequentially |
|
465
|
|
|
|
|
|
|
checking only that their position is within the current |
|
466
|
|
|
|
|
|
|
sequence. Insertions are inserted before the given |
|
467
|
|
|
|
|
|
|
position. |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Returns : boolean |
|
470
|
|
|
|
|
|
|
Args : sequence object |
|
471
|
|
|
|
|
|
|
mutation, a Bio::LiveSeq::Mutation object, or an array of them |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
See L. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=cut |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub mutate { |
|
478
|
3
|
|
|
3
|
1
|
7
|
my ( $self, $seq, @mutations ) = @_; |
|
479
|
|
|
|
|
|
|
|
|
480
|
3
|
50
|
|
|
|
8
|
$self->throw( 'Object [$seq] ' |
|
481
|
|
|
|
|
|
|
. 'of class [' |
|
482
|
|
|
|
|
|
|
. ref($seq) |
|
483
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
484
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
|
485
|
3
|
50
|
|
|
|
9
|
$self->throw( 'Object [$mutations[0]] ' |
|
486
|
|
|
|
|
|
|
. 'of class [' |
|
487
|
|
|
|
|
|
|
. ref( $mutations[0] ) |
|
488
|
|
|
|
|
|
|
. '] should be a Bio::LiveSeq::Mutation' ) |
|
489
|
|
|
|
|
|
|
unless $mutations[0]->isa('Bio::LiveSeq::Mutation'); |
|
490
|
|
|
|
|
|
|
|
|
491
|
3
|
|
|
|
|
4
|
foreach my $mutation (@mutations) { |
|
492
|
4
|
50
|
|
|
|
8
|
$self->throw('Attempting to mutate sequence beyond its length') |
|
493
|
|
|
|
|
|
|
unless $mutation->pos - 1 <= $seq->length; |
|
494
|
|
|
|
|
|
|
|
|
495
|
4
|
|
|
|
|
8
|
my $string = $seq->seq; |
|
496
|
4
|
|
|
|
|
5
|
substr $string, $mutation->pos - 1, $mutation->len, $mutation->seq; |
|
497
|
4
|
|
|
|
|
5
|
$seq->seq($string); |
|
498
|
|
|
|
|
|
|
} |
|
499
|
3
|
|
|
|
|
6
|
1; |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head2 cat |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Title : cat |
|
505
|
|
|
|
|
|
|
Usage : Bio::SeqUtils->cat(@seqs); |
|
506
|
|
|
|
|
|
|
my $catseq=$seqs[0]; |
|
507
|
|
|
|
|
|
|
Function: Concatenates a list of Bio::Seq objects, adding them all on to the |
|
508
|
|
|
|
|
|
|
end of the first sequence. Annotations and sequence features are |
|
509
|
|
|
|
|
|
|
copied over from any additional objects, and the coordinates of any |
|
510
|
|
|
|
|
|
|
copied features are adjusted appropriately. |
|
511
|
|
|
|
|
|
|
Returns : a boolean |
|
512
|
|
|
|
|
|
|
Args : array of sequence objects |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Note that annotations have no sequence locations. If you concatenate |
|
515
|
|
|
|
|
|
|
sequences with the same annotations they will all be added. |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub cat { |
|
520
|
4
|
|
|
4
|
1
|
42
|
my ( $self, $seq, @seqs ) = @_; |
|
521
|
4
|
50
|
|
|
|
17
|
$self->throw( 'Object [$seq] ' |
|
522
|
|
|
|
|
|
|
. 'of class [' |
|
523
|
|
|
|
|
|
|
. ref($seq) |
|
524
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
525
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
|
526
|
|
|
|
|
|
|
|
|
527
|
4
|
|
|
|
|
5
|
for my $catseq (@seqs) { |
|
528
|
5
|
50
|
|
|
|
12
|
$self->throw( 'Object [$catseq] ' |
|
529
|
|
|
|
|
|
|
. 'of class [' |
|
530
|
|
|
|
|
|
|
. ref($catseq) |
|
531
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
532
|
|
|
|
|
|
|
unless $catseq->isa('Bio::PrimarySeqI'); |
|
533
|
|
|
|
|
|
|
|
|
534
|
5
|
100
|
|
|
|
11
|
$self->throw( |
|
535
|
|
|
|
|
|
|
'Trying to concatenate sequences with different alphabets: ' |
|
536
|
|
|
|
|
|
|
. $seq->display_id . '(' |
|
537
|
|
|
|
|
|
|
. $seq->alphabet |
|
538
|
|
|
|
|
|
|
. ') and ' |
|
539
|
|
|
|
|
|
|
. $catseq->display_id . '(' |
|
540
|
|
|
|
|
|
|
. $catseq->alphabet |
|
541
|
|
|
|
|
|
|
. ')' ) |
|
542
|
|
|
|
|
|
|
unless $catseq->alphabet eq $seq->alphabet; |
|
543
|
|
|
|
|
|
|
|
|
544
|
4
|
|
|
|
|
10
|
my $length = $seq->length; |
|
545
|
4
|
|
|
|
|
6
|
$seq->seq( $seq->seq . $catseq->seq ); |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
# move annotations |
|
548
|
4
|
100
|
66
|
|
|
25
|
if ( $seq->isa("Bio::AnnotatableI") |
|
549
|
|
|
|
|
|
|
and $catseq->isa("Bio::AnnotatableI") ) |
|
550
|
|
|
|
|
|
|
{ |
|
551
|
3
|
|
|
|
|
5
|
foreach my $key ( $catseq->annotation->get_all_annotation_keys() ) { |
|
552
|
|
|
|
|
|
|
|
|
553
|
3
|
|
|
|
|
4
|
foreach my $value ( $catseq->annotation->get_Annotations($key) ) |
|
554
|
|
|
|
|
|
|
{ |
|
555
|
5
|
|
|
|
|
7
|
$seq->annotation->add_Annotation( $key, $value ); |
|
556
|
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
} |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# move SeqFeatures |
|
561
|
4
|
100
|
66
|
|
|
21
|
if ( $seq->isa('Bio::SeqI') and $catseq->isa('Bio::SeqI') ) { |
|
562
|
3
|
|
|
|
|
6
|
for my $feat ( $catseq->get_SeqFeatures ) { |
|
563
|
2
|
|
|
|
|
6
|
$seq->add_SeqFeature( $self->_coord_adjust( $feat, $length ) ); |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
} |
|
568
|
3
|
|
|
|
|
8
|
1; |
|
569
|
|
|
|
|
|
|
} |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 trunc_with_features |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Title : trunc_with_features |
|
574
|
|
|
|
|
|
|
Usage : $trunc=Bio::SeqUtils->trunc_with_features($seq, $start, $end); |
|
575
|
|
|
|
|
|
|
Function: Like Bio::Seq::trunc, but keeps features (adjusting coordinates |
|
576
|
|
|
|
|
|
|
where necessary. Features that partially overlap the region have |
|
577
|
|
|
|
|
|
|
their location changed to a Bio::Location::Fuzzy. |
|
578
|
|
|
|
|
|
|
Returns : A new sequence object |
|
579
|
|
|
|
|
|
|
Args : A sequence object, start coordinate, end coordinate (inclusive) |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=cut |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub trunc_with_features { |
|
585
|
204
|
|
|
204
|
|
283835
|
use Bio::Range; |
|
|
204
|
|
|
|
|
306
|
|
|
|
204
|
|
|
|
|
652135
|
|
|
586
|
1
|
|
|
1
|
1
|
2
|
my ( $self, $seq, $start, $end ) = @_; |
|
587
|
1
|
50
|
|
|
|
5
|
$self->throw( 'Object [$seq] ' |
|
588
|
|
|
|
|
|
|
. 'of class [' |
|
589
|
|
|
|
|
|
|
. ref($seq) |
|
590
|
|
|
|
|
|
|
. '] should be a Bio::SeqI ' ) |
|
591
|
|
|
|
|
|
|
unless $seq->isa('Bio::SeqI'); |
|
592
|
1
|
|
|
|
|
9
|
my $trunc = $seq->trunc( $start, $end ); |
|
593
|
1
|
|
|
|
|
9
|
my $truncrange = |
|
594
|
|
|
|
|
|
|
Bio::Range->new( -start => $start, -end => $end, -strand => 0 ); |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
# make sure that there is no annotation or features in $trunc |
|
597
|
|
|
|
|
|
|
# (->trunc() now clone objects except for Bio::Seq::LargePrimarySeq) |
|
598
|
1
|
|
|
|
|
4
|
$trunc->annotation->remove_Annotations; |
|
599
|
1
|
|
|
|
|
4
|
$trunc->remove_SeqFeatures; |
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
# move annotations |
|
602
|
1
|
|
|
|
|
3
|
foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { |
|
603
|
1
|
|
|
|
|
3
|
foreach my $value ( $seq->annotation->get_Annotations($key) ) { |
|
604
|
1
|
|
|
|
|
3
|
$trunc->annotation->add_Annotation( $key, $value ); |
|
605
|
|
|
|
|
|
|
} |
|
606
|
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
# move features |
|
609
|
1
|
|
|
|
|
3
|
foreach ( |
|
610
|
|
|
|
|
|
|
grep { |
|
611
|
3
|
50
|
|
|
|
10
|
$_ = $self->_coord_adjust( $_, 1 - $start, $end + 1 - $start ) |
|
612
|
|
|
|
|
|
|
if $_->overlaps($truncrange) |
|
613
|
|
|
|
|
|
|
} $seq->get_SeqFeatures |
|
614
|
|
|
|
|
|
|
) |
|
615
|
|
|
|
|
|
|
{ |
|
616
|
3
|
|
|
|
|
6
|
$trunc->add_SeqFeature($_); |
|
617
|
|
|
|
|
|
|
} |
|
618
|
1
|
|
|
|
|
5
|
return $trunc; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=head2 delete |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
Title : delete |
|
624
|
|
|
|
|
|
|
Function: cuts a segment out of a sequence and re-joins the left and right fragments |
|
625
|
|
|
|
|
|
|
(like splicing or digesting and re-ligating a molecule). |
|
626
|
|
|
|
|
|
|
Positions (and types) of sequence features are adjusted accordingly: |
|
627
|
|
|
|
|
|
|
Features that span the cut site are converted to split featuress to |
|
628
|
|
|
|
|
|
|
indicate the disruption. |
|
629
|
|
|
|
|
|
|
Features that extend into the cut-out fragment are truncated. |
|
630
|
|
|
|
|
|
|
A new molecule is created and returned. |
|
631
|
|
|
|
|
|
|
Usage : my $cutseq = Bio::SeqUtils::PbrTools->cut( $seq, 1000, 1100 ); |
|
632
|
|
|
|
|
|
|
Args : a Bio::PrimarySeqI compliant object to cut, |
|
633
|
|
|
|
|
|
|
first nt of the segment to be deleted |
|
634
|
|
|
|
|
|
|
last nt of the segment to be deleted |
|
635
|
|
|
|
|
|
|
optional: |
|
636
|
|
|
|
|
|
|
hash-ref of options: |
|
637
|
|
|
|
|
|
|
clone_obj: if true, clone the input sequence object rather |
|
638
|
|
|
|
|
|
|
than calling "new" on the object's class |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=cut |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
sub delete { |
|
645
|
6
|
|
|
6
|
1
|
267
|
my $self = shift; |
|
646
|
6
|
|
|
|
|
9
|
my ( $seq, $left, $right, $opts_ref ) = @_; |
|
647
|
6
|
50
|
66
|
|
|
22
|
$self->throw( 'was expecting 3-4 paramters but got ' . @_ ) |
|
648
|
|
|
|
|
|
|
unless @_ == 3 || @_ == 4; |
|
649
|
|
|
|
|
|
|
|
|
650
|
6
|
50
|
33
|
|
|
49
|
$self->throw( |
|
651
|
|
|
|
|
|
|
'Object of class [' . ref($seq) . '] should be a Bio::PrimarySeqI ' ) |
|
652
|
|
|
|
|
|
|
unless blessed($seq) && $seq->isa('Bio::PrimarySeqI'); |
|
653
|
|
|
|
|
|
|
|
|
654
|
6
|
50
|
|
|
|
12
|
$self->throw("Left coordinate ($left) must be >= 1") if $left < 1; |
|
655
|
6
|
50
|
|
|
|
15
|
if ( $right > $seq->length ) { |
|
656
|
0
|
|
|
|
|
0
|
$self->throw( "Right coordinate ($right) must be less than " |
|
657
|
|
|
|
|
|
|
. 'sequence length (' |
|
658
|
|
|
|
|
|
|
. $seq->length |
|
659
|
|
|
|
|
|
|
. ')' ); |
|
660
|
|
|
|
|
|
|
} |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# piece together the sequence string of the remaining fragments |
|
663
|
6
|
|
|
|
|
19
|
my $left_seq = $seq->subseq( 1, $left - 1 ); |
|
664
|
6
|
|
|
|
|
14
|
my $right_seq = $seq->subseq( $right + 1, $seq->length ); |
|
665
|
6
|
50
|
33
|
|
|
20
|
if ( !$left_seq || !$right_seq ) { |
|
666
|
0
|
|
|
|
|
0
|
$self->throw( |
|
667
|
|
|
|
|
|
|
'could not assemble sequences. At least one of the fragments is empty' |
|
668
|
|
|
|
|
|
|
); |
|
669
|
|
|
|
|
|
|
} |
|
670
|
6
|
|
|
|
|
9
|
my $seq_str = $left_seq . $right_seq; |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# create the new seq object with the same class as the recipient |
|
673
|
|
|
|
|
|
|
# or (if requested), make a clone of the existing object. In the |
|
674
|
|
|
|
|
|
|
# latter case we need to remove sequence features from the cloned |
|
675
|
|
|
|
|
|
|
# object instead of copying them |
|
676
|
6
|
|
|
|
|
14
|
my $product; |
|
677
|
6
|
100
|
|
|
|
11
|
if ( $opts_ref->{clone_obj} ) { |
|
678
|
2
|
|
|
|
|
7
|
$product = $self->_new_seq_via_clone( $seq, $seq_str ); |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
else { |
|
681
|
4
|
|
|
|
|
19
|
$product = $self->_new_seq_from_old( $seq, { seq => $seq_str } ); |
|
682
|
|
|
|
|
|
|
} |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# move sequence features |
|
685
|
4
|
50
|
33
|
|
|
25
|
if ( $product->isa('Bio::SeqI') && $seq->isa('Bio::SeqI') ) { |
|
686
|
4
|
|
|
|
|
10
|
for my $feat ( $seq->get_SeqFeatures ) { |
|
687
|
26
|
|
|
|
|
45
|
my $adjfeat = $self->_coord_adjust_deletion( $feat, $left, $right ); |
|
688
|
26
|
100
|
|
|
|
79
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
|
689
|
|
|
|
|
|
|
} |
|
690
|
|
|
|
|
|
|
} |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# add a feature to annotatde the deletion |
|
693
|
4
|
|
|
|
|
38
|
my $deletion_feature = Bio::SeqFeature::Generic->new( |
|
694
|
|
|
|
|
|
|
-primary_tag => 'misc_feature', |
|
695
|
|
|
|
|
|
|
-tag => { note => 'deletion of ' . ( $right - $left + 1 ) . 'bp' }, |
|
696
|
|
|
|
|
|
|
-location => Bio::Location::Simple->new( |
|
697
|
|
|
|
|
|
|
-start => $left - 1, |
|
698
|
|
|
|
|
|
|
-end => $left, |
|
699
|
|
|
|
|
|
|
-location_type => 'IN-BETWEEN' |
|
700
|
|
|
|
|
|
|
) |
|
701
|
|
|
|
|
|
|
); |
|
702
|
4
|
|
|
|
|
16
|
$product->add_SeqFeature($deletion_feature); |
|
703
|
4
|
|
|
|
|
19
|
return $product; |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=head2 insert |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Title : insert |
|
709
|
|
|
|
|
|
|
Function: inserts a fragment (a Bio::Seq object) into a nother sequence object |
|
710
|
|
|
|
|
|
|
adding all annotations and features to the final product. |
|
711
|
|
|
|
|
|
|
Features that span the insertion site are converted to split |
|
712
|
|
|
|
|
|
|
features to indicate the disruption. |
|
713
|
|
|
|
|
|
|
A new feature is added to indicate the inserted fragment itself. |
|
714
|
|
|
|
|
|
|
A new molecule is created and returned. |
|
715
|
|
|
|
|
|
|
Usage : # insert a fragment after pos 1000 |
|
716
|
|
|
|
|
|
|
my $insert_seq = Bio::SeqUtils::PbrTools->insert( |
|
717
|
|
|
|
|
|
|
$recipient_seq, |
|
718
|
|
|
|
|
|
|
$fragment_seq, |
|
719
|
|
|
|
|
|
|
1000 |
|
720
|
|
|
|
|
|
|
); |
|
721
|
|
|
|
|
|
|
Args : recipient sequence (a Bio::PrimarySeqI compliant object), |
|
722
|
|
|
|
|
|
|
a fragmetn to insert (Bio::PrimarySeqI compliant object), |
|
723
|
|
|
|
|
|
|
insertion position (fragment is inserted to the right of this pos) |
|
724
|
|
|
|
|
|
|
pos=0 will prepend the fragment to the recipient |
|
725
|
|
|
|
|
|
|
optional: |
|
726
|
|
|
|
|
|
|
hash-ref of options: |
|
727
|
|
|
|
|
|
|
clone_obj: if true, clone the input sequence object rather |
|
728
|
|
|
|
|
|
|
than calling "new" on the object's class |
|
729
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=cut |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
sub insert { |
|
734
|
3
|
|
|
3
|
1
|
37
|
my $self = shift; |
|
735
|
3
|
|
|
|
|
5
|
my ( $recipient, $fragment, $insert_pos, $opts_ref ) = @_; |
|
736
|
3
|
50
|
66
|
|
|
48
|
$self->throw( 'was expecting 3-4 paramters but got ' . @_ ) |
|
737
|
|
|
|
|
|
|
unless @_ == 3 || @_ == 4; |
|
738
|
|
|
|
|
|
|
|
|
739
|
3
|
50
|
33
|
|
|
28
|
$self->throw( 'Recipient object of class [' |
|
740
|
|
|
|
|
|
|
. ref($recipient) |
|
741
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
742
|
|
|
|
|
|
|
unless blessed($recipient) && $recipient->isa('Bio::PrimarySeqI'); |
|
743
|
|
|
|
|
|
|
|
|
744
|
3
|
50
|
33
|
|
|
19
|
$self->throw( 'Fragment object of class [' |
|
745
|
|
|
|
|
|
|
. ref($fragment) |
|
746
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
747
|
|
|
|
|
|
|
unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); |
|
748
|
|
|
|
|
|
|
|
|
749
|
3
|
50
|
|
|
|
15
|
$self->throw( 'Can\'t concatenate sequences with different alphabets: ' |
|
750
|
|
|
|
|
|
|
. 'recipient is ' |
|
751
|
|
|
|
|
|
|
. $recipient->alphabet |
|
752
|
|
|
|
|
|
|
. ' and fragment is ' |
|
753
|
|
|
|
|
|
|
. $fragment->alphabet ) |
|
754
|
|
|
|
|
|
|
unless $recipient->alphabet eq $fragment->alphabet; |
|
755
|
|
|
|
|
|
|
|
|
756
|
3
|
50
|
33
|
|
|
16
|
if ( $insert_pos < 0 or $insert_pos > $recipient->length ) { |
|
757
|
0
|
|
|
|
|
0
|
$self->throw( "insertion position ($insert_pos) must be between 0 and " |
|
758
|
|
|
|
|
|
|
. 'recipient sequence length (' |
|
759
|
|
|
|
|
|
|
. $recipient->length |
|
760
|
|
|
|
|
|
|
. ')' ); |
|
761
|
|
|
|
|
|
|
} |
|
762
|
|
|
|
|
|
|
|
|
763
|
3
|
50
|
33
|
|
|
30
|
if ( $fragment->can('is_circular') && $fragment->is_circular ) { |
|
764
|
0
|
|
|
|
|
0
|
$self->throw('Can\'t insert circular fragments'); |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
3
|
50
|
|
|
|
8
|
if ( !$recipient->seq ) { |
|
768
|
0
|
|
|
|
|
0
|
$self->throw( |
|
769
|
|
|
|
|
|
|
'Recipient has no sequence, can not insert into this object'); |
|
770
|
|
|
|
|
|
|
} |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# construct raw sequence of the new molecule |
|
773
|
3
|
50
|
|
|
|
11
|
my $left_seq = |
|
774
|
|
|
|
|
|
|
$insert_pos > 0 |
|
775
|
|
|
|
|
|
|
? $recipient->subseq( 1, $insert_pos ) |
|
776
|
|
|
|
|
|
|
: ''; |
|
777
|
3
|
|
|
|
|
7
|
my $mid_seq = $fragment->seq; |
|
778
|
3
|
50
|
|
|
|
7
|
my $right_seq = |
|
779
|
|
|
|
|
|
|
$insert_pos < $recipient->length |
|
780
|
|
|
|
|
|
|
? $recipient->subseq( $insert_pos + 1, $recipient->length ) |
|
781
|
|
|
|
|
|
|
: ''; |
|
782
|
3
|
|
|
|
|
7
|
my $seq_str = $left_seq . $mid_seq . $right_seq; |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
# create the new seq object with the same class as the recipient |
|
785
|
|
|
|
|
|
|
# or (if requested), make a clone of the existing object. In the |
|
786
|
|
|
|
|
|
|
# latter case we need to remove sequence features from the cloned |
|
787
|
|
|
|
|
|
|
# object instead of copying them |
|
788
|
3
|
|
|
|
|
3
|
my $product; |
|
789
|
3
|
100
|
|
|
|
6
|
if ( $opts_ref->{clone_obj} ) { |
|
790
|
1
|
|
|
|
|
5
|
$product = $self->_new_seq_via_clone( $recipient, $seq_str ); |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
else { |
|
793
|
2
|
|
|
|
|
2
|
my @desc; |
|
794
|
2
|
50
|
|
|
|
6
|
push @desc, 'Inserted fragment: ' . $fragment->desc |
|
795
|
|
|
|
|
|
|
if defined $fragment->desc; |
|
796
|
2
|
50
|
|
|
|
5
|
push @desc, 'Recipient: ' . $recipient->desc |
|
797
|
|
|
|
|
|
|
if defined $recipient->desc; |
|
798
|
2
|
|
50
|
|
|
7
|
$product = $self->_new_seq_from_old( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
799
|
|
|
|
|
|
|
$recipient, |
|
800
|
|
|
|
|
|
|
{ |
|
801
|
|
|
|
|
|
|
seq => $seq_str, |
|
802
|
|
|
|
|
|
|
display_id => $recipient->display_id, |
|
803
|
|
|
|
|
|
|
accession_number => $recipient->accession_number || '', |
|
804
|
|
|
|
|
|
|
alphabet => $recipient->alphabet, |
|
805
|
|
|
|
|
|
|
desc => join( '; ', @desc ), |
|
806
|
|
|
|
|
|
|
verbose => $recipient->verbose || $fragment->verbose, |
|
807
|
|
|
|
|
|
|
is_circular => $recipient->is_circular || 0, |
|
808
|
|
|
|
|
|
|
} |
|
809
|
|
|
|
|
|
|
); |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
} # if clone_obj |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# move annotations from fragment to product |
|
814
|
3
|
50
|
33
|
|
|
24
|
if ( $product->isa("Bio::AnnotatableI") |
|
815
|
|
|
|
|
|
|
&& $fragment->isa("Bio::AnnotatableI") ) |
|
816
|
|
|
|
|
|
|
{ |
|
817
|
3
|
|
|
|
|
12
|
foreach my $key ( $fragment->annotation->get_all_annotation_keys ) { |
|
818
|
3
|
|
|
|
|
7
|
foreach my $value ( $fragment->annotation->get_Annotations($key) ) { |
|
819
|
3
|
|
|
|
|
10
|
$product->annotation->add_Annotation( $key, $value ); |
|
820
|
|
|
|
|
|
|
} |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
} |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
# move sequence features to product with adjusted coordinates |
|
825
|
3
|
50
|
|
|
|
13
|
if ( $product->isa('Bio::SeqI') ) { |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# for the fragment, just shift the features to new position |
|
828
|
3
|
50
|
|
|
|
11
|
if ( $fragment->isa('Bio::SeqI') ) { |
|
829
|
3
|
|
|
|
|
5
|
for my $feat ( $fragment->get_SeqFeatures ) { |
|
830
|
3
|
|
|
|
|
9
|
my $adjfeat = $self->_coord_adjust( $feat, $insert_pos ); |
|
831
|
3
|
50
|
|
|
|
14
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
|
832
|
|
|
|
|
|
|
} |
|
833
|
|
|
|
|
|
|
} |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
# for recipient, shift and modify features according to insertion. |
|
836
|
3
|
50
|
|
|
|
15
|
if ( $recipient->isa('Bio::SeqI') ) { |
|
837
|
3
|
|
|
|
|
8
|
for my $feat ( $recipient->get_SeqFeatures ) { |
|
838
|
17
|
|
|
|
|
25
|
my $adjfeat = |
|
839
|
|
|
|
|
|
|
$self->_coord_adjust_insertion( $feat, $insert_pos, |
|
840
|
|
|
|
|
|
|
$fragment->length ); |
|
841
|
17
|
100
|
|
|
|
46
|
$product->add_SeqFeature($adjfeat) if $adjfeat; |
|
842
|
|
|
|
|
|
|
} |
|
843
|
|
|
|
|
|
|
} |
|
844
|
|
|
|
|
|
|
} |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
# add a feature to annotate the insertion |
|
847
|
3
|
|
|
|
|
14
|
my $insertion_feature = Bio::SeqFeature::Generic->new( |
|
848
|
|
|
|
|
|
|
-start => $insert_pos + 1, |
|
849
|
|
|
|
|
|
|
-end => $insert_pos + $fragment->length, |
|
850
|
|
|
|
|
|
|
-primary_tag => 'misc_feature', |
|
851
|
|
|
|
|
|
|
-tag => { note => 'inserted fragment' }, |
|
852
|
|
|
|
|
|
|
); |
|
853
|
3
|
|
|
|
|
12
|
$product->add_SeqFeature($insertion_feature); |
|
854
|
|
|
|
|
|
|
|
|
855
|
3
|
|
|
|
|
11
|
return $product; |
|
856
|
|
|
|
|
|
|
} |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
=head2 ligate |
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
title : ligate |
|
861
|
|
|
|
|
|
|
function: pastes a fragment (which can also have features) into a recipient |
|
862
|
|
|
|
|
|
|
sequence between two "cut" sites, preserving features and adjusting |
|
863
|
|
|
|
|
|
|
their locations. |
|
864
|
|
|
|
|
|
|
This is a shortcut for deleting a segment from a sequence object followed |
|
865
|
|
|
|
|
|
|
by an insertion of a fragmnet and is supposed to be used to simulate |
|
866
|
|
|
|
|
|
|
in-vitro cloning where a recipient (a vector) is digested and a fragment |
|
867
|
|
|
|
|
|
|
is then ligated into the recipient molecule. The fragment can be flipped |
|
868
|
|
|
|
|
|
|
(reverse-complemented with all its features). |
|
869
|
|
|
|
|
|
|
A new sequence object is returned to represent the product of the reaction. |
|
870
|
|
|
|
|
|
|
Features and annotations are transferred from the insert to the product |
|
871
|
|
|
|
|
|
|
and features on the recipient are adjusted according to the methods |
|
872
|
|
|
|
|
|
|
L"delete"> amd L"insert">: |
|
873
|
|
|
|
|
|
|
Features spanning the insertion site will be split up into two sub-locations. |
|
874
|
|
|
|
|
|
|
(Sub-)features in the deleted region are themselves deleted. |
|
875
|
|
|
|
|
|
|
(Sub-)features that extend into the deleted region are truncated. |
|
876
|
|
|
|
|
|
|
The class of the product object depends on the class of the recipient (vector) |
|
877
|
|
|
|
|
|
|
sequence object. if it is not possible to instantiate a new |
|
878
|
|
|
|
|
|
|
object of that class, a Bio::Primaryseq object is created instead. |
|
879
|
|
|
|
|
|
|
usage : # insert the flipped fragment between positions 1000 and 1100 of the |
|
880
|
|
|
|
|
|
|
# vector, i.e. everything between these two positions is deleted and |
|
881
|
|
|
|
|
|
|
# replaced by the fragment |
|
882
|
|
|
|
|
|
|
my $new_molecule = Bio::Sequtils::Pbrtools->ligate( |
|
883
|
|
|
|
|
|
|
-recipient => $vector, |
|
884
|
|
|
|
|
|
|
-fragment => $fragment, |
|
885
|
|
|
|
|
|
|
-left => 1000, |
|
886
|
|
|
|
|
|
|
-right => 1100, |
|
887
|
|
|
|
|
|
|
-flip => 1, |
|
888
|
|
|
|
|
|
|
-clone_obj => 1 |
|
889
|
|
|
|
|
|
|
); |
|
890
|
|
|
|
|
|
|
args : recipient: the recipient/vector molecule |
|
891
|
|
|
|
|
|
|
fragment: molecule that is to be ligated into the vector |
|
892
|
|
|
|
|
|
|
left: left cut site (fragment will be inserted to the right of |
|
893
|
|
|
|
|
|
|
this position) |
|
894
|
|
|
|
|
|
|
optional: |
|
895
|
|
|
|
|
|
|
right: right cut site (fragment will be inseterted to the |
|
896
|
|
|
|
|
|
|
left of this position). defaults to left+1 |
|
897
|
|
|
|
|
|
|
flip: boolean, if true, the fragment is reverse-complemented |
|
898
|
|
|
|
|
|
|
(including features) before inserting |
|
899
|
|
|
|
|
|
|
clone_obj: if true, clone the recipient object to create the product |
|
900
|
|
|
|
|
|
|
instead of calling "new" on its class |
|
901
|
|
|
|
|
|
|
returns : a new Bio::Seq object of the ligated fragments |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
=cut |
|
904
|
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
sub ligate { |
|
906
|
3
|
|
|
3
|
1
|
82
|
my $self = shift; |
|
907
|
3
|
|
|
|
|
14
|
my ( $recipient, $fragment, $left, $right, $flip, $clone_obj ) = |
|
908
|
|
|
|
|
|
|
$self->_rearrange( [qw(RECIPIENT FRAGMENT LEFT RIGHT FLIP CLONE_OBJ )], |
|
909
|
|
|
|
|
|
|
@_ ); |
|
910
|
3
|
50
|
|
|
|
12
|
$self->throw("missing required parameter 'recipient'") unless $recipient; |
|
911
|
3
|
50
|
|
|
|
4
|
$self->throw("missing required parameter 'fragment'") unless $fragment; |
|
912
|
3
|
50
|
|
|
|
7
|
$self->throw("missing required parameter 'left'") unless defined $left; |
|
913
|
|
|
|
|
|
|
|
|
914
|
3
|
|
33
|
|
|
4
|
$right ||= $left + 1; |
|
915
|
|
|
|
|
|
|
|
|
916
|
3
|
50
|
33
|
|
|
24
|
$self->throw( |
|
917
|
|
|
|
|
|
|
"Fragment must be a Bio::PrimarySeqI compliant object but it is a " |
|
918
|
|
|
|
|
|
|
. ref($fragment) ) |
|
919
|
|
|
|
|
|
|
unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); |
|
920
|
|
|
|
|
|
|
|
|
921
|
3
|
50
|
|
|
|
12
|
$fragment = $self->revcom_with_features($fragment) if $flip; |
|
922
|
|
|
|
|
|
|
|
|
923
|
3
|
|
|
|
|
6
|
my $opts_ref = {}; |
|
924
|
3
|
100
|
|
|
|
8
|
$opts_ref->{clone_obj} = 1 if $clone_obj; |
|
925
|
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
# clone in two steps: first delete between the insertion sites, |
|
927
|
|
|
|
|
|
|
# then insert the fragment. Step 1 is skipped if insert positions |
|
928
|
|
|
|
|
|
|
# are adjacent (no deletion) |
|
929
|
3
|
|
|
|
|
3
|
my ( $product1, $product2 ); |
|
930
|
3
|
|
|
|
|
4
|
eval { |
|
931
|
3
|
50
|
|
|
|
8
|
if ( $right == $left + 1 ) { |
|
932
|
0
|
|
|
|
|
0
|
$product1 = $recipient; |
|
933
|
|
|
|
|
|
|
} |
|
934
|
|
|
|
|
|
|
else { |
|
935
|
3
|
|
|
|
|
11
|
$product1 = |
|
936
|
|
|
|
|
|
|
$self->delete( $recipient, $left + 1, $right - 1, $opts_ref ); |
|
937
|
|
|
|
|
|
|
} |
|
938
|
|
|
|
|
|
|
}; |
|
939
|
3
|
100
|
|
|
|
13
|
$self->throw( "Failed in step 1 (cut recipient): " . $@ ) if $@; |
|
940
|
2
|
|
|
|
|
3
|
eval { $product2 = $self->insert( $product1, $fragment, $left, $opts_ref ) }; |
|
|
2
|
|
|
|
|
5
|
|
|
941
|
2
|
50
|
|
|
|
5
|
$self->throw( "Failed in step 2 (insert fragment): " . $@ ) if $@; |
|
942
|
|
|
|
|
|
|
|
|
943
|
2
|
|
|
|
|
11
|
return $product2; |
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
} |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=head2 _coord_adjust_deletion |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
title : _coord_adjust_deletion |
|
950
|
|
|
|
|
|
|
function: recursively adjusts coordinates of seqfeatures on a molecule |
|
951
|
|
|
|
|
|
|
where a segment has been deleted. |
|
952
|
|
|
|
|
|
|
(sub)features that span the deletion site become split features. |
|
953
|
|
|
|
|
|
|
(sub)features that extend into the deletion site are truncated. |
|
954
|
|
|
|
|
|
|
A note is added to the feature to inform about the size and |
|
955
|
|
|
|
|
|
|
position of the deletion. |
|
956
|
|
|
|
|
|
|
usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_deletion( |
|
957
|
|
|
|
|
|
|
$feature, |
|
958
|
|
|
|
|
|
|
$start, |
|
959
|
|
|
|
|
|
|
$end |
|
960
|
|
|
|
|
|
|
); |
|
961
|
|
|
|
|
|
|
args : a Bio::SeqFeatureI compliant object, |
|
962
|
|
|
|
|
|
|
start (inclusive) position of the deletion site, |
|
963
|
|
|
|
|
|
|
end (inclusive) position of the deletion site |
|
964
|
|
|
|
|
|
|
returns : a Bio::SeqFeatureI compliant object |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
=cut |
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
sub _coord_adjust_deletion { |
|
969
|
38
|
|
|
38
|
|
40
|
my ( $self, $feat, $left, $right ) = @_; |
|
970
|
|
|
|
|
|
|
|
|
971
|
38
|
50
|
|
|
|
79
|
$self->throw( 'object [$feat] ' |
|
972
|
|
|
|
|
|
|
. 'of class [' |
|
973
|
|
|
|
|
|
|
. ref($feat) |
|
974
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
|
975
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
|
976
|
38
|
50
|
33
|
|
|
102
|
$self->throw('missing coordinates: need a left and a right position') |
|
977
|
|
|
|
|
|
|
unless defined $left && defined $right; |
|
978
|
|
|
|
|
|
|
|
|
979
|
38
|
50
|
|
|
|
58
|
if ( $left > $right ) { |
|
980
|
0
|
0
|
0
|
|
|
0
|
if ( $feat->can('is_circular') && $feat->is_circular ) { |
|
981
|
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
# todo handle circular molecules |
|
983
|
0
|
|
|
|
|
0
|
$self->throw( |
|
984
|
|
|
|
|
|
|
'can not yet handle deletions in circular molecules if deletion spans origin' |
|
985
|
|
|
|
|
|
|
); |
|
986
|
|
|
|
|
|
|
} |
|
987
|
|
|
|
|
|
|
else { |
|
988
|
0
|
|
|
|
|
0
|
$self->throw( |
|
989
|
|
|
|
|
|
|
"left coordinate ($left) must be less than right ($right)" |
|
990
|
|
|
|
|
|
|
. " but it was greater" ); |
|
991
|
|
|
|
|
|
|
} |
|
992
|
|
|
|
|
|
|
} |
|
993
|
38
|
|
|
|
|
109
|
my $deletion = Bio::Location::Simple->new( |
|
994
|
|
|
|
|
|
|
-start => $left, |
|
995
|
|
|
|
|
|
|
-end => $right, |
|
996
|
|
|
|
|
|
|
); |
|
997
|
38
|
|
|
|
|
54
|
my $del_length = $right - $left + 1; |
|
998
|
|
|
|
|
|
|
|
|
999
|
38
|
|
|
|
|
25
|
my @adjsubfeat; |
|
1000
|
38
|
|
|
|
|
59
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
|
1001
|
12
|
|
|
|
|
31
|
my $adjsubfeat = |
|
1002
|
|
|
|
|
|
|
$self->_coord_adjust_deletion( $subfeat, $left, $right ); |
|
1003
|
12
|
100
|
|
|
|
26
|
push @adjsubfeat, $adjsubfeat if $adjsubfeat; |
|
1004
|
|
|
|
|
|
|
} |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
38
|
|
|
|
|
40
|
my @loc; |
|
1007
|
|
|
|
|
|
|
my $note; |
|
1008
|
38
|
|
|
|
|
60
|
for ( $feat->location->each_Location ) { |
|
1009
|
38
|
100
|
|
|
|
80
|
next if $deletion->contains($_); # this location will be deleted; |
|
1010
|
25
|
|
|
|
|
46
|
my $strand = $_->strand; |
|
1011
|
25
|
|
|
|
|
45
|
my $type = $_->location_type; |
|
1012
|
25
|
|
|
|
|
34
|
my $start = $_->start; |
|
1013
|
25
|
50
|
|
|
|
85
|
my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; |
|
1014
|
25
|
|
|
|
|
36
|
my $end = $_->end; |
|
1015
|
25
|
50
|
|
|
|
69
|
my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; |
|
1016
|
25
|
|
|
|
|
29
|
my @newcoords = (); |
|
1017
|
25
|
100
|
100
|
|
|
31
|
if ( $start < $deletion->start && $end > $deletion->end ) |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
{ # split the feature |
|
1019
|
4
|
|
|
|
|
10
|
@newcoords = ( |
|
1020
|
|
|
|
|
|
|
[ $start, ( $deletion->start - 1 ), $start_type, $end_type ], |
|
1021
|
|
|
|
|
|
|
[ |
|
1022
|
|
|
|
|
|
|
( $deletion->start ), $end - $del_length, |
|
1023
|
|
|
|
|
|
|
$start_type, $end_type |
|
1024
|
|
|
|
|
|
|
] |
|
1025
|
|
|
|
|
|
|
); |
|
1026
|
4
|
|
|
|
|
12
|
$note = |
|
1027
|
|
|
|
|
|
|
$del_length |
|
1028
|
|
|
|
|
|
|
. 'bp internal deletion between pos ' |
|
1029
|
|
|
|
|
|
|
. ( $deletion->start - 1 ) . ' and ' |
|
1030
|
|
|
|
|
|
|
. $deletion->start; |
|
1031
|
|
|
|
|
|
|
} |
|
1032
|
|
|
|
|
|
|
elsif ( $_->start < $deletion->start && $_->end >= $deletion->start ) |
|
1033
|
|
|
|
|
|
|
{ # truncate feature end |
|
1034
|
8
|
|
|
|
|
13
|
@newcoords = |
|
1035
|
|
|
|
|
|
|
( [ $start, ( $deletion->start - 1 ), $start_type, $end_type ] ); |
|
1036
|
8
|
|
|
|
|
16
|
$note = |
|
1037
|
|
|
|
|
|
|
( $end - $deletion->start + 1 ) . 'bp deleted from feature '; |
|
1038
|
8
|
50
|
|
|
|
18
|
if ( $feat->strand ) { |
|
1039
|
0
|
0
|
|
|
|
0
|
$note .= $feat->strand == 1 ? "3' " : "5' "; |
|
1040
|
|
|
|
|
|
|
} |
|
1041
|
8
|
|
|
|
|
12
|
$note .= 'end'; |
|
1042
|
|
|
|
|
|
|
} |
|
1043
|
|
|
|
|
|
|
elsif ( $_->start <= $deletion->end && $_->end > $deletion->end ) |
|
1044
|
|
|
|
|
|
|
{ # truncate feature start and shift end |
|
1045
|
5
|
|
|
|
|
11
|
@newcoords = ( |
|
1046
|
|
|
|
|
|
|
[ |
|
1047
|
|
|
|
|
|
|
( $deletion->start ), $end - $del_length, |
|
1048
|
|
|
|
|
|
|
$start_type, $end_type |
|
1049
|
|
|
|
|
|
|
] |
|
1050
|
|
|
|
|
|
|
); |
|
1051
|
5
|
|
|
|
|
12
|
$note = |
|
1052
|
|
|
|
|
|
|
( $deletion->end - $start + 1 ) . 'bp deleted from feature '; |
|
1053
|
5
|
100
|
|
|
|
14
|
if ( $feat->strand ) { |
|
1054
|
2
|
50
|
|
|
|
4
|
$note .= $feat->strand == 1 ? "5' end" : "3' end"; |
|
1055
|
|
|
|
|
|
|
} |
|
1056
|
|
|
|
|
|
|
else { |
|
1057
|
3
|
|
|
|
|
6
|
$note .= 'start'; |
|
1058
|
|
|
|
|
|
|
} |
|
1059
|
|
|
|
|
|
|
} |
|
1060
|
|
|
|
|
|
|
elsif ( $start >= $deletion->end ) { # just shift entire location |
|
1061
|
4
|
|
|
|
|
11
|
@newcoords = ( |
|
1062
|
|
|
|
|
|
|
[ |
|
1063
|
|
|
|
|
|
|
$start - $del_length, $end - $del_length, |
|
1064
|
|
|
|
|
|
|
$start_type, $end_type |
|
1065
|
|
|
|
|
|
|
] |
|
1066
|
|
|
|
|
|
|
); |
|
1067
|
|
|
|
|
|
|
} |
|
1068
|
|
|
|
|
|
|
else { # not affected by deletion |
|
1069
|
4
|
|
|
|
|
9
|
@newcoords = ( [ $start, $end, $start_type, $end_type ] ); |
|
1070
|
|
|
|
|
|
|
} |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
# if we have no coordinates, we return nothing |
|
1073
|
|
|
|
|
|
|
# the feature is deleted |
|
1074
|
25
|
50
|
|
|
|
50
|
return unless @newcoords; |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
25
|
|
|
|
|
50
|
my @subloc = |
|
1077
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( \@newcoords, $strand, |
|
1078
|
|
|
|
|
|
|
$type ); |
|
1079
|
25
|
|
|
|
|
51
|
push @loc, $self->_single_loc_object_from_collection(@subloc); |
|
1080
|
|
|
|
|
|
|
} # each location |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
# create new feature based on original one and move annotation across |
|
1083
|
38
|
|
|
|
|
91
|
my $newfeat = |
|
1084
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
|
1085
|
38
|
|
|
|
|
74
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
|
1086
|
4
|
|
|
|
|
8
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
|
1087
|
4
|
|
|
|
|
9
|
$newfeat->annotation->add_Annotation( $key, $value ); |
|
1088
|
|
|
|
|
|
|
} |
|
1089
|
|
|
|
|
|
|
} |
|
1090
|
38
|
|
|
|
|
74
|
foreach my $key ( $feat->get_all_tags() ) { |
|
1091
|
0
|
|
|
|
|
0
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
|
1092
|
|
|
|
|
|
|
} |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
# If we have a note about the deleted bases, add it |
|
1095
|
38
|
100
|
|
|
|
58
|
if ($note) { |
|
1096
|
17
|
|
|
|
|
29
|
$newfeat->add_tag_value( 'note', $note ); |
|
1097
|
|
|
|
|
|
|
} |
|
1098
|
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
# set modified location(s) for the new feature and |
|
1100
|
|
|
|
|
|
|
# add its subfeatures if any |
|
1101
|
38
|
|
|
|
|
71
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
|
1102
|
38
|
100
|
|
|
|
87
|
$loc ? $newfeat->location($loc) : return; |
|
1103
|
25
|
|
|
|
|
48
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
|
1104
|
|
|
|
|
|
|
|
|
1105
|
25
|
|
|
|
|
63
|
return $newfeat; |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=head2 _coord_adjust_insertion |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
title : _coord_adjust_insertion |
|
1112
|
|
|
|
|
|
|
function: recursively adjusts coordinates of seqfeatures on a molecule |
|
1113
|
|
|
|
|
|
|
where another sequence has been inserted. |
|
1114
|
|
|
|
|
|
|
(sub)features that span the insertion site become split features |
|
1115
|
|
|
|
|
|
|
and a note is added about the size and positin of the insertion. |
|
1116
|
|
|
|
|
|
|
Features with an IN-BETWEEN location at the insertion site |
|
1117
|
|
|
|
|
|
|
are lost (such features can only exist between adjacent bases) |
|
1118
|
|
|
|
|
|
|
usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_insertion( |
|
1119
|
|
|
|
|
|
|
$feature, |
|
1120
|
|
|
|
|
|
|
$insert_pos, |
|
1121
|
|
|
|
|
|
|
$insert_length |
|
1122
|
|
|
|
|
|
|
); |
|
1123
|
|
|
|
|
|
|
args : a Bio::SeqFeatureI compliant object, |
|
1124
|
|
|
|
|
|
|
insertion position (insert to the right of this position) |
|
1125
|
|
|
|
|
|
|
length of inserted fragment |
|
1126
|
|
|
|
|
|
|
returns : a Bio::SeqFeatureI compliant object |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=cut |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
sub _coord_adjust_insertion { |
|
1131
|
22
|
|
|
22
|
|
29
|
my ( $self, $feat, $insert_pos, $insert_len ) = @_; |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
22
|
50
|
|
|
|
48
|
$self->throw( 'object [$feat] ' |
|
1134
|
|
|
|
|
|
|
. 'of class [' |
|
1135
|
|
|
|
|
|
|
. ref($feat) |
|
1136
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
|
1137
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
|
1138
|
22
|
50
|
|
|
|
30
|
$self->throw('missing insert position') unless defined $insert_pos; |
|
1139
|
22
|
50
|
|
|
|
30
|
$self->throw('missing insert length') unless defined $insert_len; |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
22
|
|
|
|
|
15
|
my @adjsubfeat; |
|
1142
|
22
|
|
|
|
|
32
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
|
1143
|
5
|
|
|
|
|
12
|
push @adjsubfeat, |
|
1144
|
|
|
|
|
|
|
$self->_coord_adjust_insertion( $subfeat, $insert_pos, $insert_len ); |
|
1145
|
|
|
|
|
|
|
} |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
22
|
|
|
|
|
23
|
my @loc; |
|
1148
|
|
|
|
|
|
|
my $note; |
|
1149
|
22
|
|
|
|
|
32
|
for ( $feat->location->each_Location ) { |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# loose IN-BETWEEN features at the insertion site |
|
1152
|
|
|
|
|
|
|
next |
|
1153
|
22
|
100
|
66
|
|
|
308
|
if ( $_->location_type eq 'IN-BETWEEN' && $_->start == $insert_pos ); |
|
1154
|
20
|
|
|
|
|
37
|
my $strand = $_->strand; |
|
1155
|
20
|
|
|
|
|
27
|
my $type = $_->location_type; |
|
1156
|
20
|
|
|
|
|
33
|
my $start = $_->start; |
|
1157
|
20
|
50
|
|
|
|
69
|
my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; |
|
1158
|
20
|
|
|
|
|
29
|
my $end = $_->end; |
|
1159
|
20
|
50
|
|
|
|
57
|
my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; |
|
1160
|
20
|
|
|
|
|
22
|
my @newcoords = (); |
|
1161
|
20
|
100
|
100
|
|
|
66
|
if ( $start <= $insert_pos && $end > $insert_pos ) { # split the feature |
|
|
|
100
|
|
|
|
|
|
|
1162
|
3
|
|
|
|
|
9
|
@newcoords = ( |
|
1163
|
|
|
|
|
|
|
[ $start, $insert_pos, $start_type, $end_type ], |
|
1164
|
|
|
|
|
|
|
[ |
|
1165
|
|
|
|
|
|
|
( $insert_pos + 1 + $insert_len ), $end + $insert_len, |
|
1166
|
|
|
|
|
|
|
$start_type, $end_type |
|
1167
|
|
|
|
|
|
|
] |
|
1168
|
|
|
|
|
|
|
); |
|
1169
|
3
|
|
|
|
|
10
|
$note = |
|
1170
|
|
|
|
|
|
|
$insert_len |
|
1171
|
|
|
|
|
|
|
. 'bp internal insertion between pos ' |
|
1172
|
|
|
|
|
|
|
. $insert_pos . ' and ' |
|
1173
|
|
|
|
|
|
|
. ( $insert_pos + $insert_len + 1 ); |
|
1174
|
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
} |
|
1176
|
|
|
|
|
|
|
elsif ( $start > $insert_pos ) { # just shift entire location |
|
1177
|
8
|
|
|
|
|
18
|
@newcoords = ( |
|
1178
|
|
|
|
|
|
|
[ |
|
1179
|
|
|
|
|
|
|
$start + $insert_len, $end + $insert_len, |
|
1180
|
|
|
|
|
|
|
$start_type, $end_type |
|
1181
|
|
|
|
|
|
|
] |
|
1182
|
|
|
|
|
|
|
); |
|
1183
|
|
|
|
|
|
|
} |
|
1184
|
|
|
|
|
|
|
else { # not affected |
|
1185
|
9
|
|
|
|
|
20
|
@newcoords = ( [ $start, $end, $start_type, $end_type ] ); |
|
1186
|
|
|
|
|
|
|
} |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# if we have deleted all coordinates, return nothing |
|
1189
|
|
|
|
|
|
|
# (possible if all locations are IN-BETWEEN) |
|
1190
|
20
|
50
|
|
|
|
32
|
return unless @newcoords; |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
20
|
|
|
|
|
33
|
my @subloc = |
|
1193
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( \@newcoords, $strand, |
|
1194
|
|
|
|
|
|
|
$type ); |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# put together final location which could be a split now |
|
1197
|
20
|
|
|
|
|
42
|
push @loc, $self->_single_loc_object_from_collection(@subloc); |
|
1198
|
|
|
|
|
|
|
} # each location |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
# create new feature based on original one and move annotation across |
|
1201
|
22
|
|
|
|
|
56
|
my $newfeat = |
|
1202
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
|
1203
|
22
|
|
|
|
|
48
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
|
1204
|
3
|
|
|
|
|
6
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
|
1205
|
3
|
|
|
|
|
7
|
$newfeat->annotation->add_Annotation( $key, $value ); |
|
1206
|
|
|
|
|
|
|
} |
|
1207
|
|
|
|
|
|
|
} |
|
1208
|
22
|
|
|
|
|
38
|
foreach my $key ( $feat->get_all_tags() ) { |
|
1209
|
10
|
|
|
|
|
13
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
|
1210
|
|
|
|
|
|
|
} |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
# If we have a note about the inserted bases, add it |
|
1213
|
22
|
100
|
|
|
|
29
|
if ($note) { |
|
1214
|
3
|
|
|
|
|
4
|
$newfeat->add_tag_value( 'note', $note ); |
|
1215
|
|
|
|
|
|
|
} |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
# set modified location(s) for the new feature and |
|
1218
|
|
|
|
|
|
|
# add its subfeatures if any |
|
1219
|
22
|
|
|
|
|
39
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
|
1220
|
22
|
100
|
|
|
|
52
|
$loc ? $newfeat->location($loc) : return; |
|
1221
|
20
|
|
|
|
|
33
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
|
1222
|
|
|
|
|
|
|
|
|
1223
|
20
|
|
|
|
|
37
|
return $newfeat; |
|
1224
|
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
} |
|
1226
|
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
=head2 _single_loc_object_from_collection |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
Title : _single_loc_object_from_collection |
|
1230
|
|
|
|
|
|
|
Function: takes an array of location objects. Returns either a split |
|
1231
|
|
|
|
|
|
|
location object if there are more than one locations in the |
|
1232
|
|
|
|
|
|
|
array or returns the single location if there is only one |
|
1233
|
|
|
|
|
|
|
Usage : my $loc = _single_loc_object_from_collection( @sublocs ); |
|
1234
|
|
|
|
|
|
|
Args : array of Bio::Location objects |
|
1235
|
|
|
|
|
|
|
Returns : a single Bio:;Location object containing all locations |
|
1236
|
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub _single_loc_object_from_collection { |
|
1240
|
119
|
|
|
119
|
|
117
|
my ( $self, @locs ) = @_; |
|
1241
|
119
|
|
|
|
|
76
|
my $loc; |
|
1242
|
119
|
100
|
|
|
|
239
|
if ( @locs > 1 ) { |
|
|
|
100
|
|
|
|
|
|
|
1243
|
7
|
|
|
|
|
26
|
$loc = Bio::Location::Split->new; |
|
1244
|
7
|
|
|
|
|
11
|
$loc->add_sub_Location(@locs); |
|
1245
|
|
|
|
|
|
|
} |
|
1246
|
|
|
|
|
|
|
elsif ( @locs == 1 ) { |
|
1247
|
97
|
|
|
|
|
77
|
$loc = shift @locs; |
|
1248
|
|
|
|
|
|
|
} |
|
1249
|
119
|
|
|
|
|
221
|
return $loc; |
|
1250
|
|
|
|
|
|
|
} # _single_loc_object_from_collection |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=head2 _location_objects_from_coordinate_list |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
Title : _location_objects_from_coordinate_list |
|
1255
|
|
|
|
|
|
|
Function: takes an array-ref of start/end coordinates, a strand and a |
|
1256
|
|
|
|
|
|
|
type and returns a list of Bio::Location objects (Fuzzy by |
|
1257
|
|
|
|
|
|
|
default, Simple in case of in-between coordinates). |
|
1258
|
|
|
|
|
|
|
If location type is not "IN-BETWEEN", individual types may be |
|
1259
|
|
|
|
|
|
|
passed in for start and end location as per Bio::Location::Fuzzy |
|
1260
|
|
|
|
|
|
|
documentation. |
|
1261
|
|
|
|
|
|
|
Usage : my @loc_objs = $self->_location_objects_from_coordinate_list( |
|
1262
|
|
|
|
|
|
|
\@coords, |
|
1263
|
|
|
|
|
|
|
$strand, |
|
1264
|
|
|
|
|
|
|
$type |
|
1265
|
|
|
|
|
|
|
); |
|
1266
|
|
|
|
|
|
|
Args : array-ref of array-refs each containing: |
|
1267
|
|
|
|
|
|
|
start, end [, start-type, end-type] |
|
1268
|
|
|
|
|
|
|
where types are optional. If given, must be |
|
1269
|
|
|
|
|
|
|
a one of ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
|
1270
|
|
|
|
|
|
|
strand (all locations must be on same strand) |
|
1271
|
|
|
|
|
|
|
location-type (EXACT, IN-BETWEEN etc) |
|
1272
|
|
|
|
|
|
|
Returns : list of Bio::Location objects |
|
1273
|
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=cut |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
sub _location_objects_from_coordinate_list { |
|
1277
|
59
|
|
|
59
|
|
46
|
my $self = shift; |
|
1278
|
59
|
|
|
|
|
61
|
my ( $coords_ref, $strand, $type ) = @_; |
|
1279
|
59
|
50
|
|
|
|
85
|
$self->throw( 'expected 3 parameters but got ' . @_ ) unless @_ == 3; |
|
1280
|
59
|
50
|
|
|
|
89
|
$self->throw('first argument must be an ARRAY reference#') |
|
1281
|
|
|
|
|
|
|
unless ref($coords_ref) eq 'ARRAY'; |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
59
|
|
|
|
|
40
|
my @loc; |
|
1284
|
59
|
|
|
|
|
70
|
foreach my $coords_set (@$coords_ref) { |
|
1285
|
66
|
|
|
|
|
95
|
my ( $start, $end, $start_type, $end_type ) = @$coords_set; |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
# taken from Bio::SeqUtils::_coord_adjust |
|
1288
|
66
|
50
|
|
|
|
91
|
if ( $type ne 'IN-BETWEEN' ) { |
|
1289
|
66
|
|
|
|
|
196
|
my $loc = Bio::Location::Fuzzy->new( |
|
1290
|
|
|
|
|
|
|
-start => $start, |
|
1291
|
|
|
|
|
|
|
-end => $end, |
|
1292
|
|
|
|
|
|
|
-strand => $strand, |
|
1293
|
|
|
|
|
|
|
-location_type => $type |
|
1294
|
|
|
|
|
|
|
); |
|
1295
|
66
|
100
|
|
|
|
159
|
$loc->start_pos_type($start_type) if $start_type; |
|
1296
|
66
|
100
|
|
|
|
122
|
$loc->end_pos_type($end_type) if $end_type; |
|
1297
|
66
|
|
|
|
|
118
|
push @loc, $loc; |
|
1298
|
|
|
|
|
|
|
} |
|
1299
|
|
|
|
|
|
|
else { |
|
1300
|
0
|
|
|
|
|
0
|
push @loc, |
|
1301
|
|
|
|
|
|
|
Bio::Location::Simple->new( |
|
1302
|
|
|
|
|
|
|
-start => $start, |
|
1303
|
|
|
|
|
|
|
-end => $end, |
|
1304
|
|
|
|
|
|
|
-strand => $strand, |
|
1305
|
|
|
|
|
|
|
-location_type => $type |
|
1306
|
|
|
|
|
|
|
); |
|
1307
|
|
|
|
|
|
|
} |
|
1308
|
|
|
|
|
|
|
} # each coords_set |
|
1309
|
59
|
|
|
|
|
110
|
return @loc; |
|
1310
|
|
|
|
|
|
|
} # _location_objects_from_coordinate_list |
|
1311
|
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=head2 _new_seq_via_clone |
|
1313
|
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
Title : _new_seq_via_clone |
|
1315
|
|
|
|
|
|
|
Function: clone a sequence object using Bio::Root::Root::clone and set the new sequence string |
|
1316
|
|
|
|
|
|
|
sequence features are removed. |
|
1317
|
|
|
|
|
|
|
Usage : my $new_seq = $self->_new_seq_via_clone( $seq_obj, $seq_str ); |
|
1318
|
|
|
|
|
|
|
Args : original seq object [, new sequence string] |
|
1319
|
|
|
|
|
|
|
Returns : a clone of the original sequence object, optionally with new sequence string |
|
1320
|
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=cut |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
sub _new_seq_via_clone { |
|
1324
|
3
|
|
|
3
|
|
4
|
my ( $self, $in_seq_obj, $seq_str ) = @_; |
|
1325
|
3
|
|
|
|
|
12
|
my $out_seq_obj = $in_seq_obj->clone; |
|
1326
|
3
|
50
|
|
|
|
25
|
$out_seq_obj->remove_SeqFeatures if $out_seq_obj->can('remove_SeqFeatures'); |
|
1327
|
3
|
50
|
33
|
|
|
10
|
if ( blessed $out_seq_obj->seq |
|
1328
|
|
|
|
|
|
|
&& $out_seq_obj->seq->isa('Bio::PrimarySeq') ) |
|
1329
|
|
|
|
|
|
|
{ |
|
1330
|
0
|
|
|
|
|
0
|
$out_seq_obj->seq->seq($seq_str); |
|
1331
|
|
|
|
|
|
|
} |
|
1332
|
|
|
|
|
|
|
else { |
|
1333
|
3
|
|
|
|
|
6
|
$out_seq_obj->seq($seq_str); |
|
1334
|
|
|
|
|
|
|
} |
|
1335
|
3
|
|
|
|
|
8
|
return $out_seq_obj; |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
} # _new_seq_via_clone |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=head2 _new_seq_from_old |
|
1340
|
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
Title : _new_seq_from_old |
|
1342
|
|
|
|
|
|
|
Function: creates a new sequence obejct, if possible of the same class as the old and adds |
|
1343
|
|
|
|
|
|
|
attributes to it. Also copies annotation across to the new object. |
|
1344
|
|
|
|
|
|
|
Usage : my $new_seq = $self->_new_seq_from_old( $seq_obj, { seq => $seq_str, display_id => 'some_ID'}); |
|
1345
|
|
|
|
|
|
|
Args : old sequence object |
|
1346
|
|
|
|
|
|
|
hashref of attributes for the new sequence (sequence string etc.) |
|
1347
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=cut |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
sub _new_seq_from_old { |
|
1352
|
6
|
|
|
6
|
|
19
|
my ( $self, $in_seq_obj, $attr ) = @_; |
|
1353
|
6
|
50
|
33
|
|
|
30
|
$self->throw('attributes must be a hashref') |
|
1354
|
|
|
|
|
|
|
if $attr && ref($attr) ne 'HASH'; |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
6
|
|
|
|
|
6
|
my $seqclass; |
|
1357
|
6
|
100
|
|
|
|
14
|
if ( $in_seq_obj->can_call_new ) { |
|
1358
|
4
|
|
|
|
|
6
|
$seqclass = ref($in_seq_obj); |
|
1359
|
|
|
|
|
|
|
} |
|
1360
|
|
|
|
|
|
|
else { |
|
1361
|
2
|
|
|
|
|
9
|
$seqclass = 'Bio::Primaryseq'; |
|
1362
|
2
|
|
|
|
|
29
|
$self->_attempt_to_load_seq; |
|
1363
|
|
|
|
|
|
|
} |
|
1364
|
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
my $out_seq_obj = $seqclass->new( |
|
1366
|
|
|
|
|
|
|
-seq => $attr->{seq} || $in_seq_obj->seq, |
|
1367
|
|
|
|
|
|
|
-display_id => $attr->{display_id} || $in_seq_obj->display_id, |
|
1368
|
|
|
|
|
|
|
-accession_number => $attr->{accession_number} |
|
1369
|
|
|
|
|
|
|
|| $in_seq_obj->accession_number |
|
1370
|
|
|
|
|
|
|
|| '', |
|
1371
|
|
|
|
|
|
|
-alphabet => $in_seq_obj->alphabet, |
|
1372
|
|
|
|
|
|
|
-desc => $attr->{desc} || $in_seq_obj->desc, |
|
1373
|
|
|
|
|
|
|
-verbose => $attr->{verbose} || $in_seq_obj->verbose, |
|
1374
|
4
|
|
33
|
|
|
33
|
-is_circular => $attr->{is_circular} || $in_seq_obj->is_circular || 0, |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
); |
|
1376
|
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
# move the annotation across to the product |
|
1378
|
4
|
50
|
33
|
|
|
28
|
if ( $out_seq_obj->isa("Bio::AnnotatableI") |
|
1379
|
|
|
|
|
|
|
&& $in_seq_obj->isa("Bio::AnnotatableI") ) |
|
1380
|
|
|
|
|
|
|
{ |
|
1381
|
4
|
|
|
|
|
10
|
foreach my $key ( $in_seq_obj->annotation->get_all_annotation_keys ) { |
|
1382
|
4
|
|
|
|
|
14
|
foreach my $value ( $in_seq_obj->annotation->get_Annotations($key) ) |
|
1383
|
|
|
|
|
|
|
{ |
|
1384
|
4
|
|
|
|
|
6
|
$out_seq_obj->annotation->add_Annotation( $key, $value ); |
|
1385
|
|
|
|
|
|
|
} |
|
1386
|
|
|
|
|
|
|
} |
|
1387
|
|
|
|
|
|
|
} |
|
1388
|
4
|
|
|
|
|
11
|
return $out_seq_obj; |
|
1389
|
|
|
|
|
|
|
} # _new_seq_from_old |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=head2 _coord_adjust |
|
1392
|
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
Title : _coord_adjust |
|
1394
|
|
|
|
|
|
|
Usage : my $newfeat=Bio::SeqUtils->_coord_adjust($feature, 100, $seq->length); |
|
1395
|
|
|
|
|
|
|
Function: Recursive subroutine to adjust the coordinates of a feature |
|
1396
|
|
|
|
|
|
|
and all its subfeatures. If a sequence length is specified, then |
|
1397
|
|
|
|
|
|
|
any adjusted features that have locations beyond the boundaries |
|
1398
|
|
|
|
|
|
|
of the sequence are converted to Bio::Location::Fuzzy objects. |
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
Returns : A Bio::SeqFeatureI compliant object. |
|
1401
|
|
|
|
|
|
|
Args : A Bio::SeqFeatureI compliant object, |
|
1402
|
|
|
|
|
|
|
the number of bases to add to the coordinates |
|
1403
|
|
|
|
|
|
|
(optional) the length of the parent sequence |
|
1404
|
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=cut |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
sub _coord_adjust { |
|
1409
|
8
|
|
|
8
|
|
11
|
my ( $self, $feat, $add, $length ) = @_; |
|
1410
|
8
|
50
|
|
|
|
21
|
$self->throw( 'Object [$feat] ' |
|
1411
|
|
|
|
|
|
|
. 'of class [' |
|
1412
|
|
|
|
|
|
|
. ref($feat) |
|
1413
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
|
1414
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
|
1415
|
8
|
|
|
|
|
7
|
my @adjsubfeat; |
|
1416
|
8
|
|
|
|
|
15
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
|
1417
|
0
|
|
|
|
|
0
|
push @adjsubfeat, $self->_coord_adjust( $subfeat, $add, $length ); |
|
1418
|
|
|
|
|
|
|
} |
|
1419
|
8
|
|
|
|
|
5
|
my @loc; |
|
1420
|
8
|
|
|
|
|
19
|
for ( $feat->location->each_Location ) { |
|
1421
|
8
|
|
|
|
|
15
|
my @coords = ( $_->start, $_->end ); |
|
1422
|
8
|
|
|
|
|
17
|
my $strand = $_->strand; |
|
1423
|
8
|
|
|
|
|
12
|
my $type = $_->location_type; |
|
1424
|
8
|
|
|
|
|
16
|
foreach (@coords) { |
|
1425
|
16
|
50
|
|
|
|
23
|
$self->throw("can not handle negative feature positions (got: $_)") |
|
1426
|
|
|
|
|
|
|
if $_ < 0; |
|
1427
|
16
|
100
|
100
|
|
|
46
|
if ( $add + $_ < 1 ) { |
|
|
|
100
|
|
|
|
|
|
|
1428
|
2
|
|
|
|
|
3
|
$_ = '<1'; |
|
1429
|
|
|
|
|
|
|
} |
|
1430
|
|
|
|
|
|
|
elsif ( defined $length and $add + $_ > $length ) { |
|
1431
|
1
|
|
|
|
|
3
|
$_ = ">$length"; |
|
1432
|
|
|
|
|
|
|
} |
|
1433
|
|
|
|
|
|
|
else { |
|
1434
|
13
|
|
|
|
|
15
|
$_ = $add + $_; |
|
1435
|
|
|
|
|
|
|
} |
|
1436
|
|
|
|
|
|
|
} |
|
1437
|
8
|
|
|
|
|
22
|
push @loc, |
|
1438
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( [ \@coords ], |
|
1439
|
|
|
|
|
|
|
$strand, $type ); |
|
1440
|
|
|
|
|
|
|
} |
|
1441
|
8
|
|
|
|
|
19
|
my $newfeat = |
|
1442
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
|
1443
|
8
|
|
|
|
|
20
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
|
1444
|
0
|
|
|
|
|
0
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
|
1445
|
0
|
|
|
|
|
0
|
$newfeat->annotation->add_Annotation( $key, $value ); |
|
1446
|
|
|
|
|
|
|
} |
|
1447
|
|
|
|
|
|
|
} |
|
1448
|
8
|
|
|
|
|
16
|
foreach my $key ( $feat->get_all_tags() ) { |
|
1449
|
6
|
|
|
|
|
11
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
|
1450
|
|
|
|
|
|
|
} |
|
1451
|
8
|
|
|
|
|
20
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
|
1452
|
8
|
50
|
|
|
|
21
|
$loc ? $newfeat->location($loc) : return; |
|
1453
|
8
|
|
|
|
|
12
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
|
1454
|
8
|
|
|
|
|
24
|
return $newfeat; |
|
1455
|
|
|
|
|
|
|
} |
|
1456
|
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
=head2 revcom_with_features |
|
1458
|
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
Title : revcom_with_features |
|
1460
|
|
|
|
|
|
|
Usage : $revcom=Bio::SeqUtils->revcom_with_features($seq); |
|
1461
|
|
|
|
|
|
|
Function: Like Bio::Seq::revcom, but keeps features (adjusting coordinates |
|
1462
|
|
|
|
|
|
|
as appropriate. |
|
1463
|
|
|
|
|
|
|
Returns : A new sequence object |
|
1464
|
|
|
|
|
|
|
Args : A sequence object |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
=cut |
|
1468
|
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
sub revcom_with_features { |
|
1470
|
5
|
|
|
5
|
1
|
8
|
my ( $self, $seq ) = @_; |
|
1471
|
5
|
50
|
|
|
|
14
|
$self->throw( 'Object [$seq] ' |
|
1472
|
|
|
|
|
|
|
. 'of class [' |
|
1473
|
|
|
|
|
|
|
. ref($seq) |
|
1474
|
|
|
|
|
|
|
. '] should be a Bio::SeqI ' ) |
|
1475
|
|
|
|
|
|
|
unless $seq->isa('Bio::SeqI'); |
|
1476
|
5
|
|
|
|
|
24
|
my $revcom = $seq->revcom; |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
# make sure that there is no annotation or features in $trunc |
|
1479
|
|
|
|
|
|
|
# (->revcom() now clone objects except for Bio::Seq::LargePrimarySeq) |
|
1480
|
5
|
|
|
|
|
11
|
$revcom->annotation->remove_Annotations; |
|
1481
|
5
|
|
|
|
|
15
|
$revcom->remove_SeqFeatures; |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
#move annotations |
|
1484
|
5
|
|
|
|
|
11
|
foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { |
|
1485
|
4
|
|
|
|
|
9
|
foreach my $value ( $seq->annotation->get_Annotations($key) ) { |
|
1486
|
4
|
|
|
|
|
7
|
$revcom->annotation->add_Annotation( $key, $value ); |
|
1487
|
|
|
|
|
|
|
} |
|
1488
|
|
|
|
|
|
|
} |
|
1489
|
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
#move features |
|
1491
|
5
|
|
|
|
|
14
|
for ( map { $self->_feature_revcom( $_, $seq->length ) } |
|
|
6
|
|
|
|
|
13
|
|
|
1492
|
|
|
|
|
|
|
reverse $seq->get_SeqFeatures ) |
|
1493
|
|
|
|
|
|
|
{ |
|
1494
|
6
|
|
|
|
|
14
|
$revcom->add_SeqFeature($_); |
|
1495
|
|
|
|
|
|
|
} |
|
1496
|
5
|
|
|
|
|
9
|
return $revcom; |
|
1497
|
|
|
|
|
|
|
} |
|
1498
|
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=head2 _feature_revcom |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
Title : _feature_revcom |
|
1502
|
|
|
|
|
|
|
Usage : my $newfeat=Bio::SeqUtils->_feature_revcom($feature, $seq->length); |
|
1503
|
|
|
|
|
|
|
Function: Recursive subroutine to reverse complement a feature and |
|
1504
|
|
|
|
|
|
|
all its subfeatures. The length of the parent sequence must be |
|
1505
|
|
|
|
|
|
|
specified. |
|
1506
|
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
Returns : A Bio::SeqFeatureI compliant object. |
|
1508
|
|
|
|
|
|
|
Args : A Bio::SeqFeatureI compliant object, |
|
1509
|
|
|
|
|
|
|
the length of the parent sequence |
|
1510
|
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
=cut |
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
sub _feature_revcom { |
|
1515
|
6
|
|
|
6
|
|
7
|
my ( $self, $feat, $length ) = @_; |
|
1516
|
6
|
50
|
|
|
|
18
|
$self->throw( 'Object [$feat] ' |
|
1517
|
|
|
|
|
|
|
. 'of class [' |
|
1518
|
|
|
|
|
|
|
. ref($feat) |
|
1519
|
|
|
|
|
|
|
. '] should be a Bio::SeqFeatureI ' ) |
|
1520
|
|
|
|
|
|
|
unless $feat->isa('Bio::SeqFeatureI'); |
|
1521
|
6
|
|
|
|
|
5
|
my @adjsubfeat; |
|
1522
|
6
|
|
|
|
|
14
|
for my $subfeat ( $feat->get_SeqFeatures ) { |
|
1523
|
0
|
|
|
|
|
0
|
push @adjsubfeat, $self->_feature_revcom( $subfeat, $length ); |
|
1524
|
|
|
|
|
|
|
} |
|
1525
|
6
|
|
|
|
|
7
|
my @loc; |
|
1526
|
6
|
|
|
|
|
12
|
for ( $feat->location->each_Location ) { |
|
1527
|
6
|
|
|
|
|
12
|
my $type = $_->location_type; |
|
1528
|
6
|
|
|
|
|
7
|
my $strand; |
|
1529
|
6
|
100
|
|
|
|
13
|
if ( $_->strand == -1 ) { $strand = 1 } |
|
|
5
|
50
|
|
|
|
8
|
|
|
1530
|
1
|
|
|
|
|
2
|
elsif ( $_->strand == 1 ) { $strand = -1 } |
|
1531
|
0
|
|
|
|
|
0
|
else { $strand = $_->strand } |
|
1532
|
6
|
|
|
|
|
12
|
my $newend = |
|
1533
|
|
|
|
|
|
|
$self->_coord_revcom( $_->start, $_->start_pos_type, $length ); |
|
1534
|
6
|
|
|
|
|
16
|
my $newstart = |
|
1535
|
|
|
|
|
|
|
$self->_coord_revcom( $_->end, $_->end_pos_type, $length ); |
|
1536
|
6
|
|
|
|
|
9
|
my $newstart_type = $_->end_pos_type; |
|
1537
|
6
|
50
|
|
|
|
11
|
$newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER'; |
|
1538
|
6
|
50
|
|
|
|
10
|
$newstart_type = 'AFTER' if $_->end_pos_type eq 'BEFORE'; |
|
1539
|
6
|
|
|
|
|
12
|
my $newend_type = $_->start_pos_type; |
|
1540
|
6
|
50
|
|
|
|
9
|
$newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER'; |
|
1541
|
6
|
100
|
|
|
|
8
|
$newend_type = 'AFTER' if $_->start_pos_type eq 'BEFORE'; |
|
1542
|
6
|
|
|
|
|
19
|
push @loc, |
|
1543
|
|
|
|
|
|
|
$self->_location_objects_from_coordinate_list( |
|
1544
|
|
|
|
|
|
|
[ [ $newstart, $newend, $newstart_type, $newend_type ] ], |
|
1545
|
|
|
|
|
|
|
$strand, $type ); |
|
1546
|
|
|
|
|
|
|
} |
|
1547
|
6
|
|
|
|
|
20
|
my $newfeat = |
|
1548
|
|
|
|
|
|
|
Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); |
|
1549
|
6
|
|
|
|
|
12
|
foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { |
|
1550
|
0
|
|
|
|
|
0
|
foreach my $value ( $feat->annotation->get_Annotations($key) ) { |
|
1551
|
0
|
|
|
|
|
0
|
$newfeat->annotation->add_Annotation( $key, $value ); |
|
1552
|
|
|
|
|
|
|
} |
|
1553
|
|
|
|
|
|
|
} |
|
1554
|
6
|
|
|
|
|
15
|
foreach my $key ( $feat->get_all_tags() ) { |
|
1555
|
3
|
|
|
|
|
7
|
$newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); |
|
1556
|
|
|
|
|
|
|
} |
|
1557
|
|
|
|
|
|
|
|
|
1558
|
6
|
|
|
|
|
16
|
my $loc = $self->_single_loc_object_from_collection(@loc); |
|
1559
|
6
|
50
|
|
|
|
19
|
$loc ? $newfeat->location($loc) : return; |
|
1560
|
|
|
|
|
|
|
|
|
1561
|
6
|
|
|
|
|
9
|
$newfeat->add_SeqFeature($_) for @adjsubfeat; |
|
1562
|
6
|
|
|
|
|
16
|
return $newfeat; |
|
1563
|
|
|
|
|
|
|
} |
|
1564
|
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
sub _coord_revcom { |
|
1566
|
12
|
|
|
12
|
|
12
|
my ( $self, $coord, $type, $length ) = @_; |
|
1567
|
12
|
50
|
33
|
|
|
42
|
if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) { |
|
1568
|
0
|
|
|
|
|
0
|
$coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge; |
|
|
0
|
|
|
|
|
0
|
|
|
1569
|
|
|
|
|
|
|
} |
|
1570
|
|
|
|
|
|
|
else { |
|
1571
|
12
|
|
|
|
|
62
|
$coord =~ s/(\d+)/$length+1-$1/ge; |
|
|
12
|
|
|
|
|
40
|
|
|
1572
|
12
|
|
|
|
|
14
|
$coord =~ tr/<>/>; |
|
1573
|
12
|
100
|
66
|
|
|
31
|
$coord = '>' . $coord |
|
1574
|
|
|
|
|
|
|
if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>'; |
|
1575
|
12
|
50
|
33
|
|
|
23
|
$coord = '<' . $coord |
|
1576
|
|
|
|
|
|
|
if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<'; |
|
1577
|
|
|
|
|
|
|
} |
|
1578
|
12
|
|
|
|
|
24
|
return $coord; |
|
1579
|
|
|
|
|
|
|
} |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=head2 evolve |
|
1582
|
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
Title : evolve |
|
1584
|
|
|
|
|
|
|
Usage : my $newseq = Bio::SeqUtils-> |
|
1585
|
|
|
|
|
|
|
evolve($seq, $similarity, $transition_transversion_rate); |
|
1586
|
|
|
|
|
|
|
Function: Mutates the sequence by point mutations until the similarity of |
|
1587
|
|
|
|
|
|
|
the new sequence has decreased to the required level. |
|
1588
|
|
|
|
|
|
|
Transition/transversion rate is adjustable. |
|
1589
|
|
|
|
|
|
|
Returns : A new Bio::PrimarySeq object |
|
1590
|
|
|
|
|
|
|
Args : sequence object |
|
1591
|
|
|
|
|
|
|
percentage similarity (e.g. 80) |
|
1592
|
|
|
|
|
|
|
tr/tv rate, optional, defaults to 1 (= 1:1) |
|
1593
|
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
Set the verbosity of the Bio::SeqUtils object to positive integer to |
|
1595
|
|
|
|
|
|
|
see the mutations as they happen. |
|
1596
|
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
This method works only on nucleotide sequences. It prints a warning if |
|
1598
|
|
|
|
|
|
|
you set the target similarity to be less than 25%. |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Transition/transversion ratio is an observed attribute of an sequence |
|
1601
|
|
|
|
|
|
|
comparison. We are dealing here with the transition/transversion rate |
|
1602
|
|
|
|
|
|
|
that we set for our model of sequence evolution. |
|
1603
|
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
=cut |
|
1605
|
|
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
|
sub evolve { |
|
1607
|
1
|
|
|
1
|
1
|
3
|
my ( $self, $seq, $sim, $rate ) = @_; |
|
1608
|
1
|
|
50
|
|
|
2
|
$rate ||= 1; |
|
1609
|
|
|
|
|
|
|
|
|
1610
|
1
|
50
|
|
|
|
8
|
$self->throw( 'Object [$seq] ' |
|
1611
|
|
|
|
|
|
|
. 'of class [' |
|
1612
|
|
|
|
|
|
|
. ref($seq) |
|
1613
|
|
|
|
|
|
|
. '] should be a Bio::PrimarySeqI ' ) |
|
1614
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
|
1615
|
|
|
|
|
|
|
|
|
1616
|
1
|
50
|
33
|
|
|
9
|
$self->throw( |
|
1617
|
|
|
|
|
|
|
"[$sim] " . ' should be a positive integer or float under 100' ) |
|
1618
|
|
|
|
|
|
|
unless $sim =~ /^[+\d.]+$/ and $sim <= 100; |
|
1619
|
|
|
|
|
|
|
|
|
1620
|
1
|
50
|
|
|
|
2
|
$self->warn( |
|
1621
|
|
|
|
|
|
|
"Nucleotide sequences are 25% similar by chance. |
|
1622
|
|
|
|
|
|
|
Do you really want to set similarity to [$sim]%?\n" |
|
1623
|
|
|
|
|
|
|
) unless $sim > 25; |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
1
|
50
|
|
|
|
2
|
$self->throw('Only nucleotide sequences are supported') |
|
1626
|
|
|
|
|
|
|
if $seq->alphabet eq 'protein'; |
|
1627
|
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
# arrays of possible changes have transitions as first items |
|
1629
|
1
|
|
|
|
|
1
|
my %changes; |
|
1630
|
1
|
|
|
|
|
4
|
$changes{'a'} = [ 't', 'c', 'g' ]; |
|
1631
|
1
|
|
|
|
|
3
|
$changes{'t'} = [ 'a', 'c', 'g' ]; |
|
1632
|
1
|
|
|
|
|
3
|
$changes{'c'} = [ 'g', 'a', 't' ]; |
|
1633
|
1
|
|
|
|
|
2
|
$changes{'g'} = [ 'c', 'a', 't' ]; |
|
1634
|
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
# given the desired rate, find out where cut off points need to be |
|
1636
|
|
|
|
|
|
|
# when random numbers are generated from 0 to 100 |
|
1637
|
|
|
|
|
|
|
# we are ignoring identical mutations (e.g. A->A) to speed things up |
|
1638
|
1
|
|
|
|
|
2
|
my $bin_size = 100 / ( $rate + 2 ); |
|
1639
|
1
|
|
|
|
|
3
|
my $transition = 100 - ( 2 * $bin_size ); |
|
1640
|
1
|
|
|
|
|
2
|
my $first_transversion = $transition + $bin_size; |
|
1641
|
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
# unify the look of sequence strings |
|
1643
|
1
|
|
|
|
|
2
|
my $string = lc $seq->seq; # lower case |
|
1644
|
1
|
|
|
|
|
2
|
$string =~ |
|
1645
|
|
|
|
|
|
|
s/u/t/; # simplyfy our life; modules should deal with the change anyway |
|
1646
|
|
|
|
|
|
|
# store the original sequence string |
|
1647
|
1
|
|
|
|
|
1
|
my $oristring = $string; |
|
1648
|
1
|
|
|
|
|
2
|
my $length = $seq->length; |
|
1649
|
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
# stop evolving if the limit has been reached |
|
1651
|
1
|
|
|
|
|
3
|
until ( $self->_get_similarity( $oristring, $string ) <= $sim ) { |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
# find the location in the string to change |
|
1654
|
5
|
|
|
|
|
86
|
my $loc = int( rand $length ) + 1; |
|
1655
|
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
# nucleotide to change |
|
1657
|
5
|
|
|
|
|
6
|
my $oldnuc = substr $string, $loc - 1, 1; |
|
1658
|
5
|
|
|
|
|
4
|
my $newnuc; |
|
1659
|
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
# nucleotide it is changed to |
|
1661
|
5
|
|
|
|
|
5
|
my $choose = rand(100); |
|
1662
|
5
|
100
|
|
|
|
10
|
if ( $choose < $transition ) { |
|
|
|
50
|
|
|
|
|
|
|
1663
|
3
|
|
|
|
|
4
|
$newnuc = $changes{$oldnuc}[0]; |
|
1664
|
|
|
|
|
|
|
} |
|
1665
|
|
|
|
|
|
|
elsif ( $choose < $first_transversion ) { |
|
1666
|
0
|
|
|
|
|
0
|
$newnuc = $changes{$oldnuc}[1]; |
|
1667
|
|
|
|
|
|
|
} |
|
1668
|
|
|
|
|
|
|
else { |
|
1669
|
2
|
|
|
|
|
3
|
$newnuc = $changes{$oldnuc}[2]; |
|
1670
|
|
|
|
|
|
|
} |
|
1671
|
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
# do the change |
|
1673
|
5
|
|
|
|
|
5
|
substr $string, $loc - 1, 1, $newnuc; |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
5
|
|
|
|
|
15
|
$self->debug("$loc$oldnuc>$newnuc\n"); |
|
1676
|
|
|
|
|
|
|
} |
|
1677
|
|
|
|
|
|
|
|
|
1678
|
1
|
|
|
|
|
3
|
return new Bio::PrimarySeq( |
|
1679
|
|
|
|
|
|
|
-id => $seq->id . "-$sim", |
|
1680
|
|
|
|
|
|
|
-description => $seq->description, |
|
1681
|
|
|
|
|
|
|
-seq => $string |
|
1682
|
|
|
|
|
|
|
); |
|
1683
|
|
|
|
|
|
|
} |
|
1684
|
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
sub _get_similarity { |
|
1686
|
6
|
|
|
6
|
|
6
|
my ( $self, $oriseq, $seq ) = @_; |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
6
|
|
|
|
|
4
|
my $len = length($oriseq); |
|
1689
|
6
|
|
|
|
|
4
|
my $c; |
|
1690
|
|
|
|
|
|
|
|
|
1691
|
6
|
|
|
|
|
11
|
for ( my $i = 0 ; $i < $len ; $i++ ) { |
|
1692
|
60
|
100
|
|
|
|
106
|
$c++ if substr( $oriseq, $i, 1 ) eq substr( $seq, $i, 1 ); |
|
1693
|
|
|
|
|
|
|
} |
|
1694
|
6
|
|
|
|
|
20
|
return 100 * $c / $len; |
|
1695
|
|
|
|
|
|
|
} |
|
1696
|
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
1; |