line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# BioPerl module Bio::Restriction::Enzyme |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Please direct questions and support issues to |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Cared for by Rob Edwards |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
10
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
## POD Documentation: |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Restriction::Enzyme - A single restriction endonuclease |
17
|
|
|
|
|
|
|
(cuts DNA at specific locations) |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# set up a single restriction enzyme. This contains lots of |
22
|
|
|
|
|
|
|
# information about the enzyme that is generally parsed from a |
23
|
|
|
|
|
|
|
# rebase file and can then be read back |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use Bio::Restriction::Enzyme; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# define a new enzyme with the cut sequence |
28
|
|
|
|
|
|
|
my $re=Bio::Restriction::Enzyme->new |
29
|
|
|
|
|
|
|
(-enzyme=>'EcoRI', -seq=>'G^AATTC'); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# once the sequence has been defined a bunch of stuff is calculated |
32
|
|
|
|
|
|
|
# for you: |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#### PRECALCULATED |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# find where the enzyme cuts after ... |
37
|
|
|
|
|
|
|
my $ca=$re->cut; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# ... and where it cuts on the opposite strand |
40
|
|
|
|
|
|
|
my $oca = $re->complementary_cut; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# get the cut sequence string back. |
43
|
|
|
|
|
|
|
# Note that site will return the sequence with a caret |
44
|
|
|
|
|
|
|
my $with_caret=$re->site; #returns 'G^AATTC'; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# but it is also a Bio::PrimarySeq object .... |
47
|
|
|
|
|
|
|
my $without_caret=$re->seq; # returns 'GAATTC'; |
48
|
|
|
|
|
|
|
# ... and so does string |
49
|
|
|
|
|
|
|
$without_caret=$re->string; #returns 'GAATTC'; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# what is the reverse complement of the cut site |
52
|
|
|
|
|
|
|
my $rc=$re->revcom; # returns 'GAATTC'; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# now the recognition length. There are two types: |
55
|
|
|
|
|
|
|
# recognition_length() is the length of the sequence |
56
|
|
|
|
|
|
|
# cutter() estimate of cut frequency |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $recog_length = $re->recognition_length; # returns 6 |
59
|
|
|
|
|
|
|
# also returns 6 in this case but would return |
60
|
|
|
|
|
|
|
# 4 for GANNTC and 5 for RGATCY (BstX2I)! |
61
|
|
|
|
|
|
|
$recog_length=$re->cutter; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# is the sequence a palindrome - the same forwards and backwards |
64
|
|
|
|
|
|
|
my $pal= $re->palindromic; # this is a boolean |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# is the sequence blunt (i.e. no overhang - the forward and reverse |
67
|
|
|
|
|
|
|
# cuts are the same) |
68
|
|
|
|
|
|
|
print "blunt\n" if $re->overhang eq 'blunt'; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Overhang can have three values: "5'", "3'", "blunt", and undef |
71
|
|
|
|
|
|
|
# Direction is very important if you use Klenow! |
72
|
|
|
|
|
|
|
my $oh=$re->overhang; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# what is the overhang sequence |
75
|
|
|
|
|
|
|
my $ohseq=$re->overhang_seq; # will return 'AATT'; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# is the sequence ambiguous - does it contain non-GATC bases? |
78
|
|
|
|
|
|
|
my $ambig=$re->is_ambiguous; # this is boolean |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
print "Stuff about the enzyme\nCuts after: $ca\n", |
81
|
|
|
|
|
|
|
"Complementary cut: $oca\nSite:\n\t$with_caret or\n", |
82
|
|
|
|
|
|
|
"\t$without_caret\n"; |
83
|
|
|
|
|
|
|
print "Reverse of the sequence: $rc\nRecognition length: $recog_length\n", |
84
|
|
|
|
|
|
|
"Is it palindromic? $pal\n"; |
85
|
|
|
|
|
|
|
print "The overhang is $oh with sequence $ohseq\n", |
86
|
|
|
|
|
|
|
"And is it ambiguous? $ambig\n\n"; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
### THINGS YOU CAN SET, and get from rich REBASE file |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# get or set the isoschizomers (enzymes that recognize the same |
92
|
|
|
|
|
|
|
# site) |
93
|
|
|
|
|
|
|
$re->isoschizomers('PvuII', 'SmaI'); # not really true :) |
94
|
|
|
|
|
|
|
print "Isoschizomers are ", join " ", $re->isoschizomers, "\n"; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# get or set the methylation sites |
97
|
|
|
|
|
|
|
$re->methylation_sites(2); # not really true :) |
98
|
|
|
|
|
|
|
print "Methylated at ", join " ", keys %{$re->methylation_sites},"\n"; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#Get or set the source microbe |
101
|
|
|
|
|
|
|
$re->microbe('E. coli'); |
102
|
|
|
|
|
|
|
print "It came from ", $re->microbe, "\n"; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# get or set the person who isolated it |
105
|
|
|
|
|
|
|
$re->source("Rob"); # not really true :) |
106
|
|
|
|
|
|
|
print $re->source, " sent it to us\n"; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# get or set whether it is commercially available and the company |
109
|
|
|
|
|
|
|
# that it can be bought at |
110
|
|
|
|
|
|
|
$re->vendors('NEB'); # my favorite |
111
|
|
|
|
|
|
|
print "Is it commercially available :"; |
112
|
|
|
|
|
|
|
print $re->vendors ? "Yes" : "No"; |
113
|
|
|
|
|
|
|
print " and it can be got from ", join " ", |
114
|
|
|
|
|
|
|
$re->vendors, "\n"; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# get or set a reference for this |
117
|
|
|
|
|
|
|
$re->reference('Edwards et al. J. Bacteriology'); |
118
|
|
|
|
|
|
|
print "It was not published in ", $re->reference, "\n"; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# get or set the enzyme name |
121
|
|
|
|
|
|
|
$re->name('BamHI'); |
122
|
|
|
|
|
|
|
print "The name of EcoRI is not really ", $re->name, "\n"; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 DESCRIPTION |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
This module defines a single restriction endonuclease. You can use it |
128
|
|
|
|
|
|
|
to make custom restriction enzymes, and it is used by |
129
|
|
|
|
|
|
|
Bio::Restriction::IO to define enzymes in the New England Biolabs |
130
|
|
|
|
|
|
|
REBASE collection. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Use Bio::Restriction::Analysis to figure out which enzymes are available |
133
|
|
|
|
|
|
|
and where they cut your sequence. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head1 RESTRICTION MODIFICATION SYSTEMS |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
At least three geneticaly and biochamically distinct restriction |
139
|
|
|
|
|
|
|
modification systems exist. The cutting components of them are known |
140
|
|
|
|
|
|
|
as restriction endonuleases. The three systems are known by roman |
141
|
|
|
|
|
|
|
numerals: Type I, II, and III restriction enzymes. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
REBASE format 'cutzymes'(#15) lists enzyme type in its last field. The |
144
|
|
|
|
|
|
|
categories there do not always match the the following short |
145
|
|
|
|
|
|
|
descriptions of the enzymes types. See |
146
|
|
|
|
|
|
|
http://it.stlawu.edu/~tbudd/rmsyst.html for a better overview. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 TypeI |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Type I systems recognize a bipartite asymetrical sequence of 5-7 bp: |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
---TGA*NnTGCT--- * = methylation sites |
154
|
|
|
|
|
|
|
---ACTNnA*CGA--- n = 6 for EcoK, n = 8 for EcoB |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The cleavage site is roughly 1000 (400-7000) base pairs from the |
157
|
|
|
|
|
|
|
recognition site. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 TypeII |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
The simplest and most common (at least commercially). |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Site recognition is via short palindromic base sequences that are 4-6 |
164
|
|
|
|
|
|
|
base pairs long. Cleavage is at the recognition site (but may |
165
|
|
|
|
|
|
|
occasionally be just adjacent to the palindromic sequence, usually |
166
|
|
|
|
|
|
|
within) and may produce blunt end termini or staggered, "sticky |
167
|
|
|
|
|
|
|
end" termini. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 TypeIII |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
The recognition site is a 5-7 bp asymmetrical sequence. Cleavage is |
172
|
|
|
|
|
|
|
ATP dependent 24-26 base pairs downstream from the recognition site |
173
|
|
|
|
|
|
|
and usually yields staggered cuts 2-4 bases apart. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 COMMENTS |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
I am trying to make this backwards compatible with |
179
|
|
|
|
|
|
|
Bio::Tools::RestrictionEnzyme. Undoubtedly some things will break, |
180
|
|
|
|
|
|
|
but we can fix things as we progress.....! |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
I have added another comments section at the end of this POD that |
183
|
|
|
|
|
|
|
discusses a couple of areas I know are broken (at the moment) |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head1 TO DO |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=over 2 |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item * |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Convert vendors touse full names of companies instead of code |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=item * |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Add regular expression based matching to vendors |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item * |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Move away from the archaic ^ notation for cut sites. Ideally |
201
|
|
|
|
|
|
|
I'd totally like to remove this altogether, or add a method |
202
|
|
|
|
|
|
|
that adds it in if someone really wants it. We should be |
203
|
|
|
|
|
|
|
fixed on a sequence, number notation. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=back |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head1 FEEDBACK |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 Mailing Lists |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
212
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
213
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
216
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 Support |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
I |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
225
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
226
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
227
|
|
|
|
|
|
|
with code and data examples if at all possible. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 Reporting Bugs |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
232
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
233
|
|
|
|
|
|
|
web: |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 AUTHOR |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
Rob Edwards, redwards@utmem.edu |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Heikki Lehvaslaiho, heikki-at-bioperl-dot-org |
244
|
|
|
|
|
|
|
Peter Blaiklock, pblaiklo@restrictionmapper.org |
245
|
|
|
|
|
|
|
Mark A. Jensen, maj-at-fortinbras-dot-us |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 COPYRIGHT |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Copyright (c) 2003 Rob Edwards. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All |
252
|
|
|
|
|
|
|
Rights Reserved. This module is free software; you can redistribute |
253
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 SEE ALSO |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
L, |
258
|
|
|
|
|
|
|
L, L |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head1 APPENDIX |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Methods beginning with a leading underscore are considered private and |
263
|
|
|
|
|
|
|
are intended for internal use by this module. They are not considered |
264
|
|
|
|
|
|
|
part of the public interface and are described here for documentation |
265
|
|
|
|
|
|
|
purposes only. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=cut |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
package Bio::Restriction::Enzyme; |
270
|
4
|
|
|
4
|
|
497
|
use strict; |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
85
|
|
271
|
|
|
|
|
|
|
|
272
|
4
|
|
|
4
|
|
600
|
use Bio::PrimarySeq; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
85
|
|
273
|
|
|
|
|
|
|
|
274
|
4
|
|
|
4
|
|
15
|
use Data::Dumper; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
178
|
|
275
|
4
|
|
|
4
|
|
1622
|
use Tie::RefHash; |
|
4
|
|
|
|
|
8926
|
|
|
4
|
|
|
|
|
83
|
|
276
|
4
|
|
|
4
|
|
18
|
use vars qw (%TYPE); |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
119
|
|
277
|
4
|
|
|
4
|
|
12
|
use base qw(Bio::Root::Root Bio::Restriction::EnzymeI); |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
1398
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
BEGIN { |
280
|
4
|
|
|
4
|
|
6180
|
my %TYPE = (I => 1, II => 1, III => 1); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 new |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Title : new |
286
|
|
|
|
|
|
|
Function |
287
|
|
|
|
|
|
|
Function : Initializes the Enzyme object |
288
|
|
|
|
|
|
|
Returns : The Restriction::Enzyme object |
289
|
|
|
|
|
|
|
Argument : A standard definition can have several formats. For example: |
290
|
|
|
|
|
|
|
$re->new(-enzyme='EcoRI', -seq->'GAATTC' -cut->'1') |
291
|
|
|
|
|
|
|
Or, you can define the cut site in the sequence, for example |
292
|
|
|
|
|
|
|
$re->new(-enzyme='EcoRI', -seq->'G^AATTC'), but you must use a caret |
293
|
|
|
|
|
|
|
Or, a sequence can cut outside the recognition site, for example |
294
|
|
|
|
|
|
|
$re->new(-enzyme='AbeI', -seq->'CCTCAGC' -cut->'-5/-2') |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Other arguments: |
297
|
|
|
|
|
|
|
-isoschizomers=>\@list a reference to an array of |
298
|
|
|
|
|
|
|
known isoschizomers |
299
|
|
|
|
|
|
|
-references=>$ref a reference to the enzyme |
300
|
|
|
|
|
|
|
-source=>$source the source (person) of the enzyme |
301
|
|
|
|
|
|
|
-commercial_availability=>@companies a list of companies |
302
|
|
|
|
|
|
|
that supply the enzyme |
303
|
|
|
|
|
|
|
-methylation_site=>\%sites a reference to hash that has |
304
|
|
|
|
|
|
|
the position as the key and the type of methylation |
305
|
|
|
|
|
|
|
as the value |
306
|
|
|
|
|
|
|
-xln_sub => sub { ($self,$cut) = @_; ...; return $xln_cut }, |
307
|
|
|
|
|
|
|
a coderef to a routine that translates the input cut value |
308
|
|
|
|
|
|
|
into Bio::Restriction::Enzyme coordinates |
309
|
|
|
|
|
|
|
( e.g., for withrefm format, this might be |
310
|
|
|
|
|
|
|
-xln_sub => sub { length( shift()->string ) + shift } ) |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
A Restriction::Enzyme object manages its recognition sequence as a |
313
|
|
|
|
|
|
|
Bio::PrimarySeq object. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
The minimum requirement is for a name and a sequence. |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
This will create the restriction enzyme object, and define several |
318
|
|
|
|
|
|
|
things about the sequence, such as palindromic, size, etc. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# do all cut/comp cut setting within the constructor |
323
|
|
|
|
|
|
|
# new args |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub new { |
326
|
10163
|
|
|
10163
|
1
|
26759
|
my($class, @args) = @_; |
327
|
10163
|
|
|
|
|
22496
|
my $self = $class->SUPER::new(@args); |
328
|
|
|
|
|
|
|
|
329
|
10163
|
|
|
|
|
40325
|
my ($name,$enzyme,$site,$seq,$precut, $postcut,$cut,$complementary_cut, $is_prototype, $prototype, |
330
|
|
|
|
|
|
|
$isoschizomers, $meth, $microbe, $source, $vendors, $references, $neo, $recog, $xln_sub) = |
331
|
|
|
|
|
|
|
$self->_rearrange([qw( |
332
|
|
|
|
|
|
|
NAME |
333
|
|
|
|
|
|
|
ENZYME |
334
|
|
|
|
|
|
|
SITE |
335
|
|
|
|
|
|
|
SEQ |
336
|
|
|
|
|
|
|
PRECUT |
337
|
|
|
|
|
|
|
POSTCUT |
338
|
|
|
|
|
|
|
CUT |
339
|
|
|
|
|
|
|
COMPLEMENTARY_CUT |
340
|
|
|
|
|
|
|
IS_PROTOTYPE |
341
|
|
|
|
|
|
|
PROTOTYPE |
342
|
|
|
|
|
|
|
ISOSCHIZOMERS |
343
|
|
|
|
|
|
|
METHYLATION_SITES |
344
|
|
|
|
|
|
|
MICROBE |
345
|
|
|
|
|
|
|
SOURCE |
346
|
|
|
|
|
|
|
VENDORS |
347
|
|
|
|
|
|
|
REFERENCES |
348
|
|
|
|
|
|
|
IS_NEOSCHIZOMER |
349
|
|
|
|
|
|
|
RECOG |
350
|
|
|
|
|
|
|
XLN_SUB |
351
|
|
|
|
|
|
|
)], @args); |
352
|
|
|
|
|
|
|
|
353
|
10163
|
0
|
66
|
|
|
54615
|
$self->throw('At the minimum, you must define a name and '. |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
354
|
|
|
|
|
|
|
'recognition site for the restriction enzyme') |
355
|
|
|
|
|
|
|
unless (($name || $enzyme) && ($site || $recog || $seq)); |
356
|
|
|
|
|
|
|
|
357
|
10163
|
|
|
|
|
13059
|
$self->{_isoschizomers} = []; |
358
|
10163
|
|
|
|
|
10659
|
$self->{_methylation_sites} = {}; |
359
|
10163
|
|
|
|
|
10228
|
$self->{_vendors} = []; |
360
|
10163
|
|
|
|
|
8919
|
$self->{_references} = []; |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# squelch warnings |
363
|
10163
|
|
100
|
|
|
23988
|
$postcut ||=''; |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# enzyme name |
366
|
10163
|
100
|
|
|
|
12763
|
$enzyme && $self->name($enzyme); |
367
|
10163
|
100
|
|
|
|
18849
|
$name && $self->name($name); |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# site |
370
|
|
|
|
|
|
|
# |
371
|
|
|
|
|
|
|
# note that the site() setter will automatically set |
372
|
|
|
|
|
|
|
# cut(), complementary_cut(), if the cut site is indicated |
373
|
|
|
|
|
|
|
# in $site with '^' /maj |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# create the cut site if appropriate/this is a kludge due to |
376
|
|
|
|
|
|
|
# the base.pm format in the new B:R order... |
377
|
10163
|
100
|
100
|
|
|
21476
|
if ( $cut and $cut <= length $site) { |
378
|
2531
|
|
|
|
|
4417
|
$site = substr($site, 0, $cut).'^'.substr($site, $cut); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
10163
|
50
|
|
|
|
11382
|
if ($site) { |
382
|
10163
|
|
|
|
|
13103
|
$self->site($site); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
else { |
385
|
0
|
0
|
|
|
|
0
|
$seq && $self->site($seq); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
10162
|
100
|
|
|
|
12002
|
if ($recog) { |
389
|
7495
|
|
|
|
|
10706
|
$self->recog($recog); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
else { |
392
|
2667
|
50
|
|
|
|
3616
|
$seq && $self->recog($seq); |
393
|
2667
|
50
|
|
|
|
4811
|
$site && $self->recog($site); |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
# call revcom_site to initialize it and revcom_recog: |
396
|
10162
|
|
|
|
|
12279
|
$self->revcom_site(); |
397
|
|
|
|
|
|
|
|
398
|
10162
|
|
|
|
|
16565
|
$recog = $self->string; # for length calculations below |
399
|
|
|
|
|
|
|
|
400
|
10162
|
100
|
|
|
|
15697
|
if ($xln_sub) { |
401
|
7494
|
50
|
|
|
|
12302
|
$self->warn("Translation subroutine is not a coderef; ignoring") unless |
402
|
|
|
|
|
|
|
ref($xln_sub) eq 'CODE'; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# cut coordinates |
406
|
10162
|
|
|
|
|
10973
|
my ($pc_cut, $pc_comp_cut) = ( $postcut =~ /(-?\d+)\/(-?\d+)/ ); |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# cut definitions in constructor override any autoset in |
409
|
|
|
|
|
|
|
# site() |
410
|
|
|
|
|
|
|
# definitions in site conform to withrefm coords, translation |
411
|
|
|
|
|
|
|
# happens here |
412
|
|
|
|
|
|
|
|
413
|
10162
|
100
|
|
|
|
16974
|
if (defined $cut) { |
|
|
100
|
|
|
|
|
|
414
|
2664
|
50
|
|
|
|
4386
|
$self->cut( $xln_sub ? $xln_sub->($self, $cut) : $cut ); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
elsif ( defined $pc_cut ) { |
417
|
457
|
100
|
|
|
|
1113
|
$self->cut( $xln_sub ? $xln_sub->($self, $pc_cut) : $pc_cut ); |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
10162
|
100
|
|
|
|
16523
|
if (defined $complementary_cut) { |
|
|
100
|
|
|
|
|
|
421
|
4
|
50
|
|
|
|
12
|
$self->complementary_cut($xln_sub ? $xln_sub->($self,$complementary_cut) : $complementary_cut); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
elsif (defined $pc_comp_cut) { |
424
|
457
|
100
|
|
|
|
981
|
$self->complementary_cut($xln_sub ? $xln_sub->($self,$pc_comp_cut) : $pc_comp_cut); |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
10162
|
100
|
|
|
|
18133
|
$is_prototype && $self->is_prototype($is_prototype); |
428
|
10162
|
100
|
|
|
|
12379
|
$prototype && $self->prototype($prototype); |
429
|
10162
|
100
|
|
|
|
16803
|
$isoschizomers && $self->isoschizomers($isoschizomers); |
430
|
10162
|
50
|
|
|
|
13236
|
$meth && $self->methylation_sites($meth); |
431
|
10162
|
50
|
|
|
|
11447
|
$microbe && $self->microbe($microbe); |
432
|
10162
|
100
|
|
|
|
16255
|
$source && $self->source($source); |
433
|
10162
|
100
|
|
|
|
14989
|
$vendors && $self->vendors($vendors); |
434
|
10162
|
100
|
|
|
|
16711
|
$references && $self->references($references); |
435
|
10162
|
50
|
|
|
|
12073
|
$neo && $self->is_neoschizomer($neo); |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# create multicut enzymes here if $precut defined |
438
|
10162
|
100
|
|
|
|
12031
|
if (defined $precut) { |
439
|
49
|
|
|
|
|
164
|
bless $self, 'Bio::Restriction::Enzyme::MultiCut'; |
440
|
49
|
|
|
|
|
222
|
my ($pc_cut, $pc_comp_cut) = $precut =~ /(-?\d+)\/(-?\d+)/; |
441
|
49
|
|
|
|
|
173
|
my $re2 = $self->clone; |
442
|
49
|
50
|
|
|
|
200
|
$re2->cut($xln_sub ? $xln_sub->($self, -$pc_cut) : -$pc_cut); |
443
|
49
|
50
|
|
|
|
165
|
$re2->complementary_cut($xln_sub ? $xln_sub->($self, -$pc_comp_cut) : -$pc_comp_cut); |
444
|
49
|
|
|
|
|
165
|
$self->others($re2); |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
10162
|
|
|
|
|
25977
|
return $self; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head1 Essential methods |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head2 name |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Title : name |
457
|
|
|
|
|
|
|
Usage : $re->name($newval) |
458
|
|
|
|
|
|
|
Function : Gets/Sets the restriction enzyme name |
459
|
|
|
|
|
|
|
Example : $re->name('EcoRI') |
460
|
|
|
|
|
|
|
Returns : value of name |
461
|
|
|
|
|
|
|
Args : newvalue (optional) |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
This will also clean up the name. I have added this because some |
464
|
|
|
|
|
|
|
people get confused about restriction enzyme names. The name should |
465
|
|
|
|
|
|
|
be One upper case letter, and two lower case letters (because it is |
466
|
|
|
|
|
|
|
derived from the organism name, eg. EcoRI is from E. coli). After |
467
|
|
|
|
|
|
|
that it is all confused, but the numbers should be roman numbers not |
468
|
|
|
|
|
|
|
numbers, therefore we'll correct those. At least this will provide |
469
|
|
|
|
|
|
|
some standard, I hope. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub name{ |
474
|
70047
|
|
|
70047
|
1
|
52711
|
my ($self, $name)=@_; |
475
|
|
|
|
|
|
|
|
476
|
70047
|
100
|
|
|
|
82053
|
if ($name) { # correct and set the name |
477
|
10164
|
|
|
|
|
7896
|
my $old_name = $name; |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# remove spaces. Some people write HindIII as Hind III |
480
|
10164
|
|
|
|
|
16994
|
$name =~ s/\s+//g; |
481
|
|
|
|
|
|
|
# change TAILING ones to I's |
482
|
10164
|
50
|
|
|
|
18688
|
if ($name =~ m/(1+)$/) { |
483
|
0
|
|
|
|
|
0
|
my $i = 'I' x length($1); |
484
|
0
|
|
|
|
|
0
|
$name =~ s/1+$/$i/; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# make the first letter upper case |
488
|
10164
|
|
|
|
|
26613
|
$name =~ s/^(\w)/uc($1)/e; |
|
10164
|
|
|
|
|
22538
|
|
489
|
|
|
|
|
|
|
|
490
|
10164
|
50
|
|
|
|
18321
|
unless ($name eq $old_name) { |
491
|
|
|
|
|
|
|
# we have changed the name, so send a warning |
492
|
0
|
|
|
|
|
0
|
$self->warn("The enzyme name $old_name was changed to $name"); |
493
|
|
|
|
|
|
|
} |
494
|
10164
|
|
|
|
|
12827
|
$self->{'_name'} = $name; |
495
|
|
|
|
|
|
|
} |
496
|
70047
|
|
|
|
|
154207
|
return $self->{'_name'}; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head2 site |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Title : site |
503
|
|
|
|
|
|
|
Usage : $re->site(); |
504
|
|
|
|
|
|
|
Function : Gets/sets the recognition sequence for the enzyme. |
505
|
|
|
|
|
|
|
Example : $seq_string = $re->site(); |
506
|
|
|
|
|
|
|
Returns : String containing recognition sequence indicating |
507
|
|
|
|
|
|
|
: cleavage site as in 'G^AATTC'. |
508
|
|
|
|
|
|
|
Argument : n/a |
509
|
|
|
|
|
|
|
Throws : n/a |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Side effect: the sequence is always converted to upper case. |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
The cut site can also be set by using methods L and |
515
|
|
|
|
|
|
|
L. |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
This will pad out missing sequence with N's. For example the enzyme |
518
|
|
|
|
|
|
|
Acc36I cuts at ACCTGC(4/8). This will be returned as ACCTGCNNNN^ |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Note that the common notation ACCTGC(4/8) means that the forward |
521
|
|
|
|
|
|
|
strand cut is four nucleotides after the END of the recognition |
522
|
|
|
|
|
|
|
site. The forward cut() in the coordinates used here in Acc36I |
523
|
|
|
|
|
|
|
ACCTGC(4/8) is at 6+4 i.e. 10. |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
** This is the main setable method for the recognition site. |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=cut |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sub site { |
530
|
10169
|
|
|
10169
|
1
|
9653
|
my ( $self, $site ) = @_; |
531
|
|
|
|
|
|
|
|
532
|
10169
|
100
|
|
|
|
12547
|
if ($site) { |
533
|
|
|
|
|
|
|
|
534
|
10163
|
100
|
|
|
|
18720
|
$self->throw("Unrecognized characters in site: [$site]") |
535
|
|
|
|
|
|
|
if $site =~ /[^ATGCMRWSYKVHDBN\^]/i; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
# we may have to redefine this if there is a ^ in the sequence |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# first, check and see if we have a cut site in the sequence |
540
|
|
|
|
|
|
|
# if so, find the position, and set the target sequence and cut site |
541
|
|
|
|
|
|
|
|
542
|
10162
|
|
|
|
|
10586
|
$self->{'_site'} = $site; |
543
|
|
|
|
|
|
|
|
544
|
10162
|
|
|
|
|
19702
|
my ( $first, $second ) = $site =~ /(.*)\^(.*)/; |
545
|
10162
|
100
|
|
|
|
18628
|
$site = "$1$2" if defined $first; |
546
|
10162
|
|
|
|
|
8578
|
$self->{'_site'} = $site; |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# now set the recognition site as a new Bio::PrimarySeq object |
549
|
|
|
|
|
|
|
# we need it before calling cut() and complementary_cut() |
550
|
10162
|
|
|
|
|
14225
|
$self->{_seq} = Bio::PrimarySeq->new( |
551
|
|
|
|
|
|
|
-id => $self->name, |
552
|
|
|
|
|
|
|
-seq => $site, |
553
|
|
|
|
|
|
|
-verbose => $self->verbose, |
554
|
|
|
|
|
|
|
-alphabet => 'dna' |
555
|
|
|
|
|
|
|
); |
556
|
|
|
|
|
|
|
|
557
|
10162
|
100
|
|
|
|
18424
|
if ( defined $first ) { |
558
|
4945
|
|
|
|
|
8283
|
$self->cut( length $first ); |
559
|
4945
|
|
|
|
|
6701
|
$self->complementary_cut( length $second ); |
560
|
4945
|
|
|
|
|
6483
|
$self->revcom_site(); |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
10168
|
|
|
|
|
12787
|
return $self->{'_site'}; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 revcom_site |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
Title : revcom_site |
569
|
|
|
|
|
|
|
Usage : $re->revcom_site(); |
570
|
|
|
|
|
|
|
Function : Gets/sets the complementary recognition sequence for the enzyme. |
571
|
|
|
|
|
|
|
Example : $seq_string = $re->revcom_site(); |
572
|
|
|
|
|
|
|
Returns : String containing recognition sequence indicating |
573
|
|
|
|
|
|
|
: cleavage site as in 'G^AATTC'. |
574
|
|
|
|
|
|
|
Argument : none (sets on first call) |
575
|
|
|
|
|
|
|
Throws : n/a |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
This is the same as site, except it returns the revcom site. For |
578
|
|
|
|
|
|
|
palindromic enzymes these two are identical. For non-palindromic |
579
|
|
|
|
|
|
|
enzymes they are not! |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
On set, this also handles setting the revcom_recog attribute. |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
See also L above. |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=cut |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
sub revcom_site { |
588
|
15112
|
|
|
15112
|
1
|
11241
|
my $self = shift; |
589
|
|
|
|
|
|
|
# getter |
590
|
15112
|
100
|
|
|
|
22457
|
return $self->{'_revcom_site'} unless !$self->{'_revcom_site'}; |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# setter |
593
|
10162
|
|
|
|
|
8290
|
my $site = $self->{'_site'}; |
594
|
10162
|
100
|
|
|
|
12970
|
if ($self->is_palindromic) { |
595
|
9266
|
|
|
|
|
9255
|
$self->{'_revcom_site'}=$self->{'_site'}; |
596
|
9266
|
|
|
|
|
12068
|
$self->revcom_recog( $self->string ); |
597
|
9266
|
|
|
|
|
10543
|
return $self->{'_revcom_site'}; |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
896
|
50
|
|
|
|
2021
|
$self->throw("Unrecognized characters in revcom site: [$site]") |
601
|
|
|
|
|
|
|
if $site =~ /[^ATGCMRWSYKVHDBN\^]/i; |
602
|
|
|
|
|
|
|
|
603
|
896
|
100
|
|
|
|
1650
|
if ($site =~ /\^/) { |
604
|
|
|
|
|
|
|
# first, check and see if we have a cut site indicated in the sequence |
605
|
|
|
|
|
|
|
# if so, find the position, and set the target sequence and cut site |
606
|
16
|
|
|
|
|
35
|
$site = $self->revcom; |
607
|
16
|
|
|
|
|
33
|
$self->revcom_recog( $site ); |
608
|
16
|
|
|
|
|
43
|
my $c = length($site)-$self->cut; |
609
|
16
|
|
|
|
|
54
|
$site = substr($site, 0, $c).'^'.substr($site,$c); |
610
|
16
|
|
|
|
|
25
|
$self->{'_revcom_site'} = $site; |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
else { |
613
|
880
|
|
|
|
|
1226
|
my $revcom=$self->revcom; |
614
|
880
|
|
|
|
|
1718
|
$self->revcom_recog( $revcom ); |
615
|
|
|
|
|
|
|
# my $cc=$self->complementary_cut; |
616
|
|
|
|
|
|
|
# my $hat=length($revcom)-$cc+1; # we need it on the other strand! |
617
|
|
|
|
|
|
|
# if ($cc > length($revcom)) { |
618
|
|
|
|
|
|
|
# my $pad= "N" x ($cc-length($revcom)); |
619
|
|
|
|
|
|
|
# $revcom = $pad. $revcom; |
620
|
|
|
|
|
|
|
# $hat=length($revcom)-$cc+1; |
621
|
|
|
|
|
|
|
# } |
622
|
|
|
|
|
|
|
# elsif ($cc < 0) { |
623
|
|
|
|
|
|
|
# my $pad = "N" x -$cc; |
624
|
|
|
|
|
|
|
# $revcom .= $pad; |
625
|
|
|
|
|
|
|
# $hat=length($revcom); |
626
|
|
|
|
|
|
|
# } |
627
|
|
|
|
|
|
|
# $revcom =~ s/(.{$hat})/$1\^/; |
628
|
880
|
|
|
|
|
1002
|
$self->{'_revcom_site'}=$revcom; |
629
|
|
|
|
|
|
|
} |
630
|
896
|
|
|
|
|
918
|
return $self->{'_revcom_site'}; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=head2 cut |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
Title : cut |
636
|
|
|
|
|
|
|
Usage : $num = $re->cut(1); |
637
|
|
|
|
|
|
|
Function : Sets/gets an integer indicating the position of cleavage |
638
|
|
|
|
|
|
|
relative to the 5' end of the recognition sequence in the |
639
|
|
|
|
|
|
|
forward strand. |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
For type II enzymes, sets the symmetrically positioned |
642
|
|
|
|
|
|
|
reverse strand cut site by calling complementary_cut(). |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
Returns : Integer, 0 if not set |
645
|
|
|
|
|
|
|
Argument : an integer for the forward strand cut site (optional) |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Note that the common notation ACCTGC(4/8) means that the forward |
648
|
|
|
|
|
|
|
strand cut is four nucleotides after the END of the recognition |
649
|
|
|
|
|
|
|
site. The forwad cut in the coordinates used here in Acc36I |
650
|
|
|
|
|
|
|
ACCTGC(4/8) is at 6+4 i.e. 10. |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Note that REBASE uses notation where cuts within symmetic sites are |
653
|
|
|
|
|
|
|
marked by '^' within the forward sequence but if the site is |
654
|
|
|
|
|
|
|
asymmetric the parenthesis syntax is used where numbering ALWAYS |
655
|
|
|
|
|
|
|
starts from last nucleotide in the forward strand. That's why AciI has |
656
|
|
|
|
|
|
|
a site usually written as CCGC(-3/-1) actualy cuts in |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
C^C G C |
659
|
|
|
|
|
|
|
G G C^G |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
In our notation, these locations are 1 and 3. |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
The cuts locations in the notation used are relative to the first |
665
|
|
|
|
|
|
|
(non-N) nucleotide of the reported forward strand of the recognition |
666
|
|
|
|
|
|
|
sequence. The following diagram numbers the phosphodiester bonds |
667
|
|
|
|
|
|
|
(marked by + ) which can be cut by the restriction enzymes: |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
1 2 3 4 5 6 7 8 ... |
670
|
|
|
|
|
|
|
N + N + N + N + N + G + A + C + T + G + G + N + N + N |
671
|
|
|
|
|
|
|
... -5 -4 -3 -2 -1 |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub cut { |
677
|
24294
|
|
|
24294
|
1
|
20211
|
my ($self, $value) = @_; |
678
|
24294
|
100
|
|
|
|
31408
|
if (defined $value) { |
679
|
8117
|
50
|
|
|
|
19154
|
$self->throw("The cut position needs to be an integer [$value]") |
680
|
|
|
|
|
|
|
unless $value =~ /[-+]?\d+/; |
681
|
8117
|
|
|
|
|
8967
|
$self->{'_cut'} = $value; |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
# add the caret to the site attribute only if internal /maj |
684
|
8117
|
100
|
100
|
|
|
24473
|
if ( ($self->{_site} !~ /\^/) && ($value <= length ($self->{_site}))) { |
685
|
|
|
|
|
|
|
$self->{_site} = |
686
|
5174
|
|
|
|
|
10445
|
substr($self->{_site}, 0, $value). '^'. substr($self->{_site}, $value); |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
# auto-set comp cut only if cut site is inside the recog site./maj |
690
|
|
|
|
|
|
|
$self->complementary_cut(length ($self->seq->seq) - $value ) |
691
|
8117
|
100
|
100
|
|
|
19588
|
if (($self->{_site} =~ /\^/) && ($self->type eq 'II')); |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
# return undef if not defined yet, not 0 /maj |
695
|
24294
|
|
|
|
|
32383
|
return $self->{'_cut'}; |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head2 cuts_after |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
Title : cuts_after |
701
|
|
|
|
|
|
|
Usage : Alias for cut() |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=cut |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
sub cuts_after { |
706
|
0
|
|
|
0
|
1
|
0
|
shift->cut(@_); |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=head2 complementary_cut |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
Title : complementary_cut |
713
|
|
|
|
|
|
|
Usage : $num = $re->complementary_cut('1'); |
714
|
|
|
|
|
|
|
Function : Sets/Gets an integer indicating the position of cleavage |
715
|
|
|
|
|
|
|
: on the reverse strand of the restriction site. |
716
|
|
|
|
|
|
|
Returns : Integer |
717
|
|
|
|
|
|
|
Argument : An integer (optional) |
718
|
|
|
|
|
|
|
Throws : Exception if argument is non-numeric. |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
This method determines the cut on the reverse strand of the sequence. |
721
|
|
|
|
|
|
|
For most enzymes this will be within the sequence, and will be set |
722
|
|
|
|
|
|
|
automatically based on the forward strand cut, but it need not be. |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
B that the returned location indicates the location AFTER the |
725
|
|
|
|
|
|
|
first non-N site nucleotide in the FORWARD strand. |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=cut |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
sub complementary_cut { |
730
|
16431
|
|
|
16431
|
1
|
13080
|
my ($self, $num)=@_; |
731
|
|
|
|
|
|
|
|
732
|
16431
|
100
|
|
|
|
22103
|
if (defined $num) { |
733
|
13152
|
50
|
|
|
|
28337
|
$self->throw("The cut position needs to be an integer [$num]") |
734
|
|
|
|
|
|
|
unless $num =~ /[-+]?\d+/; |
735
|
13152
|
|
|
|
|
13263
|
$self->{'_rc_cut'} = $num; |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
# return undef, not 0, if not yet defined /maj |
738
|
16431
|
|
|
|
|
16238
|
return $self->{'_rc_cut'}; |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=head1 Read only (usually) recognition site descriptive methods |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=cut |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=head2 type |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Title : type |
749
|
|
|
|
|
|
|
Usage : $re->type(); |
750
|
|
|
|
|
|
|
Function : Get/set the restriction system type |
751
|
|
|
|
|
|
|
Returns : |
752
|
|
|
|
|
|
|
Argument : optional type: ('I'|II|III) |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Restriction enzymes have been catezorized into three types. Some |
755
|
|
|
|
|
|
|
REBASE formats give the type, but the following rules can be used to |
756
|
|
|
|
|
|
|
classify the known enzymes: |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=over 4 |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=item 1 |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Bipartite site (with 6-8 Ns in the middle and the cut site |
763
|
|
|
|
|
|
|
is E 50 nt away) =E type I |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item 2 |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
Site length E 3 =E type I |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=item 3 |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
5-6 asymmetric site and cuts E20 nt away =E type III |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=item 4 |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
All other =E type II |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=back |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
There are some enzymes in REBASE which have bipartite recognition site |
780
|
|
|
|
|
|
|
and cat far from the site but are still classified as type I. I've no |
781
|
|
|
|
|
|
|
idea if this is really so. |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=cut |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
sub type { |
786
|
7707
|
|
|
7707
|
1
|
6568
|
my ($self, $value) = @_; |
787
|
|
|
|
|
|
|
|
788
|
7707
|
50
|
|
|
|
9949
|
if ($value) { |
789
|
|
|
|
|
|
|
$self->throw("Not a valid value [$value], needs to one of : ". |
790
|
|
|
|
|
|
|
join (', ', sort keys %TYPE) ) |
791
|
0
|
0
|
|
|
|
0
|
unless $TYPE{$value}; |
792
|
0
|
|
|
|
|
0
|
return $self->{'_type'} = $value; |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# pre set |
796
|
|
|
|
|
|
|
#return $self->{'_type'} if $self->{'_type'}; |
797
|
|
|
|
|
|
|
# bipartite |
798
|
|
|
|
|
|
|
return $self->{'_type'} = 'I' |
799
|
7707
|
50
|
66
|
|
|
11204
|
if $self->{'_seq'}->seq =~ /N*[^N]+N{6,8}[^N]/ and abs($self->cut) > 50 ; |
800
|
|
|
|
|
|
|
# 3 nt site |
801
|
|
|
|
|
|
|
return $self->{'_type'} = 'I' |
802
|
7707
|
100
|
|
|
|
13462
|
if $self->{'_seq'}->length == 3; |
803
|
|
|
|
|
|
|
# asymmetric and cuts > 20 nt |
804
|
7697
|
50
|
100
|
|
|
9518
|
return $self->{'_type'} = 'III' |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
805
|
|
|
|
|
|
|
if (length $self->string == 5 or length $self->string == 6 ) and |
806
|
|
|
|
|
|
|
not $self->palindromic and abs($self->cut) > 20; |
807
|
7697
|
|
|
|
|
23096
|
return $self->{'_type'} = 'II'; |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=head2 seq |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Title : seq |
813
|
|
|
|
|
|
|
Usage : $re->seq(); |
814
|
|
|
|
|
|
|
Function : Get the Bio::PrimarySeq.pm object representing |
815
|
|
|
|
|
|
|
: the recognition sequence |
816
|
|
|
|
|
|
|
Returns : A Bio::PrimarySeq object representing the |
817
|
|
|
|
|
|
|
enzyme recognition site |
818
|
|
|
|
|
|
|
Argument : n/a |
819
|
|
|
|
|
|
|
Throws : n/a |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
sub seq { |
825
|
7854
|
|
|
7854
|
1
|
12910
|
shift->{'_seq'}; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=head2 string |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
Title : string |
831
|
|
|
|
|
|
|
Usage : $re->string(); |
832
|
|
|
|
|
|
|
Function : Get a string representing the recognition sequence. |
833
|
|
|
|
|
|
|
Returns : String. Does NOT contain a '^' representing the cut location |
834
|
|
|
|
|
|
|
as returned by the site() method. |
835
|
|
|
|
|
|
|
Argument : n/a |
836
|
|
|
|
|
|
|
Throws : n/a |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=cut |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
sub string { |
841
|
52617
|
|
|
52617
|
1
|
78025
|
shift->{'_seq'}->seq; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 recog |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Title : recog |
847
|
|
|
|
|
|
|
Usage : $enz->recog($recognition_sequence) |
848
|
|
|
|
|
|
|
Function: Gets/sets the pure recognition site. Sets as |
849
|
|
|
|
|
|
|
regexp if appropriate. |
850
|
|
|
|
|
|
|
As for string(), the cut indicating carets (^) |
851
|
|
|
|
|
|
|
are expunged. |
852
|
|
|
|
|
|
|
Example : |
853
|
|
|
|
|
|
|
Returns : value of recog (a scalar) |
854
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=cut |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
sub recog{ |
859
|
25643
|
|
|
25643
|
1
|
19329
|
my $self = shift; |
860
|
25643
|
|
|
|
|
16888
|
my $recog = shift; |
861
|
25643
|
100
|
|
|
|
51500
|
return $self->{'recog'} unless $recog; |
862
|
10162
|
|
|
|
|
15480
|
$recog =~ s/\^//g; |
863
|
10162
|
100
|
|
|
|
21957
|
$recog = _expand($recog) if $recog =~ /[^ATGC]/; |
864
|
10162
|
|
|
|
|
12403
|
return $self->{'recog'} = $recog; |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
=head2 revcom_recog |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
Title : revcom_recog |
870
|
|
|
|
|
|
|
Usage : $enz->revcom_recog($recognition_sequence) |
871
|
|
|
|
|
|
|
Function: Gets/sets the pure reverse-complemented recognition site. |
872
|
|
|
|
|
|
|
Sets as regexp if appropriate. |
873
|
|
|
|
|
|
|
As for string(), the cut indicating carets (^) are expunged. |
874
|
|
|
|
|
|
|
Example : |
875
|
|
|
|
|
|
|
Returns : value of recog (a scalar) |
876
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=cut |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
sub revcom_recog{ |
881
|
12969
|
|
|
12969
|
1
|
10471
|
my $self = shift; |
882
|
12969
|
|
|
|
|
8907
|
my $recog = shift; |
883
|
12969
|
100
|
|
|
|
16584
|
unless ($recog) { |
884
|
2807
|
50
|
|
|
|
4192
|
$self->throw( "revcom recognition site not set; call \$enz->revcom_site to initialize" ) unless $self->{'revcom_recog'}; |
885
|
2807
|
|
|
|
|
4567
|
return $self->{'revcom_recog'}; |
886
|
|
|
|
|
|
|
} |
887
|
10162
|
|
|
|
|
10270
|
$recog =~ s/\^//g; |
888
|
10162
|
100
|
|
|
|
23433
|
$recog = _expand($recog) if $recog =~ /[^ATGC]/; |
889
|
10162
|
|
|
|
|
12339
|
return $self->{'revcom_recog'} = $recog; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=head2 revcom |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Title : revcom |
895
|
|
|
|
|
|
|
Usage : $re->revcom(); |
896
|
|
|
|
|
|
|
Function : Get a string representing the reverse complement of |
897
|
|
|
|
|
|
|
: the recognition sequence. |
898
|
|
|
|
|
|
|
Returns : String |
899
|
|
|
|
|
|
|
Argument : n/a |
900
|
|
|
|
|
|
|
Throws : n/a |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=cut |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub revcom { |
905
|
11059
|
|
|
11059
|
1
|
23220
|
shift->{'_seq'}->revcom->seq(); |
906
|
|
|
|
|
|
|
} |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=head2 recognition_length |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Title : recognition_length |
911
|
|
|
|
|
|
|
Usage : $re->recognition_length(); |
912
|
|
|
|
|
|
|
Function : Get the length of the RECOGNITION sequence. |
913
|
|
|
|
|
|
|
This is the total recognition sequence, |
914
|
|
|
|
|
|
|
inluding the ambiguous codes. |
915
|
|
|
|
|
|
|
Returns : An integer |
916
|
|
|
|
|
|
|
Argument : Nothing |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
See also: L |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=cut |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
sub recognition_length { |
923
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
924
|
1
|
|
|
|
|
3
|
return length($self->string); |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head2 cutter |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
Title : cutter |
930
|
|
|
|
|
|
|
Usage : $re->cutter |
931
|
|
|
|
|
|
|
Function : Returns the "cutter" value of the recognition site. |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
This is a value relative to site length and lack of |
934
|
|
|
|
|
|
|
ambiguity codes. Hence: 'RCATGY' is a five (5) cutter site |
935
|
|
|
|
|
|
|
and 'CCTNAGG' a six cutter |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
This measure correlates to the frequency of the enzyme |
938
|
|
|
|
|
|
|
cuts much better than plain recognition site length. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Example : $re->cutter |
941
|
|
|
|
|
|
|
Returns : integer or float number |
942
|
|
|
|
|
|
|
Args : none |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
Why is this better than just stripping the ambiguos codes? Think about |
945
|
|
|
|
|
|
|
it like this: You have a random sequence; all nucleotides are equally |
946
|
|
|
|
|
|
|
probable. You have a four nucleotide re site. The probability of that |
947
|
|
|
|
|
|
|
site finding a match is one out of 4^4 or 256, meaning that on average |
948
|
|
|
|
|
|
|
a four cutter finds a match every 256 nucleotides. For a six cutter, |
949
|
|
|
|
|
|
|
the average fragment length is 4^6 or 4096. In the case of ambiguity |
950
|
|
|
|
|
|
|
codes the chances are finding the match are better: an R (A|T) has 1/2 |
951
|
|
|
|
|
|
|
chance of finding a match in a random sequence. Therefore, for RGCGCY |
952
|
|
|
|
|
|
|
the probability is one out of (2*4*4*4*4*2) which exactly the same as |
953
|
|
|
|
|
|
|
for a five cutter! Cutter, although it can have non-integer values |
954
|
|
|
|
|
|
|
turns out to be a useful and simple measure. |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
From bug 2178: VHDB are ambiguity symbols that match three different |
957
|
|
|
|
|
|
|
nucleotides, so they contribute less to the effective recognition sequence |
958
|
|
|
|
|
|
|
length than e.g. Y which matches only two nucleotides. A symbol which matches n |
959
|
|
|
|
|
|
|
of the 4 nucleotides has an effective length of 1 - log(n) / log(4). |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=cut |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
sub cutter { |
964
|
3532
|
|
|
3532
|
1
|
2328
|
my ($self)=@_; |
965
|
3532
|
|
|
|
|
3326
|
$_ = uc $self->string; |
966
|
|
|
|
|
|
|
|
967
|
3532
|
|
|
|
|
2782
|
my $cutter = tr/[ATGC]//d; |
968
|
3532
|
|
|
|
|
2277
|
my $count = tr/[MRWSYK]//d; |
969
|
3532
|
|
|
|
|
2498
|
$cutter += $count/2; |
970
|
3532
|
|
|
|
|
2366
|
$count = tr/[VHDB]//d; |
971
|
3532
|
|
|
|
|
2260
|
$cutter += $count * (1 - log(3) / log(4)); |
972
|
3532
|
|
|
|
|
7031
|
return $cutter; |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=head2 is_palindromic |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
Title : is_palindromic |
979
|
|
|
|
|
|
|
Alias : palindromic |
980
|
|
|
|
|
|
|
Usage : $re->is_palindromic(); |
981
|
|
|
|
|
|
|
Function : Determines if the recognition sequence is palindromic |
982
|
|
|
|
|
|
|
: for the current restriction enzyme. |
983
|
|
|
|
|
|
|
Returns : Boolean |
984
|
|
|
|
|
|
|
Argument : n/a |
985
|
|
|
|
|
|
|
Throws : n/a |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
A palindromic site (EcoRI): |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
5-GAATTC-3 |
990
|
|
|
|
|
|
|
3-CTTAAG-5 |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
=cut |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
sub is_palindromic { |
995
|
36823
|
|
|
36823
|
1
|
27963
|
my $self = shift; |
996
|
36823
|
100
|
|
|
|
84890
|
return $self->{_palindromic} if defined $self->{_palindromic}; |
997
|
10162
|
100
|
|
|
|
11801
|
if ($self->string eq $self->revcom) { |
998
|
9266
|
|
|
|
|
23228
|
return $self->{_palindromic}=1; |
999
|
|
|
|
|
|
|
} |
1000
|
896
|
|
|
|
|
2041
|
return $self->{_palindromic} = 0; |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
|
1003
|
5824
|
|
|
5824
|
0
|
8119
|
sub palindromic { shift->is_palindromic(@_) } |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
=head2 is_symmetric |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
Title : is_symmetric |
1008
|
|
|
|
|
|
|
Alias : symmetric |
1009
|
|
|
|
|
|
|
Usage : $re->is_symmetric(); |
1010
|
|
|
|
|
|
|
Function : Determines if the enzyme is a symmetric cutter |
1011
|
|
|
|
|
|
|
Returns : Boolean |
1012
|
|
|
|
|
|
|
Argument : none |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
A symmetric but non-palindromic site (HindI): |
1015
|
|
|
|
|
|
|
v |
1016
|
|
|
|
|
|
|
5-C A C-3 |
1017
|
|
|
|
|
|
|
3-G T G-5 |
1018
|
|
|
|
|
|
|
^ |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
=cut |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
sub is_symmetric { |
1023
|
4
|
|
|
4
|
|
20
|
no warnings qw( uninitialized ); |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
5241
|
|
1024
|
9641
|
|
|
9641
|
1
|
6551
|
my $self = shift; |
1025
|
|
|
|
|
|
|
|
1026
|
9641
|
100
|
|
|
|
20053
|
return $self->{_symmetric} if defined $self->{_symmetric}; |
1027
|
5355
|
100
|
|
|
|
4859
|
if ($self->is_palindromic) { |
1028
|
4892
|
|
|
|
|
13023
|
return $self->{_symmetric} = 1; |
1029
|
|
|
|
|
|
|
} |
1030
|
463
|
100
|
|
|
|
503
|
if ($self->cut == length($self->string) - $self->complementary_cut) { |
1031
|
27
|
|
|
|
|
87
|
return $self->{_symmetric}=1; |
1032
|
|
|
|
|
|
|
} |
1033
|
436
|
|
|
|
|
1036
|
return $self->{_symmetric} = 0; |
1034
|
|
|
|
|
|
|
} |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
|
1037
|
0
|
|
|
0
|
0
|
0
|
sub symmetric { shift->is_symmetric(@_) } |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
=head2 overhang |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
Title : overhang |
1042
|
|
|
|
|
|
|
Usage : $re->overhang(); |
1043
|
|
|
|
|
|
|
Function : Determines the overhang of the restriction enzyme |
1044
|
|
|
|
|
|
|
Returns : "5'", "3'", "blunt" of undef |
1045
|
|
|
|
|
|
|
Argument : n/a |
1046
|
|
|
|
|
|
|
Throws : n/a |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
A blunt site in SmaI returns C |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
5' C C C^G G G 3' |
1051
|
|
|
|
|
|
|
3' G G G^C C C 5' |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
A 5' overhang in EcoRI returns C<5'> |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
5' G^A A T T C 3' |
1056
|
|
|
|
|
|
|
3' C T T A A^G 5' |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
A 3' overhang in KpnI returns C<3'> |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
5' G G T A C^C 3' |
1061
|
|
|
|
|
|
|
3' C^C A T G G 5' |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=cut |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
sub overhang { |
1066
|
545
|
|
|
545
|
1
|
335
|
my $self = shift; |
1067
|
545
|
100
|
66
|
|
|
1168
|
unless ($self->{'_cut'} && $self->{'_rc_cut'}) { |
1068
|
32
|
|
|
|
|
50
|
return "unknown"; |
1069
|
|
|
|
|
|
|
} |
1070
|
513
|
100
|
|
|
|
655
|
if ($self->{_cut} < $self->{_rc_cut}) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1071
|
287
|
|
|
|
|
425
|
$self->{_overhang}="5'"; |
1072
|
|
|
|
|
|
|
} elsif ($self->{_cut} == $self->{_rc_cut}) { |
1073
|
116
|
|
|
|
|
168
|
$self->{_overhang}="blunt"; |
1074
|
|
|
|
|
|
|
} elsif ($self->{_cut} > $self->{_rc_cut}) { |
1075
|
110
|
|
|
|
|
172
|
$self->{_overhang}="3'"; |
1076
|
|
|
|
|
|
|
} else { |
1077
|
0
|
|
|
|
|
0
|
$self->{_overhang}="unknown"; |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
return $self->{_overhang} |
1080
|
513
|
|
|
|
|
752
|
} |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
=head2 overhang_seq |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
Title : overhang_seq |
1085
|
|
|
|
|
|
|
Usage : $re->overhang_seq(); |
1086
|
|
|
|
|
|
|
Function : Determines the overhang sequence of the restriction enzyme |
1087
|
|
|
|
|
|
|
Returns : a Bio::LocatableSeq |
1088
|
|
|
|
|
|
|
Argument : n/a |
1089
|
|
|
|
|
|
|
Throws : n/a |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
I do not think it is necessary to create a seq object of these. (Heikki) |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
Note: returns empty string for blunt sequences and undef for ones that |
1094
|
|
|
|
|
|
|
we don't know. Compare these: |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
A blunt site in SmaI returns empty string |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
5' C C C^G G G 3' |
1099
|
|
|
|
|
|
|
3' G G G^C C C 5' |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
A 5' overhang in EcoRI returns C |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
5' G^A A T T C 3' |
1104
|
|
|
|
|
|
|
3' C T T A A^G 5' |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
A 3' overhang in KpnI returns C |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
5' G G T A C^C 3' |
1109
|
|
|
|
|
|
|
3' C^C A T G G 5' |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
Note that you need to use method L to decide |
1112
|
|
|
|
|
|
|
whether it is a 5' or 3' overhang!!! |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
Note: The overhang stuff does not work if the site is asymmetric! Rethink! |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=cut |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
sub overhang_seq { |
1119
|
5
|
|
|
5
|
1
|
7
|
my $self = shift; |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
# my $overhang->Bio::PrimarySeq(-id=>$self->name . '-overhang', |
1122
|
|
|
|
|
|
|
# -verbose=>$self->verbose, |
1123
|
|
|
|
|
|
|
# -alphabet=>'dna'); |
1124
|
|
|
|
|
|
|
|
1125
|
5
|
50
|
|
|
|
11
|
return '' if $self->overhang eq 'blunt' ; |
1126
|
|
|
|
|
|
|
|
1127
|
5
|
50
|
33
|
|
|
21
|
unless ($self->{_cut} && $self->{_rc_cut}) { |
1128
|
|
|
|
|
|
|
# lets just check that we really can't figure it out |
1129
|
0
|
|
|
|
|
0
|
$self->cut; |
1130
|
0
|
|
|
|
|
0
|
$self->complementary_cut; |
1131
|
0
|
0
|
0
|
|
|
0
|
unless ($self->{_cut} && $self->{_rc_cut}) { |
1132
|
0
|
|
|
|
|
0
|
return; |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
# this is throwing an error for sequences outside the restriction |
1137
|
|
|
|
|
|
|
# site (eg ^NNNNGATCNNNN^) |
1138
|
|
|
|
|
|
|
# So if this is the case we need to fake these guys |
1139
|
5
|
50
|
33
|
|
|
37
|
if (($self->{_cut}<0) || |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1140
|
|
|
|
|
|
|
($self->{_rc_cut}<0) || |
1141
|
|
|
|
|
|
|
($self->{_cut}>$self->seq->length) || |
1142
|
|
|
|
|
|
|
($self->{_rc_cut}>$self->seq->length)) { |
1143
|
0
|
|
|
|
|
0
|
my $tempseq=$self->site; |
1144
|
0
|
|
|
|
|
0
|
my ($five, $three)=split /\^/, $tempseq; |
1145
|
0
|
0
|
|
|
|
0
|
if ($self->{_cut} > $self->{_rc_cut}) { |
|
|
0
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
return substr($five, $self->{_rc_cut}) |
1147
|
0
|
|
|
|
|
0
|
} elsif ($self->{_cut} < $self->{_rc_cut}) { |
1148
|
|
|
|
|
|
|
return substr($three, 0, $self->{_rc_cut}) |
1149
|
0
|
|
|
|
|
0
|
} else { |
1150
|
0
|
|
|
|
|
0
|
return ''; |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
|
1154
|
5
|
50
|
|
|
|
20
|
if ($self->{_cut} > $self->{_rc_cut}) { |
|
|
50
|
|
|
|
|
|
1155
|
0
|
|
|
|
|
0
|
return $self->seq->subseq($self->{_rc_cut}+1,$self->{_cut}); |
1156
|
|
|
|
|
|
|
} elsif ($self->{_cut} < $self->{_rc_cut}) { |
1157
|
5
|
|
|
|
|
12
|
return $self->seq->subseq($self->{_cut}+1, $self->{_rc_cut}); |
1158
|
|
|
|
|
|
|
} else { |
1159
|
0
|
|
|
|
|
0
|
return ''; |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
} |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
=head2 compatible_ends |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
Title : compatible_ends |
1168
|
|
|
|
|
|
|
Usage : $re->compatible_ends($re2); |
1169
|
|
|
|
|
|
|
Function : Determines if the two restriction enzyme cut sites |
1170
|
|
|
|
|
|
|
have compatible ends. |
1171
|
|
|
|
|
|
|
Returns : 0 if not, 1 if only one pair ends match, 2 if both ends. |
1172
|
|
|
|
|
|
|
Argument : a Bio::Restriction::Enzyme |
1173
|
|
|
|
|
|
|
Throws : unless the argument is a Bio::Resriction::Enzyme and |
1174
|
|
|
|
|
|
|
if there are Ns in the ovarhangs |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
In case of type II enzymes which which cut symmetrically, this |
1177
|
|
|
|
|
|
|
function can be considered to return a boolean value. |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
=cut |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
sub compatible_ends { |
1183
|
1
|
|
|
1
|
1
|
2
|
my ($self, $re) = @_; |
1184
|
|
|
|
|
|
|
|
1185
|
1
|
50
|
|
|
|
10
|
$self->throw("Need a Bio::Restriction::Enzyme as an argument, [$re]") |
1186
|
|
|
|
|
|
|
unless $re->isa('Bio::Restriction::Enzyme'); |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# $self->throw("Only type II enzymes work now") |
1189
|
|
|
|
|
|
|
# unless $self->type eq 'II'; |
1190
|
|
|
|
|
|
|
|
1191
|
1
|
50
|
33
|
|
|
3
|
$self->debug("N(s) in overhangs. Can not compare") |
1192
|
|
|
|
|
|
|
if $self->overhang_seq =~ /N/ or $re->overhang_seq =~ /N/; |
1193
|
|
|
|
|
|
|
|
1194
|
1
|
50
|
33
|
|
|
5
|
return 2 if $self->overhang_seq eq $re->overhang_seq and |
1195
|
|
|
|
|
|
|
$self->overhang eq $re->overhang; |
1196
|
|
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
0
|
return 0; |
1198
|
|
|
|
|
|
|
} |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head2 is_ambiguous |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Title : is_ambiguous |
1203
|
|
|
|
|
|
|
Usage : $re->is_ambiguous(); |
1204
|
|
|
|
|
|
|
Function : Determines if the restriction enzyme contains ambiguous sequences |
1205
|
|
|
|
|
|
|
Returns : Boolean |
1206
|
|
|
|
|
|
|
Argument : n/a |
1207
|
|
|
|
|
|
|
Throws : n/a |
1208
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
=cut |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
sub is_ambiguous { |
1212
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1213
|
1
|
50
|
|
|
|
5
|
return $self->string =~ m/[^AGCT]/ ? 1 : 0 ; |
1214
|
|
|
|
|
|
|
} |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
=head2 Additional methods from Rebase |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=cut |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=head2 is_prototype |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
Title : is_prototype |
1223
|
|
|
|
|
|
|
Usage : $re->is_prototype |
1224
|
|
|
|
|
|
|
Function : Get/Set method for finding out if this enzyme is a prototype |
1225
|
|
|
|
|
|
|
Example : $re->is_prototype(1) |
1226
|
|
|
|
|
|
|
Returns : Boolean |
1227
|
|
|
|
|
|
|
Args : none |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
Prototype enzymes are the most commonly available and usually first |
1230
|
|
|
|
|
|
|
enzymes discoverd that have the same recognition site. Using only |
1231
|
|
|
|
|
|
|
prototype enzymes in restriction analysis avoids redundancy and |
1232
|
|
|
|
|
|
|
speeds things up. |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=cut |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
sub is_prototype { |
1237
|
7213
|
|
|
7213
|
1
|
5418
|
my ($self, $value) = @_; |
1238
|
7213
|
100
|
|
|
|
9877
|
if (defined $value) { |
1239
|
7210
|
|
|
|
|
12159
|
return $self->{'_is_prototype'} = $value ; |
1240
|
|
|
|
|
|
|
} |
1241
|
3
|
100
|
|
|
|
8
|
if (defined $self->{'_is_prototype'}) { |
1242
|
2
|
|
|
|
|
6
|
return $self->{'_is_prototype'} |
1243
|
|
|
|
|
|
|
} else { |
1244
|
1
|
|
|
|
|
10
|
$self->warn("Can't unequivocally assign prototype based on input format alone"); |
1245
|
|
|
|
|
|
|
return |
1246
|
0
|
|
|
|
|
0
|
} |
1247
|
|
|
|
|
|
|
} |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
=head2 is_neoschizomer |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
Title : is_neoschizomer |
1252
|
|
|
|
|
|
|
Usage : $re->is_neoschizomer |
1253
|
|
|
|
|
|
|
Function : Get/Set method for finding out if this enzyme is a neoschizomer |
1254
|
|
|
|
|
|
|
Example : $re->is_neoschizomer(1) |
1255
|
|
|
|
|
|
|
Returns : Boolean |
1256
|
|
|
|
|
|
|
Args : none |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
Neoschizomers are distinguishable from the prototype enzyme by having a |
1259
|
|
|
|
|
|
|
different cleavage pattern. Note that not all formats report this |
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
=cut |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
sub is_neoschizomer { |
1264
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
1265
|
0
|
0
|
|
|
|
0
|
if (defined $value) { |
1266
|
0
|
|
|
|
|
0
|
return $self->{'_is_neoschizomer'} = $value ; |
1267
|
|
|
|
|
|
|
} |
1268
|
0
|
0
|
|
|
|
0
|
if (defined $self->{'_is_neoschizomer'}) { |
1269
|
0
|
|
|
|
|
0
|
return $self->{'_is_neoschizomer'} |
1270
|
|
|
|
|
|
|
} else { |
1271
|
0
|
|
|
|
|
0
|
$self->warn("Can't unequivocally assign neoschizomer based on input format alone"); |
1272
|
|
|
|
|
|
|
return |
1273
|
0
|
|
|
|
|
0
|
} |
1274
|
|
|
|
|
|
|
} |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=head2 prototype_name |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
Title : prototype_name |
1279
|
|
|
|
|
|
|
Alias : prototype |
1280
|
|
|
|
|
|
|
Usage : $re->prototype_name |
1281
|
|
|
|
|
|
|
Function : Get/Set method for the name of prototype for |
1282
|
|
|
|
|
|
|
this enzyme's recognition site |
1283
|
|
|
|
|
|
|
Example : $re->prototype_name(1) |
1284
|
|
|
|
|
|
|
Returns : prototype enzyme name string or an empty string |
1285
|
|
|
|
|
|
|
Args : optional prototype enzyme name string |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
If the enzyme itself is the prototype, its own name is returned. Not to |
1288
|
|
|
|
|
|
|
confuse the negative result with an unset value, use method |
1289
|
|
|
|
|
|
|
L. |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
This method is called I rather than I, |
1292
|
|
|
|
|
|
|
because it returns a string rather than on object. |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=cut |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
sub prototype_name { |
1297
|
12
|
|
|
12
|
1
|
12
|
my $self = shift; |
1298
|
|
|
|
|
|
|
|
1299
|
12
|
100
|
|
|
|
28
|
$self->{'_prototype'} = shift if @_; |
1300
|
12
|
100
|
|
|
|
27
|
return $self->name if $self->{'_is_prototype'}; |
1301
|
2
|
|
50
|
|
|
10
|
return $self->{'_prototype'} || ''; |
1302
|
|
|
|
|
|
|
} |
1303
|
|
|
|
|
|
|
|
1304
|
9
|
|
|
9
|
0
|
16
|
sub prototype { shift->prototype_name(@_) } |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
=head2 isoschizomers |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
Title : isoschizomers |
1309
|
|
|
|
|
|
|
Alias : isos |
1310
|
|
|
|
|
|
|
Usage : $re->isoschizomers(@list); |
1311
|
|
|
|
|
|
|
Function : Gets/Sets a list of known isoschizomers (enzymes that |
1312
|
|
|
|
|
|
|
recognize the same site, but don't necessarily cut at |
1313
|
|
|
|
|
|
|
the same position). |
1314
|
|
|
|
|
|
|
Arguments : A reference to an array that contains the isoschizomers |
1315
|
|
|
|
|
|
|
Returns : A reference to an array of the known isoschizomers or 0 |
1316
|
|
|
|
|
|
|
if not defined. |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
This has to be the hardest name to spell, so now you can use the alias |
1319
|
|
|
|
|
|
|
'isos'. Added for compatibility to REBASE |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=cut |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
sub isoschizomers { |
1324
|
7456
|
|
|
7456
|
1
|
6203
|
my ($self) = shift; |
1325
|
7456
|
100
|
|
|
|
10298
|
push @{$self->{_isoschizomers}}, @_ if @_; |
|
7454
|
|
|
|
|
12049
|
|
1326
|
|
|
|
|
|
|
# make sure that you don't dereference if null |
1327
|
|
|
|
|
|
|
# chad believes quite strongly that you should return |
1328
|
|
|
|
|
|
|
# a reference to an array anyway. don't bother dereferencing. |
1329
|
|
|
|
|
|
|
# i'll post that to the list. |
1330
|
7456
|
50
|
|
|
|
10736
|
if ($self->{'_isoschizomers'}) { |
1331
|
7456
|
|
|
|
|
6174
|
return @{$self->{_isoschizomers}}; |
|
7456
|
|
|
|
|
7997
|
|
1332
|
|
|
|
|
|
|
} |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
} |
1335
|
|
|
|
|
|
|
|
1336
|
0
|
|
|
0
|
0
|
0
|
sub isos { shift->isoschizomers(@_) } |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
=head2 purge_isoschizomers |
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
Title : purge_isoschizomers |
1341
|
|
|
|
|
|
|
Alias : purge_isos |
1342
|
|
|
|
|
|
|
Usage : $re->purge_isoschizomers(); |
1343
|
|
|
|
|
|
|
Function : Purges the set of isoschizomers for this enzyme |
1344
|
|
|
|
|
|
|
Arguments : |
1345
|
|
|
|
|
|
|
Returns : 1 |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=cut |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
sub purge_isoschizomers { |
1350
|
1
|
|
|
1
|
1
|
4
|
my ($self) = shift; |
1351
|
1
|
|
|
|
|
6
|
$self->{_isoschizomers} = []; |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
} |
1354
|
|
|
|
|
|
|
|
1355
|
0
|
|
|
0
|
0
|
0
|
sub purge_isos { shift->purge_isoschizomers(@_) } |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=head2 methylation_sites |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
Title : methylation_sites |
1360
|
|
|
|
|
|
|
Usage : $re->methylation_sites(\%sites); |
1361
|
|
|
|
|
|
|
Function : Gets/Sets known methylation sites (positions on the sequence |
1362
|
|
|
|
|
|
|
that get modified to promote or prevent cleavage). |
1363
|
|
|
|
|
|
|
Arguments : A reference to a hash that contains the methylation sites |
1364
|
|
|
|
|
|
|
Returns : A reference to a hash of the methylation sites or |
1365
|
|
|
|
|
|
|
an empty string if not defined. |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
There are three types of methylation sites: |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=over 3 |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=item * (6) = N6-methyladenosine |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=item * (5) = 5-methylcytosine |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
=item * (4) = N4-methylcytosine |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=back |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
These are stored as 6, 5, and 4 respectively. The hash has the |
1380
|
|
|
|
|
|
|
sequence position as the key and the type of methylation as the value. |
1381
|
|
|
|
|
|
|
A negative number in the sequence position indicates that the DNA is |
1382
|
|
|
|
|
|
|
methylated on the complementary strand. |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
Note that in REBASE, the methylation positions are given |
1385
|
|
|
|
|
|
|
Added for compatibility to REBASE. |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=cut |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
sub methylation_sites { |
1390
|
782
|
|
|
782
|
1
|
598
|
my $self = shift; |
1391
|
|
|
|
|
|
|
|
1392
|
782
|
|
|
|
|
1175
|
while (@_) { |
1393
|
829
|
|
|
|
|
601
|
my $key = shift; |
1394
|
829
|
|
|
|
|
2281
|
$self->{'_methylation_sites'}->{$key} = shift; |
1395
|
|
|
|
|
|
|
} |
1396
|
782
|
|
|
|
|
552
|
return %{$self->{_methylation_sites}}; |
|
782
|
|
|
|
|
1201
|
|
1397
|
|
|
|
|
|
|
} |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=head2 purge_methylation_sites |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
Title : purge_methylation_sites |
1403
|
|
|
|
|
|
|
Usage : $re->purge_methylation_sites(); |
1404
|
|
|
|
|
|
|
Function : Purges the set of methylation_sites for this enzyme |
1405
|
|
|
|
|
|
|
Arguments : |
1406
|
|
|
|
|
|
|
Returns : |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=cut |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
sub purge_methylation_sites { |
1411
|
23
|
|
|
23
|
1
|
25
|
my ($self) = shift; |
1412
|
23
|
|
|
|
|
36
|
$self->{_methylation_sites} = {}; |
1413
|
|
|
|
|
|
|
} |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=head2 microbe |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
Title : microbe |
1418
|
|
|
|
|
|
|
Usage : $re->microbe($microbe); |
1419
|
|
|
|
|
|
|
Function : Gets/Sets microorganism where the restriction enzyme was found |
1420
|
|
|
|
|
|
|
Arguments : A scalar containing the microbes name |
1421
|
|
|
|
|
|
|
Returns : A scalar containing the microbes name or 0 if not defined |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
Added for compatibility to REBASE |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
=cut |
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
sub microbe { |
1428
|
2
|
|
|
2
|
1
|
3
|
my ($self, $microbe) = @_; |
1429
|
2
|
100
|
|
|
|
6
|
if ($microbe) { |
1430
|
1
|
|
|
|
|
2
|
$self->{_microbe}=$microbe; |
1431
|
|
|
|
|
|
|
} |
1432
|
2
|
|
50
|
|
|
9
|
return $self->{_microbe} || ''; |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
} |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
=head2 source |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
Title : source |
1440
|
|
|
|
|
|
|
Usage : $re->source('Rob Edwards'); |
1441
|
|
|
|
|
|
|
Function : Gets/Sets the person who provided the enzyme |
1442
|
|
|
|
|
|
|
Arguments : A scalar containing the persons name |
1443
|
|
|
|
|
|
|
Returns : A scalar containing the persons name or 0 if not defined |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
Added for compatibility to REBASE |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
=cut |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
sub source { |
1450
|
7455
|
|
|
7455
|
1
|
6042
|
my ($self, $source) = @_; |
1451
|
7455
|
100
|
|
|
|
9526
|
if ($source) { |
1452
|
7454
|
|
|
|
|
8182
|
$self->{_source}=$source; |
1453
|
|
|
|
|
|
|
} |
1454
|
7455
|
|
50
|
|
|
10779
|
return $self->{_source} || ''; |
1455
|
|
|
|
|
|
|
} |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=head2 vendors |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
Title : vendors |
1461
|
|
|
|
|
|
|
Usage : $re->vendor(@list_of_companies); |
1462
|
|
|
|
|
|
|
Function : Gets/Sets the a list of companies that you can get the enzyme from. |
1463
|
|
|
|
|
|
|
Also sets the commercially_available boolean |
1464
|
|
|
|
|
|
|
Arguments : A reference to an array containing the names of companies |
1465
|
|
|
|
|
|
|
that you can get the enzyme from |
1466
|
|
|
|
|
|
|
Returns : A reference to an array containing the names of companies |
1467
|
|
|
|
|
|
|
that you can get the enzyme from |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
Added for compatibility to REBASE |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=cut |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
sub vendors { |
1474
|
7472
|
|
|
7472
|
1
|
5304
|
my $self = shift; |
1475
|
7472
|
100
|
|
|
|
8827
|
push @{$self->{_vendors}}, @_ if @_; |
|
7470
|
|
|
|
|
8212
|
|
1476
|
7472
|
50
|
|
|
|
10169
|
if ($self->{'_vendors'}) { |
1477
|
7472
|
|
|
|
|
4629
|
return @{$self->{'_vendors'}}; |
|
7472
|
|
|
|
|
7087
|
|
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
} |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=head2 purge_vendors |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
Title : purge_vendors |
1485
|
|
|
|
|
|
|
Usage : $re->purge_references(); |
1486
|
|
|
|
|
|
|
Function : Purges the set of references for this enzyme |
1487
|
|
|
|
|
|
|
Arguments : |
1488
|
|
|
|
|
|
|
Returns : |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=cut |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
sub purge_vendors { |
1493
|
1
|
|
|
1
|
1
|
2
|
my ($self) = shift; |
1494
|
1
|
|
|
|
|
2
|
$self->{_vendors} = []; |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
} |
1497
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
=head2 vendor |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
Title : vendor |
1501
|
|
|
|
|
|
|
Usage : $re->vendor(@list_of_companies); |
1502
|
|
|
|
|
|
|
Function : Gets/Sets the a list of companies that you can get the enzyme from. |
1503
|
|
|
|
|
|
|
Also sets the commercially_available boolean |
1504
|
|
|
|
|
|
|
Arguments : A reference to an array containing the names of companies |
1505
|
|
|
|
|
|
|
that you can get the enzyme from |
1506
|
|
|
|
|
|
|
Returns : A reference to an array containing the names of companies |
1507
|
|
|
|
|
|
|
that you can get the enzyme from |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
Added for compatibility to REBASE |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=cut |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
sub vendor { |
1515
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
1516
|
1
|
|
|
|
|
2
|
return push @{$self->{_vendors}}, @_; |
|
1
|
|
|
|
|
5
|
|
1517
|
0
|
|
|
|
|
0
|
return $self->{_vendors}; |
1518
|
|
|
|
|
|
|
} |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
=head2 references |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
Title : references |
1524
|
|
|
|
|
|
|
Usage : $re->references(string); |
1525
|
|
|
|
|
|
|
Function : Gets/Sets the references for this enzyme |
1526
|
|
|
|
|
|
|
Arguments : an array of string reference(s) (optional) |
1527
|
|
|
|
|
|
|
Returns : an array of references |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
Use L to reset the list of references |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
This should be a L object, but its not (yet) |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
=cut |
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
sub references { |
1536
|
7472
|
|
|
7472
|
1
|
5942
|
my ($self) = shift; |
1537
|
7472
|
100
|
|
|
|
8624
|
push @{$self->{_references}}, @_ if @_; |
|
7470
|
|
|
|
|
7685
|
|
1538
|
7472
|
|
|
|
|
5740
|
return @{$self->{_references}}; |
|
7472
|
|
|
|
|
7705
|
|
1539
|
|
|
|
|
|
|
} |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=head2 purge_references |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
Title : purge_references |
1545
|
|
|
|
|
|
|
Usage : $re->purge_references(); |
1546
|
|
|
|
|
|
|
Function : Purges the set of references for this enzyme |
1547
|
|
|
|
|
|
|
Arguments : |
1548
|
|
|
|
|
|
|
Returns : 1 |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
=cut |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
sub purge_references { |
1553
|
1
|
|
|
1
|
1
|
1
|
my ($self) = shift; |
1554
|
1
|
|
|
|
|
3
|
$self->{_references} = []; |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
} |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=head2 clone |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
Title : clone |
1561
|
|
|
|
|
|
|
Usage : $re->clone |
1562
|
|
|
|
|
|
|
Function : Deep copy of the object |
1563
|
|
|
|
|
|
|
Arguments : - |
1564
|
|
|
|
|
|
|
Returns : new Bio::Restriction::EnzymeI object |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
This works as long as the object is a clean in-memory object using |
1567
|
|
|
|
|
|
|
scalars, arrays and hashes. You have been warned. |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
If you have module Storable, it is used, otherwise local code is used. |
1570
|
|
|
|
|
|
|
Todo: local code cuts circular references. |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
=cut |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# there's some issue here; deprecating and rolling another below/maj |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
sub clone_depr { |
1577
|
0
|
|
|
0
|
0
|
0
|
my ($self, $this) = @_; |
1578
|
|
|
|
|
|
|
|
1579
|
0
|
|
|
|
|
0
|
eval { require Storable; }; |
|
0
|
|
|
|
|
0
|
|
1580
|
0
|
0
|
|
|
|
0
|
return Storable::dclone($self) unless $@; |
1581
|
|
|
|
|
|
|
# modified from deep_copy() @ http://www.stonehenge.com/merlyn/UnixReview/col30.html |
1582
|
0
|
0
|
|
|
|
0
|
unless ($this) { |
1583
|
0
|
|
|
|
|
0
|
my $new; |
1584
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$self) { |
1585
|
0
|
0
|
|
|
|
0
|
if (not ref $self->{$k}) { |
1586
|
0
|
|
|
|
|
0
|
$new->{$k} = $self->{$k}; |
1587
|
|
|
|
|
|
|
} else { |
1588
|
0
|
|
|
|
|
0
|
$new->{$k} = $self->clone($self->{$k}); |
1589
|
|
|
|
|
|
|
} |
1590
|
|
|
|
|
|
|
#print Dumper $new; |
1591
|
|
|
|
|
|
|
} |
1592
|
0
|
|
|
|
|
0
|
bless $new, ref($self); |
1593
|
0
|
|
|
|
|
0
|
return $new; |
1594
|
|
|
|
|
|
|
} |
1595
|
0
|
0
|
|
|
|
0
|
if (not ref $this) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1596
|
0
|
|
|
|
|
0
|
$this; |
1597
|
|
|
|
|
|
|
} |
1598
|
|
|
|
|
|
|
elsif (ref $this eq "ARRAY") { |
1599
|
0
|
|
|
|
|
0
|
[map $self->clone($_), @$this]; |
1600
|
|
|
|
|
|
|
} |
1601
|
|
|
|
|
|
|
elsif (ref $this eq "HASH") { |
1602
|
0
|
|
|
|
|
0
|
+{map { $_ => $self->clone($this->{$_}) } keys %$this}; |
|
0
|
|
|
|
|
0
|
|
1603
|
|
|
|
|
|
|
} else { # objects |
1604
|
0
|
0
|
|
|
|
0
|
return if $this->isa('Bio::Restriction::EnzymeI'); |
1605
|
0
|
0
|
|
|
|
0
|
return $this->clone if $this->can('clone'); |
1606
|
0
|
|
|
|
|
0
|
my $obj; |
1607
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$this) { |
1608
|
0
|
0
|
|
|
|
0
|
if (not ref $this->{$k}) { |
1609
|
0
|
|
|
|
|
0
|
$obj->{$k} = $this->{$k}; |
1610
|
|
|
|
|
|
|
} else { |
1611
|
0
|
|
|
|
|
0
|
$obj->{$k} = $this->clone($this->{$k}); |
1612
|
|
|
|
|
|
|
} |
1613
|
|
|
|
|
|
|
} |
1614
|
0
|
|
|
|
|
0
|
bless $obj, ref($this); |
1615
|
0
|
|
|
|
|
0
|
return $obj; |
1616
|
|
|
|
|
|
|
} |
1617
|
|
|
|
|
|
|
} |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
sub clone { |
1620
|
1501
|
|
|
1501
|
1
|
919
|
my $self = shift; |
1621
|
1501
|
|
|
|
|
1127
|
my ($this, $visited) = @_; |
1622
|
1501
|
100
|
|
|
|
1781
|
unless (defined $this) { |
1623
|
52
|
|
|
|
|
57
|
my %h; |
1624
|
52
|
|
|
|
|
357
|
tie %h, 'Tie::RefHash'; |
1625
|
52
|
|
|
|
|
481
|
my $visited = \%h; |
1626
|
52
|
|
|
|
|
121
|
return $self->clone($self, $visited); |
1627
|
|
|
|
|
|
|
} |
1628
|
1449
|
|
|
|
|
769
|
my $thing; |
1629
|
1449
|
|
|
|
|
1191
|
for ($this) { |
1630
|
1449
|
100
|
|
|
|
1754
|
if (ref) { |
1631
|
475
|
100
|
|
|
|
1175
|
return $visited->{$this} if $visited->{$this}; |
1632
|
|
|
|
|
|
|
} |
1633
|
|
|
|
|
|
|
# scalar |
1634
|
1447
|
100
|
|
|
|
4213
|
(!ref) && do { |
1635
|
974
|
|
|
|
|
733
|
$thing = $this; |
1636
|
974
|
|
|
|
|
723
|
last; |
1637
|
|
|
|
|
|
|
}; |
1638
|
|
|
|
|
|
|
# object |
1639
|
473
|
100
|
|
|
|
901
|
(ref =~ /^Bio::/) && do { |
1640
|
108
|
|
|
|
|
125
|
$thing = {}; |
1641
|
108
|
|
|
|
|
141
|
bless($thing, ref); |
1642
|
108
|
|
|
|
|
337
|
$visited->{$this} = $thing; |
1643
|
108
|
|
|
|
|
700
|
foreach my $attr (keys %{$_}) { |
|
108
|
|
|
|
|
359
|
|
1644
|
1196
|
100
|
|
|
|
1825
|
$thing->{$attr} = (defined $_->{$attr} ? $self->clone($_->{$attr},$visited) : undef ); |
1645
|
|
|
|
|
|
|
} |
1646
|
108
|
|
|
|
|
141
|
last; |
1647
|
|
|
|
|
|
|
}; |
1648
|
365
|
100
|
|
|
|
583
|
(ref eq 'ARRAY') && do { |
1649
|
311
|
|
|
|
|
278
|
$thing = []; |
1650
|
311
|
|
|
|
|
616
|
$visited->{$this} = $thing; |
1651
|
311
|
|
|
|
|
1434
|
foreach my $elt (@{$_}) { |
|
311
|
|
|
|
|
391
|
|
1652
|
309
|
50
|
|
|
|
557
|
push @$thing, (defined $elt ? $self->clone($elt,$visited) : undef); |
1653
|
|
|
|
|
|
|
} |
1654
|
311
|
|
|
|
|
263
|
last; |
1655
|
|
|
|
|
|
|
}; |
1656
|
54
|
50
|
|
|
|
146
|
(ref eq 'HASH') && do { |
1657
|
54
|
|
|
|
|
87
|
$thing = {}; |
1658
|
54
|
|
|
|
|
124
|
$visited->{$this} = $thing; |
1659
|
4
|
|
|
4
|
|
21
|
no warnings qw( uninitialized ); # avoid 'uninitialized value' warning against $key |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
211
|
|
1660
|
54
|
|
|
|
|
284
|
foreach my $key (%{$_}) { |
|
54
|
|
|
|
|
149
|
|
1661
|
0
|
0
|
|
|
|
0
|
$thing->{$key} = (defined $_->{key} ? $self->clone( $_->{$key},$visited) : undef ); |
1662
|
|
|
|
|
|
|
} |
1663
|
4
|
|
|
4
|
|
14
|
use warnings; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
837
|
|
1664
|
54
|
|
|
|
|
68
|
last; |
1665
|
|
|
|
|
|
|
}; |
1666
|
0
|
0
|
|
|
|
0
|
(ref eq 'SCALAR') && do { |
1667
|
0
|
|
|
|
|
0
|
$thing = ${$_}; |
|
0
|
|
|
|
|
0
|
|
1668
|
0
|
|
|
|
|
0
|
$visited->{$this} = $thing; |
1669
|
0
|
|
|
|
|
0
|
$thing = \$thing; |
1670
|
0
|
|
|
|
|
0
|
last; |
1671
|
|
|
|
|
|
|
}; |
1672
|
|
|
|
|
|
|
} |
1673
|
|
|
|
|
|
|
|
1674
|
1447
|
|
|
|
|
2269
|
return $thing; |
1675
|
|
|
|
|
|
|
} |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=head2 _expand |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
Title : _expand |
1682
|
|
|
|
|
|
|
Function : Expand nucleotide ambiguity codes to their representative letters |
1683
|
|
|
|
|
|
|
Returns : The full length string |
1684
|
|
|
|
|
|
|
Arguments : The string to be expanded. |
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
Stolen from the original RestrictionEnzyme.pm |
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
=cut |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
sub _expand { |
1692
|
7774
|
|
|
7774
|
|
6236
|
my $str = shift; |
1693
|
|
|
|
|
|
|
|
1694
|
7774
|
|
|
|
|
20023
|
$str =~ s/N|X/\./g; |
1695
|
7774
|
|
|
|
|
8372
|
$str =~ s/R/\[AG\]/g; |
1696
|
7774
|
|
|
|
|
7220
|
$str =~ s/Y/\[CT\]/g; |
1697
|
7774
|
|
|
|
|
5969
|
$str =~ s/S/\[GC\]/g; |
1698
|
7774
|
|
|
|
|
6943
|
$str =~ s/W/\[AT\]/g; |
1699
|
7774
|
|
|
|
|
6156
|
$str =~ s/M/\[AC\]/g; |
1700
|
7774
|
|
|
|
|
5374
|
$str =~ s/K/\[TG\]/g; |
1701
|
7774
|
|
|
|
|
5364
|
$str =~ s/B/\[CGT\]/g; |
1702
|
7774
|
|
|
|
|
5134
|
$str =~ s/D/\[AGT\]/g; |
1703
|
7774
|
|
|
|
|
5117
|
$str =~ s/H/\[ACT\]/g; |
1704
|
7774
|
|
|
|
|
5090
|
$str =~ s/V/\[ACG\]/g; |
1705
|
|
|
|
|
|
|
|
1706
|
7774
|
|
|
|
|
8494
|
return $str; |
1707
|
|
|
|
|
|
|
} |
1708
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
1; |
1710
|
|
|
|
|
|
|
|