| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# BioPerl module Bio::Restriction::Enzyme |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Cared for by Rob Edwards |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
10
|
|
|
|
|
|
|
#------------------------------------------------------------------ |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
## POD Documentation: |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Restriction::Enzyme - A single restriction endonuclease |
|
17
|
|
|
|
|
|
|
(cuts DNA at specific locations) |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# set up a single restriction enzyme. This contains lots of |
|
22
|
|
|
|
|
|
|
# information about the enzyme that is generally parsed from a |
|
23
|
|
|
|
|
|
|
# rebase file and can then be read back |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use Bio::Restriction::Enzyme; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# define a new enzyme with the cut sequence |
|
28
|
|
|
|
|
|
|
my $re=Bio::Restriction::Enzyme->new |
|
29
|
|
|
|
|
|
|
(-enzyme=>'EcoRI', -seq=>'G^AATTC'); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# once the sequence has been defined a bunch of stuff is calculated |
|
32
|
|
|
|
|
|
|
# for you: |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#### PRECALCULATED |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# find where the enzyme cuts after ... |
|
37
|
|
|
|
|
|
|
my $ca=$re->cut; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# ... and where it cuts on the opposite strand |
|
40
|
|
|
|
|
|
|
my $oca = $re->complementary_cut; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# get the cut sequence string back. |
|
43
|
|
|
|
|
|
|
# Note that site will return the sequence with a caret |
|
44
|
|
|
|
|
|
|
my $with_caret=$re->site; #returns 'G^AATTC'; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# but it is also a Bio::PrimarySeq object .... |
|
47
|
|
|
|
|
|
|
my $without_caret=$re->seq; # returns 'GAATTC'; |
|
48
|
|
|
|
|
|
|
# ... and so does string |
|
49
|
|
|
|
|
|
|
$without_caret=$re->string; #returns 'GAATTC'; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# what is the reverse complement of the cut site |
|
52
|
|
|
|
|
|
|
my $rc=$re->revcom; # returns 'GAATTC'; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# now the recognition length. There are two types: |
|
55
|
|
|
|
|
|
|
# recognition_length() is the length of the sequence |
|
56
|
|
|
|
|
|
|
# cutter() estimate of cut frequency |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $recog_length = $re->recognition_length; # returns 6 |
|
59
|
|
|
|
|
|
|
# also returns 6 in this case but would return |
|
60
|
|
|
|
|
|
|
# 4 for GANNTC and 5 for RGATCY (BstX2I)! |
|
61
|
|
|
|
|
|
|
$recog_length=$re->cutter; |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# is the sequence a palindrome - the same forwards and backwards |
|
64
|
|
|
|
|
|
|
my $pal= $re->palindromic; # this is a boolean |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# is the sequence blunt (i.e. no overhang - the forward and reverse |
|
67
|
|
|
|
|
|
|
# cuts are the same) |
|
68
|
|
|
|
|
|
|
print "blunt\n" if $re->overhang eq 'blunt'; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Overhang can have three values: "5'", "3'", "blunt", and undef |
|
71
|
|
|
|
|
|
|
# Direction is very important if you use Klenow! |
|
72
|
|
|
|
|
|
|
my $oh=$re->overhang; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# what is the overhang sequence |
|
75
|
|
|
|
|
|
|
my $ohseq=$re->overhang_seq; # will return 'AATT'; |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# is the sequence ambiguous - does it contain non-GATC bases? |
|
78
|
|
|
|
|
|
|
my $ambig=$re->is_ambiguous; # this is boolean |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
print "Stuff about the enzyme\nCuts after: $ca\n", |
|
81
|
|
|
|
|
|
|
"Complementary cut: $oca\nSite:\n\t$with_caret or\n", |
|
82
|
|
|
|
|
|
|
"\t$without_caret\n"; |
|
83
|
|
|
|
|
|
|
print "Reverse of the sequence: $rc\nRecognition length: $recog_length\n", |
|
84
|
|
|
|
|
|
|
"Is it palindromic? $pal\n"; |
|
85
|
|
|
|
|
|
|
print "The overhang is $oh with sequence $ohseq\n", |
|
86
|
|
|
|
|
|
|
"And is it ambiguous? $ambig\n\n"; |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
### THINGS YOU CAN SET, and get from rich REBASE file |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# get or set the isoschizomers (enzymes that recognize the same |
|
92
|
|
|
|
|
|
|
# site) |
|
93
|
|
|
|
|
|
|
$re->isoschizomers('PvuII', 'SmaI'); # not really true :) |
|
94
|
|
|
|
|
|
|
print "Isoschizomers are ", join " ", $re->isoschizomers, "\n"; |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# get or set the methylation sites |
|
97
|
|
|
|
|
|
|
$re->methylation_sites(2); # not really true :) |
|
98
|
|
|
|
|
|
|
print "Methylated at ", join " ", keys %{$re->methylation_sites},"\n"; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#Get or set the source microbe |
|
101
|
|
|
|
|
|
|
$re->microbe('E. coli'); |
|
102
|
|
|
|
|
|
|
print "It came from ", $re->microbe, "\n"; |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# get or set the person who isolated it |
|
105
|
|
|
|
|
|
|
$re->source("Rob"); # not really true :) |
|
106
|
|
|
|
|
|
|
print $re->source, " sent it to us\n"; |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# get or set whether it is commercially available and the company |
|
109
|
|
|
|
|
|
|
# that it can be bought at |
|
110
|
|
|
|
|
|
|
$re->vendors('NEB'); # my favorite |
|
111
|
|
|
|
|
|
|
print "Is it commercially available :"; |
|
112
|
|
|
|
|
|
|
print $re->vendors ? "Yes" : "No"; |
|
113
|
|
|
|
|
|
|
print " and it can be got from ", join " ", |
|
114
|
|
|
|
|
|
|
$re->vendors, "\n"; |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# get or set a reference for this |
|
117
|
|
|
|
|
|
|
$re->reference('Edwards et al. J. Bacteriology'); |
|
118
|
|
|
|
|
|
|
print "It was not published in ", $re->reference, "\n"; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# get or set the enzyme name |
|
121
|
|
|
|
|
|
|
$re->name('BamHI'); |
|
122
|
|
|
|
|
|
|
print "The name of EcoRI is not really ", $re->name, "\n"; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
This module defines a single restriction endonuclease. You can use it |
|
128
|
|
|
|
|
|
|
to make custom restriction enzymes, and it is used by |
|
129
|
|
|
|
|
|
|
Bio::Restriction::IO to define enzymes in the New England Biolabs |
|
130
|
|
|
|
|
|
|
REBASE collection. |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Use Bio::Restriction::Analysis to figure out which enzymes are available |
|
133
|
|
|
|
|
|
|
and where they cut your sequence. |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head1 RESTRICTION MODIFICATION SYSTEMS |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
At least three geneticaly and biochamically distinct restriction |
|
139
|
|
|
|
|
|
|
modification systems exist. The cutting components of them are known |
|
140
|
|
|
|
|
|
|
as restriction endonuleases. The three systems are known by roman |
|
141
|
|
|
|
|
|
|
numerals: Type I, II, and III restriction enzymes. |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
REBASE format 'cutzymes'(#15) lists enzyme type in its last field. The |
|
144
|
|
|
|
|
|
|
categories there do not always match the the following short |
|
145
|
|
|
|
|
|
|
descriptions of the enzymes types. See |
|
146
|
|
|
|
|
|
|
http://it.stlawu.edu/~tbudd/rmsyst.html for a better overview. |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 TypeI |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Type I systems recognize a bipartite asymetrical sequence of 5-7 bp: |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
---TGA*NnTGCT--- * = methylation sites |
|
154
|
|
|
|
|
|
|
---ACTNnA*CGA--- n = 6 for EcoK, n = 8 for EcoB |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The cleavage site is roughly 1000 (400-7000) base pairs from the |
|
157
|
|
|
|
|
|
|
recognition site. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 TypeII |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
The simplest and most common (at least commercially). |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Site recognition is via short palindromic base sequences that are 4-6 |
|
164
|
|
|
|
|
|
|
base pairs long. Cleavage is at the recognition site (but may |
|
165
|
|
|
|
|
|
|
occasionally be just adjacent to the palindromic sequence, usually |
|
166
|
|
|
|
|
|
|
within) and may produce blunt end termini or staggered, "sticky |
|
167
|
|
|
|
|
|
|
end" termini. |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 TypeIII |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
The recognition site is a 5-7 bp asymmetrical sequence. Cleavage is |
|
172
|
|
|
|
|
|
|
ATP dependent 24-26 base pairs downstream from the recognition site |
|
173
|
|
|
|
|
|
|
and usually yields staggered cuts 2-4 bases apart. |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 COMMENTS |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
I am trying to make this backwards compatible with |
|
179
|
|
|
|
|
|
|
Bio::Tools::RestrictionEnzyme. Undoubtedly some things will break, |
|
180
|
|
|
|
|
|
|
but we can fix things as we progress.....! |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
I have added another comments section at the end of this POD that |
|
183
|
|
|
|
|
|
|
discusses a couple of areas I know are broken (at the moment) |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head1 TO DO |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=over 2 |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item * |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Convert vendors touse full names of companies instead of code |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=item * |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Add regular expression based matching to vendors |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item * |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Move away from the archaic ^ notation for cut sites. Ideally |
|
201
|
|
|
|
|
|
|
I'd totally like to remove this altogether, or add a method |
|
202
|
|
|
|
|
|
|
that adds it in if someone really wants it. We should be |
|
203
|
|
|
|
|
|
|
fixed on a sequence, number notation. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=back |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
212
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
213
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
216
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 Support |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
I |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
225
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
226
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
227
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
232
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
233
|
|
|
|
|
|
|
web: |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 AUTHOR |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
Rob Edwards, redwards@utmem.edu |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Heikki Lehvaslaiho, heikki-at-bioperl-dot-org |
|
244
|
|
|
|
|
|
|
Peter Blaiklock, pblaiklo@restrictionmapper.org |
|
245
|
|
|
|
|
|
|
Mark A. Jensen, maj-at-fortinbras-dot-us |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Copyright (c) 2003 Rob Edwards. |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All |
|
252
|
|
|
|
|
|
|
Rights Reserved. This module is free software; you can redistribute |
|
253
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
L, |
|
258
|
|
|
|
|
|
|
L, L |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head1 APPENDIX |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Methods beginning with a leading underscore are considered private and |
|
263
|
|
|
|
|
|
|
are intended for internal use by this module. They are not considered |
|
264
|
|
|
|
|
|
|
part of the public interface and are described here for documentation |
|
265
|
|
|
|
|
|
|
purposes only. |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=cut |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
package Bio::Restriction::Enzyme; |
|
270
|
4
|
|
|
4
|
|
568
|
use strict; |
|
|
4
|
|
|
|
|
3
|
|
|
|
4
|
|
|
|
|
94
|
|
|
271
|
|
|
|
|
|
|
|
|
272
|
4
|
|
|
4
|
|
624
|
use Bio::PrimarySeq; |
|
|
4
|
|
|
|
|
5
|
|
|
|
4
|
|
|
|
|
86
|
|
|
273
|
|
|
|
|
|
|
|
|
274
|
4
|
|
|
4
|
|
16
|
use Data::Dumper; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
183
|
|
|
275
|
4
|
|
|
4
|
|
1691
|
use Tie::RefHash; |
|
|
4
|
|
|
|
|
9232
|
|
|
|
4
|
|
|
|
|
93
|
|
|
276
|
4
|
|
|
4
|
|
19
|
use vars qw (%TYPE); |
|
|
4
|
|
|
|
|
4
|
|
|
|
4
|
|
|
|
|
122
|
|
|
277
|
4
|
|
|
4
|
|
14
|
use base qw(Bio::Root::Root Bio::Restriction::EnzymeI); |
|
|
4
|
|
|
|
|
5
|
|
|
|
4
|
|
|
|
|
1787
|
|
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
BEGIN { |
|
280
|
4
|
|
|
4
|
|
6345
|
my %TYPE = (I => 1, II => 1, III => 1); |
|
281
|
|
|
|
|
|
|
} |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 new |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Title : new |
|
286
|
|
|
|
|
|
|
Function |
|
287
|
|
|
|
|
|
|
Function : Initializes the Enzyme object |
|
288
|
|
|
|
|
|
|
Returns : The Restriction::Enzyme object |
|
289
|
|
|
|
|
|
|
Argument : A standard definition can have several formats. For example: |
|
290
|
|
|
|
|
|
|
$re->new(-enzyme='EcoRI', -seq->'GAATTC' -cut->'1') |
|
291
|
|
|
|
|
|
|
Or, you can define the cut site in the sequence, for example |
|
292
|
|
|
|
|
|
|
$re->new(-enzyme='EcoRI', -seq->'G^AATTC'), but you must use a caret |
|
293
|
|
|
|
|
|
|
Or, a sequence can cut outside the recognition site, for example |
|
294
|
|
|
|
|
|
|
$re->new(-enzyme='AbeI', -seq->'CCTCAGC' -cut->'-5/-2') |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Other arguments: |
|
297
|
|
|
|
|
|
|
-isoschizomers=>\@list a reference to an array of |
|
298
|
|
|
|
|
|
|
known isoschizomers |
|
299
|
|
|
|
|
|
|
-references=>$ref a reference to the enzyme |
|
300
|
|
|
|
|
|
|
-source=>$source the source (person) of the enzyme |
|
301
|
|
|
|
|
|
|
-commercial_availability=>@companies a list of companies |
|
302
|
|
|
|
|
|
|
that supply the enzyme |
|
303
|
|
|
|
|
|
|
-methylation_site=>\%sites a reference to hash that has |
|
304
|
|
|
|
|
|
|
the position as the key and the type of methylation |
|
305
|
|
|
|
|
|
|
as the value |
|
306
|
|
|
|
|
|
|
-xln_sub => sub { ($self,$cut) = @_; ...; return $xln_cut }, |
|
307
|
|
|
|
|
|
|
a coderef to a routine that translates the input cut value |
|
308
|
|
|
|
|
|
|
into Bio::Restriction::Enzyme coordinates |
|
309
|
|
|
|
|
|
|
( e.g., for withrefm format, this might be |
|
310
|
|
|
|
|
|
|
-xln_sub => sub { length( shift()->string ) + shift } ) |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
A Restriction::Enzyme object manages its recognition sequence as a |
|
313
|
|
|
|
|
|
|
Bio::PrimarySeq object. |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
The minimum requirement is for a name and a sequence. |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
This will create the restriction enzyme object, and define several |
|
318
|
|
|
|
|
|
|
things about the sequence, such as palindromic, size, etc. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# do all cut/comp cut setting within the constructor |
|
323
|
|
|
|
|
|
|
# new args |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub new { |
|
326
|
10163
|
|
|
10163
|
1
|
25021
|
my($class, @args) = @_; |
|
327
|
10163
|
|
|
|
|
20528
|
my $self = $class->SUPER::new(@args); |
|
328
|
|
|
|
|
|
|
|
|
329
|
10163
|
|
|
|
|
39650
|
my ($name,$enzyme,$site,$seq,$precut, $postcut,$cut,$complementary_cut, $is_prototype, $prototype, |
|
330
|
|
|
|
|
|
|
$isoschizomers, $meth, $microbe, $source, $vendors, $references, $neo, $recog, $xln_sub) = |
|
331
|
|
|
|
|
|
|
$self->_rearrange([qw( |
|
332
|
|
|
|
|
|
|
NAME |
|
333
|
|
|
|
|
|
|
ENZYME |
|
334
|
|
|
|
|
|
|
SITE |
|
335
|
|
|
|
|
|
|
SEQ |
|
336
|
|
|
|
|
|
|
PRECUT |
|
337
|
|
|
|
|
|
|
POSTCUT |
|
338
|
|
|
|
|
|
|
CUT |
|
339
|
|
|
|
|
|
|
COMPLEMENTARY_CUT |
|
340
|
|
|
|
|
|
|
IS_PROTOTYPE |
|
341
|
|
|
|
|
|
|
PROTOTYPE |
|
342
|
|
|
|
|
|
|
ISOSCHIZOMERS |
|
343
|
|
|
|
|
|
|
METHYLATION_SITES |
|
344
|
|
|
|
|
|
|
MICROBE |
|
345
|
|
|
|
|
|
|
SOURCE |
|
346
|
|
|
|
|
|
|
VENDORS |
|
347
|
|
|
|
|
|
|
REFERENCES |
|
348
|
|
|
|
|
|
|
IS_NEOSCHIZOMER |
|
349
|
|
|
|
|
|
|
RECOG |
|
350
|
|
|
|
|
|
|
XLN_SUB |
|
351
|
|
|
|
|
|
|
)], @args); |
|
352
|
|
|
|
|
|
|
|
|
353
|
10163
|
0
|
66
|
|
|
53968
|
$self->throw('At the minimum, you must define a name and '. |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
354
|
|
|
|
|
|
|
'recognition site for the restriction enzyme') |
|
355
|
|
|
|
|
|
|
unless (($name || $enzyme) && ($site || $recog || $seq)); |
|
356
|
|
|
|
|
|
|
|
|
357
|
10163
|
|
|
|
|
11706
|
$self->{_isoschizomers} = []; |
|
358
|
10163
|
|
|
|
|
9691
|
$self->{_methylation_sites} = {}; |
|
359
|
10163
|
|
|
|
|
9147
|
$self->{_vendors} = []; |
|
360
|
10163
|
|
|
|
|
9941
|
$self->{_references} = []; |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# squelch warnings |
|
363
|
10163
|
|
100
|
|
|
23299
|
$postcut ||=''; |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# enzyme name |
|
366
|
10163
|
100
|
|
|
|
12199
|
$enzyme && $self->name($enzyme); |
|
367
|
10163
|
100
|
|
|
|
19387
|
$name && $self->name($name); |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# site |
|
370
|
|
|
|
|
|
|
# |
|
371
|
|
|
|
|
|
|
# note that the site() setter will automatically set |
|
372
|
|
|
|
|
|
|
# cut(), complementary_cut(), if the cut site is indicated |
|
373
|
|
|
|
|
|
|
# in $site with '^' /maj |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# create the cut site if appropriate/this is a kludge due to |
|
376
|
|
|
|
|
|
|
# the base.pm format in the new B:R order... |
|
377
|
10163
|
100
|
100
|
|
|
21747
|
if ( $cut and $cut <= length $site) { |
|
378
|
2531
|
|
|
|
|
4448
|
$site = substr($site, 0, $cut).'^'.substr($site, $cut); |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
10163
|
50
|
|
|
|
12212
|
if ($site) { |
|
382
|
10163
|
|
|
|
|
12633
|
$self->site($site); |
|
383
|
|
|
|
|
|
|
} |
|
384
|
|
|
|
|
|
|
else { |
|
385
|
0
|
0
|
|
|
|
0
|
$seq && $self->site($seq); |
|
386
|
|
|
|
|
|
|
} |
|
387
|
|
|
|
|
|
|
|
|
388
|
10162
|
100
|
|
|
|
11324
|
if ($recog) { |
|
389
|
7495
|
|
|
|
|
9651
|
$self->recog($recog); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
else { |
|
392
|
2667
|
50
|
|
|
|
3302
|
$seq && $self->recog($seq); |
|
393
|
2667
|
50
|
|
|
|
4642
|
$site && $self->recog($site); |
|
394
|
|
|
|
|
|
|
} |
|
395
|
|
|
|
|
|
|
# call revcom_site to initialize it and revcom_recog: |
|
396
|
10162
|
|
|
|
|
11843
|
$self->revcom_site(); |
|
397
|
|
|
|
|
|
|
|
|
398
|
10162
|
|
|
|
|
17245
|
$recog = $self->string; # for length calculations below |
|
399
|
|
|
|
|
|
|
|
|
400
|
10162
|
100
|
|
|
|
15481
|
if ($xln_sub) { |
|
401
|
7494
|
50
|
|
|
|
13441
|
$self->warn("Translation subroutine is not a coderef; ignoring") unless |
|
402
|
|
|
|
|
|
|
ref($xln_sub) eq 'CODE'; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# cut coordinates |
|
406
|
10162
|
|
|
|
|
11272
|
my ($pc_cut, $pc_comp_cut) = ( $postcut =~ /(-?\d+)\/(-?\d+)/ ); |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# cut definitions in constructor override any autoset in |
|
409
|
|
|
|
|
|
|
# site() |
|
410
|
|
|
|
|
|
|
# definitions in site conform to withrefm coords, translation |
|
411
|
|
|
|
|
|
|
# happens here |
|
412
|
|
|
|
|
|
|
|
|
413
|
10162
|
100
|
|
|
|
16176
|
if (defined $cut) { |
|
|
|
100
|
|
|
|
|
|
|
414
|
2664
|
50
|
|
|
|
4511
|
$self->cut( $xln_sub ? $xln_sub->($self, $cut) : $cut ); |
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
elsif ( defined $pc_cut ) { |
|
417
|
457
|
100
|
|
|
|
1088
|
$self->cut( $xln_sub ? $xln_sub->($self, $pc_cut) : $pc_cut ); |
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
|
|
420
|
10162
|
100
|
|
|
|
16503
|
if (defined $complementary_cut) { |
|
|
|
100
|
|
|
|
|
|
|
421
|
4
|
50
|
|
|
|
10
|
$self->complementary_cut($xln_sub ? $xln_sub->($self,$complementary_cut) : $complementary_cut); |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
elsif (defined $pc_comp_cut) { |
|
424
|
457
|
100
|
|
|
|
884
|
$self->complementary_cut($xln_sub ? $xln_sub->($self,$pc_comp_cut) : $pc_comp_cut); |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
|
|
427
|
10162
|
100
|
|
|
|
18619
|
$is_prototype && $self->is_prototype($is_prototype); |
|
428
|
10162
|
100
|
|
|
|
14366
|
$prototype && $self->prototype($prototype); |
|
429
|
10162
|
100
|
|
|
|
17423
|
$isoschizomers && $self->isoschizomers($isoschizomers); |
|
430
|
10162
|
50
|
|
|
|
12455
|
$meth && $self->methylation_sites($meth); |
|
431
|
10162
|
50
|
|
|
|
10908
|
$microbe && $self->microbe($microbe); |
|
432
|
10162
|
100
|
|
|
|
16515
|
$source && $self->source($source); |
|
433
|
10162
|
100
|
|
|
|
14740
|
$vendors && $self->vendors($vendors); |
|
434
|
10162
|
100
|
|
|
|
15532
|
$references && $self->references($references); |
|
435
|
10162
|
50
|
|
|
|
13116
|
$neo && $self->is_neoschizomer($neo); |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# create multicut enzymes here if $precut defined |
|
438
|
10162
|
100
|
|
|
|
12731
|
if (defined $precut) { |
|
439
|
49
|
|
|
|
|
114
|
bless $self, 'Bio::Restriction::Enzyme::MultiCut'; |
|
440
|
49
|
|
|
|
|
179
|
my ($pc_cut, $pc_comp_cut) = $precut =~ /(-?\d+)\/(-?\d+)/; |
|
441
|
49
|
|
|
|
|
157
|
my $re2 = $self->clone; |
|
442
|
49
|
50
|
|
|
|
194
|
$re2->cut($xln_sub ? $xln_sub->($self, -$pc_cut) : -$pc_cut); |
|
443
|
49
|
50
|
|
|
|
162
|
$re2->complementary_cut($xln_sub ? $xln_sub->($self, -$pc_comp_cut) : -$pc_comp_cut); |
|
444
|
49
|
|
|
|
|
146
|
$self->others($re2); |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
|
|
447
|
10162
|
|
|
|
|
26934
|
return $self; |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head1 Essential methods |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head2 name |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Title : name |
|
457
|
|
|
|
|
|
|
Usage : $re->name($newval) |
|
458
|
|
|
|
|
|
|
Function : Gets/Sets the restriction enzyme name |
|
459
|
|
|
|
|
|
|
Example : $re->name('EcoRI') |
|
460
|
|
|
|
|
|
|
Returns : value of name |
|
461
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
This will also clean up the name. I have added this because some |
|
464
|
|
|
|
|
|
|
people get confused about restriction enzyme names. The name should |
|
465
|
|
|
|
|
|
|
be One upper case letter, and two lower case letters (because it is |
|
466
|
|
|
|
|
|
|
derived from the organism name, eg. EcoRI is from E. coli). After |
|
467
|
|
|
|
|
|
|
that it is all confused, but the numbers should be roman numbers not |
|
468
|
|
|
|
|
|
|
numbers, therefore we'll correct those. At least this will provide |
|
469
|
|
|
|
|
|
|
some standard, I hope. |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub name{ |
|
474
|
70047
|
|
|
70047
|
1
|
50639
|
my ($self, $name)=@_; |
|
475
|
|
|
|
|
|
|
|
|
476
|
70047
|
100
|
|
|
|
76093
|
if ($name) { # correct and set the name |
|
477
|
10164
|
|
|
|
|
7542
|
my $old_name = $name; |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# remove spaces. Some people write HindIII as Hind III |
|
480
|
10164
|
|
|
|
|
14149
|
$name =~ s/\s+//g; |
|
481
|
|
|
|
|
|
|
# change TAILING ones to I's |
|
482
|
10164
|
50
|
|
|
|
18036
|
if ($name =~ m/(1+)$/) { |
|
483
|
0
|
|
|
|
|
0
|
my $i = 'I' x length($1); |
|
484
|
0
|
|
|
|
|
0
|
$name =~ s/1+$/$i/; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# make the first letter upper case |
|
488
|
10164
|
|
|
|
|
25271
|
$name =~ s/^(\w)/uc($1)/e; |
|
|
10164
|
|
|
|
|
20997
|
|
|
489
|
|
|
|
|
|
|
|
|
490
|
10164
|
50
|
|
|
|
15642
|
unless ($name eq $old_name) { |
|
491
|
|
|
|
|
|
|
# we have changed the name, so send a warning |
|
492
|
0
|
|
|
|
|
0
|
$self->warn("The enzyme name $old_name was changed to $name"); |
|
493
|
|
|
|
|
|
|
} |
|
494
|
10164
|
|
|
|
|
14069
|
$self->{'_name'} = $name; |
|
495
|
|
|
|
|
|
|
} |
|
496
|
70047
|
|
|
|
|
140318
|
return $self->{'_name'}; |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head2 site |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Title : site |
|
503
|
|
|
|
|
|
|
Usage : $re->site(); |
|
504
|
|
|
|
|
|
|
Function : Gets/sets the recognition sequence for the enzyme. |
|
505
|
|
|
|
|
|
|
Example : $seq_string = $re->site(); |
|
506
|
|
|
|
|
|
|
Returns : String containing recognition sequence indicating |
|
507
|
|
|
|
|
|
|
: cleavage site as in 'G^AATTC'. |
|
508
|
|
|
|
|
|
|
Argument : n/a |
|
509
|
|
|
|
|
|
|
Throws : n/a |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Side effect: the sequence is always converted to upper case. |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
The cut site can also be set by using methods L and |
|
515
|
|
|
|
|
|
|
L. |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
This will pad out missing sequence with N's. For example the enzyme |
|
518
|
|
|
|
|
|
|
Acc36I cuts at ACCTGC(4/8). This will be returned as ACCTGCNNNN^ |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Note that the common notation ACCTGC(4/8) means that the forward |
|
521
|
|
|
|
|
|
|
strand cut is four nucleotides after the END of the recognition |
|
522
|
|
|
|
|
|
|
site. The forward cut() in the coordinates used here in Acc36I |
|
523
|
|
|
|
|
|
|
ACCTGC(4/8) is at 6+4 i.e. 10. |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
** This is the main setable method for the recognition site. |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=cut |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sub site { |
|
530
|
10169
|
|
|
10169
|
1
|
9446
|
my ( $self, $site ) = @_; |
|
531
|
|
|
|
|
|
|
|
|
532
|
10169
|
100
|
|
|
|
13133
|
if ($site) { |
|
533
|
|
|
|
|
|
|
|
|
534
|
10163
|
100
|
|
|
|
17119
|
$self->throw("Unrecognized characters in site: [$site]") |
|
535
|
|
|
|
|
|
|
if $site =~ /[^ATGCMRWSYKVHDBN\^]/i; |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
# we may have to redefine this if there is a ^ in the sequence |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# first, check and see if we have a cut site in the sequence |
|
540
|
|
|
|
|
|
|
# if so, find the position, and set the target sequence and cut site |
|
541
|
|
|
|
|
|
|
|
|
542
|
10162
|
|
|
|
|
10630
|
$self->{'_site'} = $site; |
|
543
|
|
|
|
|
|
|
|
|
544
|
10162
|
|
|
|
|
18936
|
my ( $first, $second ) = $site =~ /(.*)\^(.*)/; |
|
545
|
10162
|
100
|
|
|
|
18436
|
$site = "$1$2" if defined $first; |
|
546
|
10162
|
|
|
|
|
8224
|
$self->{'_site'} = $site; |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# now set the recognition site as a new Bio::PrimarySeq object |
|
549
|
|
|
|
|
|
|
# we need it before calling cut() and complementary_cut() |
|
550
|
10162
|
|
|
|
|
13289
|
$self->{_seq} = Bio::PrimarySeq->new( |
|
551
|
|
|
|
|
|
|
-id => $self->name, |
|
552
|
|
|
|
|
|
|
-seq => $site, |
|
553
|
|
|
|
|
|
|
-verbose => $self->verbose, |
|
554
|
|
|
|
|
|
|
-alphabet => 'dna' |
|
555
|
|
|
|
|
|
|
); |
|
556
|
|
|
|
|
|
|
|
|
557
|
10162
|
100
|
|
|
|
18755
|
if ( defined $first ) { |
|
558
|
4945
|
|
|
|
|
7204
|
$self->cut( length $first ); |
|
559
|
4945
|
|
|
|
|
6335
|
$self->complementary_cut( length $second ); |
|
560
|
4945
|
|
|
|
|
6221
|
$self->revcom_site(); |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
} |
|
563
|
10168
|
|
|
|
|
12039
|
return $self->{'_site'}; |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 revcom_site |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
Title : revcom_site |
|
569
|
|
|
|
|
|
|
Usage : $re->revcom_site(); |
|
570
|
|
|
|
|
|
|
Function : Gets/sets the complementary recognition sequence for the enzyme. |
|
571
|
|
|
|
|
|
|
Example : $seq_string = $re->revcom_site(); |
|
572
|
|
|
|
|
|
|
Returns : String containing recognition sequence indicating |
|
573
|
|
|
|
|
|
|
: cleavage site as in 'G^AATTC'. |
|
574
|
|
|
|
|
|
|
Argument : none (sets on first call) |
|
575
|
|
|
|
|
|
|
Throws : n/a |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
This is the same as site, except it returns the revcom site. For |
|
578
|
|
|
|
|
|
|
palindromic enzymes these two are identical. For non-palindromic |
|
579
|
|
|
|
|
|
|
enzymes they are not! |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
On set, this also handles setting the revcom_recog attribute. |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
See also L above. |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=cut |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
sub revcom_site { |
|
588
|
15112
|
|
|
15112
|
1
|
10991
|
my $self = shift; |
|
589
|
|
|
|
|
|
|
# getter |
|
590
|
15112
|
100
|
|
|
|
20823
|
return $self->{'_revcom_site'} unless !$self->{'_revcom_site'}; |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# setter |
|
593
|
10162
|
|
|
|
|
8153
|
my $site = $self->{'_site'}; |
|
594
|
10162
|
100
|
|
|
|
12697
|
if ($self->is_palindromic) { |
|
595
|
9266
|
|
|
|
|
9632
|
$self->{'_revcom_site'}=$self->{'_site'}; |
|
596
|
9266
|
|
|
|
|
11406
|
$self->revcom_recog( $self->string ); |
|
597
|
9266
|
|
|
|
|
10580
|
return $self->{'_revcom_site'}; |
|
598
|
|
|
|
|
|
|
} |
|
599
|
|
|
|
|
|
|
|
|
600
|
896
|
50
|
|
|
|
1732
|
$self->throw("Unrecognized characters in revcom site: [$site]") |
|
601
|
|
|
|
|
|
|
if $site =~ /[^ATGCMRWSYKVHDBN\^]/i; |
|
602
|
|
|
|
|
|
|
|
|
603
|
896
|
100
|
|
|
|
1335
|
if ($site =~ /\^/) { |
|
604
|
|
|
|
|
|
|
# first, check and see if we have a cut site indicated in the sequence |
|
605
|
|
|
|
|
|
|
# if so, find the position, and set the target sequence and cut site |
|
606
|
16
|
|
|
|
|
31
|
$site = $self->revcom; |
|
607
|
16
|
|
|
|
|
36
|
$self->revcom_recog( $site ); |
|
608
|
16
|
|
|
|
|
41
|
my $c = length($site)-$self->cut; |
|
609
|
16
|
|
|
|
|
53
|
$site = substr($site, 0, $c).'^'.substr($site,$c); |
|
610
|
16
|
|
|
|
|
28
|
$self->{'_revcom_site'} = $site; |
|
611
|
|
|
|
|
|
|
} |
|
612
|
|
|
|
|
|
|
else { |
|
613
|
880
|
|
|
|
|
1046
|
my $revcom=$self->revcom; |
|
614
|
880
|
|
|
|
|
1641
|
$self->revcom_recog( $revcom ); |
|
615
|
|
|
|
|
|
|
# my $cc=$self->complementary_cut; |
|
616
|
|
|
|
|
|
|
# my $hat=length($revcom)-$cc+1; # we need it on the other strand! |
|
617
|
|
|
|
|
|
|
# if ($cc > length($revcom)) { |
|
618
|
|
|
|
|
|
|
# my $pad= "N" x ($cc-length($revcom)); |
|
619
|
|
|
|
|
|
|
# $revcom = $pad. $revcom; |
|
620
|
|
|
|
|
|
|
# $hat=length($revcom)-$cc+1; |
|
621
|
|
|
|
|
|
|
# } |
|
622
|
|
|
|
|
|
|
# elsif ($cc < 0) { |
|
623
|
|
|
|
|
|
|
# my $pad = "N" x -$cc; |
|
624
|
|
|
|
|
|
|
# $revcom .= $pad; |
|
625
|
|
|
|
|
|
|
# $hat=length($revcom); |
|
626
|
|
|
|
|
|
|
# } |
|
627
|
|
|
|
|
|
|
# $revcom =~ s/(.{$hat})/$1\^/; |
|
628
|
880
|
|
|
|
|
967
|
$self->{'_revcom_site'}=$revcom; |
|
629
|
|
|
|
|
|
|
} |
|
630
|
896
|
|
|
|
|
833
|
return $self->{'_revcom_site'}; |
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=head2 cut |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
Title : cut |
|
636
|
|
|
|
|
|
|
Usage : $num = $re->cut(1); |
|
637
|
|
|
|
|
|
|
Function : Sets/gets an integer indicating the position of cleavage |
|
638
|
|
|
|
|
|
|
relative to the 5' end of the recognition sequence in the |
|
639
|
|
|
|
|
|
|
forward strand. |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
For type II enzymes, sets the symmetrically positioned |
|
642
|
|
|
|
|
|
|
reverse strand cut site by calling complementary_cut(). |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
Returns : Integer, 0 if not set |
|
645
|
|
|
|
|
|
|
Argument : an integer for the forward strand cut site (optional) |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Note that the common notation ACCTGC(4/8) means that the forward |
|
648
|
|
|
|
|
|
|
strand cut is four nucleotides after the END of the recognition |
|
649
|
|
|
|
|
|
|
site. The forwad cut in the coordinates used here in Acc36I |
|
650
|
|
|
|
|
|
|
ACCTGC(4/8) is at 6+4 i.e. 10. |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Note that REBASE uses notation where cuts within symmetic sites are |
|
653
|
|
|
|
|
|
|
marked by '^' within the forward sequence but if the site is |
|
654
|
|
|
|
|
|
|
asymmetric the parenthesis syntax is used where numbering ALWAYS |
|
655
|
|
|
|
|
|
|
starts from last nucleotide in the forward strand. That's why AciI has |
|
656
|
|
|
|
|
|
|
a site usually written as CCGC(-3/-1) actualy cuts in |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
C^C G C |
|
659
|
|
|
|
|
|
|
G G C^G |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
In our notation, these locations are 1 and 3. |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
The cuts locations in the notation used are relative to the first |
|
665
|
|
|
|
|
|
|
(non-N) nucleotide of the reported forward strand of the recognition |
|
666
|
|
|
|
|
|
|
sequence. The following diagram numbers the phosphodiester bonds |
|
667
|
|
|
|
|
|
|
(marked by + ) which can be cut by the restriction enzymes: |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
1 2 3 4 5 6 7 8 ... |
|
670
|
|
|
|
|
|
|
N + N + N + N + N + G + A + C + T + G + G + N + N + N |
|
671
|
|
|
|
|
|
|
... -5 -4 -3 -2 -1 |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub cut { |
|
677
|
24294
|
|
|
24294
|
1
|
18639
|
my ($self, $value) = @_; |
|
678
|
24294
|
100
|
|
|
|
29480
|
if (defined $value) { |
|
679
|
8117
|
50
|
|
|
|
18315
|
$self->throw("The cut position needs to be an integer [$value]") |
|
680
|
|
|
|
|
|
|
unless $value =~ /[-+]?\d+/; |
|
681
|
8117
|
|
|
|
|
8570
|
$self->{'_cut'} = $value; |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
# add the caret to the site attribute only if internal /maj |
|
684
|
8117
|
100
|
100
|
|
|
23965
|
if ( ($self->{_site} !~ /\^/) && ($value <= length ($self->{_site}))) { |
|
685
|
|
|
|
|
|
|
$self->{_site} = |
|
686
|
5174
|
|
|
|
|
10030
|
substr($self->{_site}, 0, $value). '^'. substr($self->{_site}, $value); |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
# auto-set comp cut only if cut site is inside the recog site./maj |
|
690
|
|
|
|
|
|
|
$self->complementary_cut(length ($self->seq->seq) - $value ) |
|
691
|
8117
|
100
|
100
|
|
|
19689
|
if (($self->{_site} =~ /\^/) && ($self->type eq 'II')); |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
} |
|
694
|
|
|
|
|
|
|
# return undef if not defined yet, not 0 /maj |
|
695
|
24294
|
|
|
|
|
29077
|
return $self->{'_cut'}; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head2 cuts_after |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
Title : cuts_after |
|
701
|
|
|
|
|
|
|
Usage : Alias for cut() |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=cut |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
sub cuts_after { |
|
706
|
0
|
|
|
0
|
1
|
0
|
shift->cut(@_); |
|
707
|
|
|
|
|
|
|
} |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=head2 complementary_cut |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
Title : complementary_cut |
|
713
|
|
|
|
|
|
|
Usage : $num = $re->complementary_cut('1'); |
|
714
|
|
|
|
|
|
|
Function : Sets/Gets an integer indicating the position of cleavage |
|
715
|
|
|
|
|
|
|
: on the reverse strand of the restriction site. |
|
716
|
|
|
|
|
|
|
Returns : Integer |
|
717
|
|
|
|
|
|
|
Argument : An integer (optional) |
|
718
|
|
|
|
|
|
|
Throws : Exception if argument is non-numeric. |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
This method determines the cut on the reverse strand of the sequence. |
|
721
|
|
|
|
|
|
|
For most enzymes this will be within the sequence, and will be set |
|
722
|
|
|
|
|
|
|
automatically based on the forward strand cut, but it need not be. |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
B that the returned location indicates the location AFTER the |
|
725
|
|
|
|
|
|
|
first non-N site nucleotide in the FORWARD strand. |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=cut |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
sub complementary_cut { |
|
730
|
16431
|
|
|
16431
|
1
|
11815
|
my ($self, $num)=@_; |
|
731
|
|
|
|
|
|
|
|
|
732
|
16431
|
100
|
|
|
|
19969
|
if (defined $num) { |
|
733
|
13152
|
50
|
|
|
|
27538
|
$self->throw("The cut position needs to be an integer [$num]") |
|
734
|
|
|
|
|
|
|
unless $num =~ /[-+]?\d+/; |
|
735
|
13152
|
|
|
|
|
12929
|
$self->{'_rc_cut'} = $num; |
|
736
|
|
|
|
|
|
|
} |
|
737
|
|
|
|
|
|
|
# return undef, not 0, if not yet defined /maj |
|
738
|
16431
|
|
|
|
|
14801
|
return $self->{'_rc_cut'}; |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=head1 Read only (usually) recognition site descriptive methods |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=cut |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=head2 type |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Title : type |
|
749
|
|
|
|
|
|
|
Usage : $re->type(); |
|
750
|
|
|
|
|
|
|
Function : Get/set the restriction system type |
|
751
|
|
|
|
|
|
|
Returns : |
|
752
|
|
|
|
|
|
|
Argument : optional type: ('I'|II|III) |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Restriction enzymes have been catezorized into three types. Some |
|
755
|
|
|
|
|
|
|
REBASE formats give the type, but the following rules can be used to |
|
756
|
|
|
|
|
|
|
classify the known enzymes: |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=over 4 |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=item 1 |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Bipartite site (with 6-8 Ns in the middle and the cut site |
|
763
|
|
|
|
|
|
|
is E 50 nt away) =E type I |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item 2 |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
Site length E 3 =E type I |
|
768
|
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=item 3 |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
5-6 asymmetric site and cuts E20 nt away =E type III |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=item 4 |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
All other =E type II |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=back |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
There are some enzymes in REBASE which have bipartite recognition site |
|
780
|
|
|
|
|
|
|
and cat far from the site but are still classified as type I. I've no |
|
781
|
|
|
|
|
|
|
idea if this is really so. |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=cut |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
sub type { |
|
786
|
7707
|
|
|
7707
|
1
|
6246
|
my ($self, $value) = @_; |
|
787
|
|
|
|
|
|
|
|
|
788
|
7707
|
50
|
|
|
|
9397
|
if ($value) { |
|
789
|
|
|
|
|
|
|
$self->throw("Not a valid value [$value], needs to one of : ". |
|
790
|
|
|
|
|
|
|
join (', ', sort keys %TYPE) ) |
|
791
|
0
|
0
|
|
|
|
0
|
unless $TYPE{$value}; |
|
792
|
0
|
|
|
|
|
0
|
return $self->{'_type'} = $value; |
|
793
|
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# pre set |
|
796
|
|
|
|
|
|
|
#return $self->{'_type'} if $self->{'_type'}; |
|
797
|
|
|
|
|
|
|
# bipartite |
|
798
|
|
|
|
|
|
|
return $self->{'_type'} = 'I' |
|
799
|
7707
|
50
|
66
|
|
|
11122
|
if $self->{'_seq'}->seq =~ /N*[^N]+N{6,8}[^N]/ and abs($self->cut) > 50 ; |
|
800
|
|
|
|
|
|
|
# 3 nt site |
|
801
|
|
|
|
|
|
|
return $self->{'_type'} = 'I' |
|
802
|
7707
|
100
|
|
|
|
12355
|
if $self->{'_seq'}->length == 3; |
|
803
|
|
|
|
|
|
|
# asymmetric and cuts > 20 nt |
|
804
|
7697
|
50
|
100
|
|
|
8906
|
return $self->{'_type'} = 'III' |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
805
|
|
|
|
|
|
|
if (length $self->string == 5 or length $self->string == 6 ) and |
|
806
|
|
|
|
|
|
|
not $self->palindromic and abs($self->cut) > 20; |
|
807
|
7697
|
|
|
|
|
22371
|
return $self->{'_type'} = 'II'; |
|
808
|
|
|
|
|
|
|
} |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=head2 seq |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Title : seq |
|
813
|
|
|
|
|
|
|
Usage : $re->seq(); |
|
814
|
|
|
|
|
|
|
Function : Get the Bio::PrimarySeq.pm object representing |
|
815
|
|
|
|
|
|
|
: the recognition sequence |
|
816
|
|
|
|
|
|
|
Returns : A Bio::PrimarySeq object representing the |
|
817
|
|
|
|
|
|
|
enzyme recognition site |
|
818
|
|
|
|
|
|
|
Argument : n/a |
|
819
|
|
|
|
|
|
|
Throws : n/a |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
sub seq { |
|
825
|
7854
|
|
|
7854
|
1
|
12136
|
shift->{'_seq'}; |
|
826
|
|
|
|
|
|
|
} |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=head2 string |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
Title : string |
|
831
|
|
|
|
|
|
|
Usage : $re->string(); |
|
832
|
|
|
|
|
|
|
Function : Get a string representing the recognition sequence. |
|
833
|
|
|
|
|
|
|
Returns : String. Does NOT contain a '^' representing the cut location |
|
834
|
|
|
|
|
|
|
as returned by the site() method. |
|
835
|
|
|
|
|
|
|
Argument : n/a |
|
836
|
|
|
|
|
|
|
Throws : n/a |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=cut |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
sub string { |
|
841
|
52617
|
|
|
52617
|
1
|
73954
|
shift->{'_seq'}->seq; |
|
842
|
|
|
|
|
|
|
} |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 recog |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Title : recog |
|
847
|
|
|
|
|
|
|
Usage : $enz->recog($recognition_sequence) |
|
848
|
|
|
|
|
|
|
Function: Gets/sets the pure recognition site. Sets as |
|
849
|
|
|
|
|
|
|
regexp if appropriate. |
|
850
|
|
|
|
|
|
|
As for string(), the cut indicating carets (^) |
|
851
|
|
|
|
|
|
|
are expunged. |
|
852
|
|
|
|
|
|
|
Example : |
|
853
|
|
|
|
|
|
|
Returns : value of recog (a scalar) |
|
854
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=cut |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
sub recog{ |
|
859
|
25643
|
|
|
25643
|
1
|
18561
|
my $self = shift; |
|
860
|
25643
|
|
|
|
|
16527
|
my $recog = shift; |
|
861
|
25643
|
100
|
|
|
|
47970
|
return $self->{'recog'} unless $recog; |
|
862
|
10162
|
|
|
|
|
14206
|
$recog =~ s/\^//g; |
|
863
|
10162
|
100
|
|
|
|
20744
|
$recog = _expand($recog) if $recog =~ /[^ATGC]/; |
|
864
|
10162
|
|
|
|
|
12480
|
return $self->{'recog'} = $recog; |
|
865
|
|
|
|
|
|
|
} |
|
866
|
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
=head2 revcom_recog |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
Title : revcom_recog |
|
870
|
|
|
|
|
|
|
Usage : $enz->revcom_recog($recognition_sequence) |
|
871
|
|
|
|
|
|
|
Function: Gets/sets the pure reverse-complemented recognition site. |
|
872
|
|
|
|
|
|
|
Sets as regexp if appropriate. |
|
873
|
|
|
|
|
|
|
As for string(), the cut indicating carets (^) are expunged. |
|
874
|
|
|
|
|
|
|
Example : |
|
875
|
|
|
|
|
|
|
Returns : value of recog (a scalar) |
|
876
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=cut |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
sub revcom_recog{ |
|
881
|
12969
|
|
|
12969
|
1
|
10919
|
my $self = shift; |
|
882
|
12969
|
|
|
|
|
9229
|
my $recog = shift; |
|
883
|
12969
|
100
|
|
|
|
17131
|
unless ($recog) { |
|
884
|
2807
|
50
|
|
|
|
3643
|
$self->throw( "revcom recognition site not set; call \$enz->revcom_site to initialize" ) unless $self->{'revcom_recog'}; |
|
885
|
2807
|
|
|
|
|
3539
|
return $self->{'revcom_recog'}; |
|
886
|
|
|
|
|
|
|
} |
|
887
|
10162
|
|
|
|
|
10705
|
$recog =~ s/\^//g; |
|
888
|
10162
|
100
|
|
|
|
22334
|
$recog = _expand($recog) if $recog =~ /[^ATGC]/; |
|
889
|
10162
|
|
|
|
|
12511
|
return $self->{'revcom_recog'} = $recog; |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=head2 revcom |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Title : revcom |
|
895
|
|
|
|
|
|
|
Usage : $re->revcom(); |
|
896
|
|
|
|
|
|
|
Function : Get a string representing the reverse complement of |
|
897
|
|
|
|
|
|
|
: the recognition sequence. |
|
898
|
|
|
|
|
|
|
Returns : String |
|
899
|
|
|
|
|
|
|
Argument : n/a |
|
900
|
|
|
|
|
|
|
Throws : n/a |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=cut |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub revcom { |
|
905
|
11059
|
|
|
11059
|
1
|
20741
|
shift->{'_seq'}->revcom->seq(); |
|
906
|
|
|
|
|
|
|
} |
|
907
|
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=head2 recognition_length |
|
909
|
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Title : recognition_length |
|
911
|
|
|
|
|
|
|
Usage : $re->recognition_length(); |
|
912
|
|
|
|
|
|
|
Function : Get the length of the RECOGNITION sequence. |
|
913
|
|
|
|
|
|
|
This is the total recognition sequence, |
|
914
|
|
|
|
|
|
|
inluding the ambiguous codes. |
|
915
|
|
|
|
|
|
|
Returns : An integer |
|
916
|
|
|
|
|
|
|
Argument : Nothing |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
See also: L |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=cut |
|
921
|
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
sub recognition_length { |
|
923
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
924
|
1
|
|
|
|
|
2
|
return length($self->string); |
|
925
|
|
|
|
|
|
|
} |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head2 cutter |
|
928
|
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
Title : cutter |
|
930
|
|
|
|
|
|
|
Usage : $re->cutter |
|
931
|
|
|
|
|
|
|
Function : Returns the "cutter" value of the recognition site. |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
This is a value relative to site length and lack of |
|
934
|
|
|
|
|
|
|
ambiguity codes. Hence: 'RCATGY' is a five (5) cutter site |
|
935
|
|
|
|
|
|
|
and 'CCTNAGG' a six cutter |
|
936
|
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
This measure correlates to the frequency of the enzyme |
|
938
|
|
|
|
|
|
|
cuts much better than plain recognition site length. |
|
939
|
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Example : $re->cutter |
|
941
|
|
|
|
|
|
|
Returns : integer or float number |
|
942
|
|
|
|
|
|
|
Args : none |
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
Why is this better than just stripping the ambiguos codes? Think about |
|
945
|
|
|
|
|
|
|
it like this: You have a random sequence; all nucleotides are equally |
|
946
|
|
|
|
|
|
|
probable. You have a four nucleotide re site. The probability of that |
|
947
|
|
|
|
|
|
|
site finding a match is one out of 4^4 or 256, meaning that on average |
|
948
|
|
|
|
|
|
|
a four cutter finds a match every 256 nucleotides. For a six cutter, |
|
949
|
|
|
|
|
|
|
the average fragment length is 4^6 or 4096. In the case of ambiguity |
|
950
|
|
|
|
|
|
|
codes the chances are finding the match are better: an R (A|T) has 1/2 |
|
951
|
|
|
|
|
|
|
chance of finding a match in a random sequence. Therefore, for RGCGCY |
|
952
|
|
|
|
|
|
|
the probability is one out of (2*4*4*4*4*2) which exactly the same as |
|
953
|
|
|
|
|
|
|
for a five cutter! Cutter, although it can have non-integer values |
|
954
|
|
|
|
|
|
|
turns out to be a useful and simple measure. |
|
955
|
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
From bug 2178: VHDB are ambiguity symbols that match three different |
|
957
|
|
|
|
|
|
|
nucleotides, so they contribute less to the effective recognition sequence |
|
958
|
|
|
|
|
|
|
length than e.g. Y which matches only two nucleotides. A symbol which matches n |
|
959
|
|
|
|
|
|
|
of the 4 nucleotides has an effective length of 1 - log(n) / log(4). |
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=cut |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
sub cutter { |
|
964
|
3532
|
|
|
3532
|
1
|
2200
|
my ($self)=@_; |
|
965
|
3532
|
|
|
|
|
2794
|
$_ = uc $self->string; |
|
966
|
|
|
|
|
|
|
|
|
967
|
3532
|
|
|
|
|
2864
|
my $cutter = tr/[ATGC]//d; |
|
968
|
3532
|
|
|
|
|
2273
|
my $count = tr/[MRWSYK]//d; |
|
969
|
3532
|
|
|
|
|
2513
|
$cutter += $count/2; |
|
970
|
3532
|
|
|
|
|
2306
|
$count = tr/[VHDB]//d; |
|
971
|
3532
|
|
|
|
|
2434
|
$cutter += $count * (1 - log(3) / log(4)); |
|
972
|
3532
|
|
|
|
|
6773
|
return $cutter; |
|
973
|
|
|
|
|
|
|
} |
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=head2 is_palindromic |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
Title : is_palindromic |
|
979
|
|
|
|
|
|
|
Alias : palindromic |
|
980
|
|
|
|
|
|
|
Usage : $re->is_palindromic(); |
|
981
|
|
|
|
|
|
|
Function : Determines if the recognition sequence is palindromic |
|
982
|
|
|
|
|
|
|
: for the current restriction enzyme. |
|
983
|
|
|
|
|
|
|
Returns : Boolean |
|
984
|
|
|
|
|
|
|
Argument : n/a |
|
985
|
|
|
|
|
|
|
Throws : n/a |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
A palindromic site (EcoRI): |
|
988
|
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
5-GAATTC-3 |
|
990
|
|
|
|
|
|
|
3-CTTAAG-5 |
|
991
|
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
=cut |
|
993
|
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
sub is_palindromic { |
|
995
|
36823
|
|
|
36823
|
1
|
24803
|
my $self = shift; |
|
996
|
36823
|
100
|
|
|
|
79256
|
return $self->{_palindromic} if defined $self->{_palindromic}; |
|
997
|
10162
|
100
|
|
|
|
11391
|
if ($self->string eq $self->revcom) { |
|
998
|
9266
|
|
|
|
|
23356
|
return $self->{_palindromic}=1; |
|
999
|
|
|
|
|
|
|
} |
|
1000
|
896
|
|
|
|
|
2119
|
return $self->{_palindromic} = 0; |
|
1001
|
|
|
|
|
|
|
} |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
5824
|
|
|
5824
|
0
|
8061
|
sub palindromic { shift->is_palindromic(@_) } |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
=head2 is_symmetric |
|
1006
|
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
Title : is_symmetric |
|
1008
|
|
|
|
|
|
|
Alias : symmetric |
|
1009
|
|
|
|
|
|
|
Usage : $re->is_symmetric(); |
|
1010
|
|
|
|
|
|
|
Function : Determines if the enzyme is a symmetric cutter |
|
1011
|
|
|
|
|
|
|
Returns : Boolean |
|
1012
|
|
|
|
|
|
|
Argument : none |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
A symmetric but non-palindromic site (HindI): |
|
1015
|
|
|
|
|
|
|
v |
|
1016
|
|
|
|
|
|
|
5-C A C-3 |
|
1017
|
|
|
|
|
|
|
3-G T G-5 |
|
1018
|
|
|
|
|
|
|
^ |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
=cut |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
sub is_symmetric { |
|
1023
|
4
|
|
|
4
|
|
20
|
no warnings qw( uninitialized ); |
|
|
4
|
|
|
|
|
4
|
|
|
|
4
|
|
|
|
|
5466
|
|
|
1024
|
9641
|
|
|
9641
|
1
|
6356
|
my $self = shift; |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
9641
|
100
|
|
|
|
18115
|
return $self->{_symmetric} if defined $self->{_symmetric}; |
|
1027
|
5355
|
100
|
|
|
|
4876
|
if ($self->is_palindromic) { |
|
1028
|
4892
|
|
|
|
|
11997
|
return $self->{_symmetric} = 1; |
|
1029
|
|
|
|
|
|
|
} |
|
1030
|
463
|
100
|
|
|
|
493
|
if ($self->cut == length($self->string) - $self->complementary_cut) { |
|
1031
|
27
|
|
|
|
|
82
|
return $self->{_symmetric}=1; |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
436
|
|
|
|
|
953
|
return $self->{_symmetric} = 0; |
|
1034
|
|
|
|
|
|
|
} |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
|
|
1037
|
0
|
|
|
0
|
0
|
0
|
sub symmetric { shift->is_symmetric(@_) } |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
=head2 overhang |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
Title : overhang |
|
1042
|
|
|
|
|
|
|
Usage : $re->overhang(); |
|
1043
|
|
|
|
|
|
|
Function : Determines the overhang of the restriction enzyme |
|
1044
|
|
|
|
|
|
|
Returns : "5'", "3'", "blunt" of undef |
|
1045
|
|
|
|
|
|
|
Argument : n/a |
|
1046
|
|
|
|
|
|
|
Throws : n/a |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
A blunt site in SmaI returns C |
|
1049
|
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
5' C C C^G G G 3' |
|
1051
|
|
|
|
|
|
|
3' G G G^C C C 5' |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
A 5' overhang in EcoRI returns C<5'> |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
5' G^A A T T C 3' |
|
1056
|
|
|
|
|
|
|
3' C T T A A^G 5' |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
A 3' overhang in KpnI returns C<3'> |
|
1059
|
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
5' G G T A C^C 3' |
|
1061
|
|
|
|
|
|
|
3' C^C A T G G 5' |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=cut |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
sub overhang { |
|
1066
|
545
|
|
|
545
|
1
|
331
|
my $self = shift; |
|
1067
|
545
|
100
|
66
|
|
|
1241
|
unless ($self->{'_cut'} && $self->{'_rc_cut'}) { |
|
1068
|
32
|
|
|
|
|
41
|
return "unknown"; |
|
1069
|
|
|
|
|
|
|
} |
|
1070
|
513
|
100
|
|
|
|
660
|
if ($self->{_cut} < $self->{_rc_cut}) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1071
|
287
|
|
|
|
|
434
|
$self->{_overhang}="5'"; |
|
1072
|
|
|
|
|
|
|
} elsif ($self->{_cut} == $self->{_rc_cut}) { |
|
1073
|
116
|
|
|
|
|
188
|
$self->{_overhang}="blunt"; |
|
1074
|
|
|
|
|
|
|
} elsif ($self->{_cut} > $self->{_rc_cut}) { |
|
1075
|
110
|
|
|
|
|
165
|
$self->{_overhang}="3'"; |
|
1076
|
|
|
|
|
|
|
} else { |
|
1077
|
0
|
|
|
|
|
0
|
$self->{_overhang}="unknown"; |
|
1078
|
|
|
|
|
|
|
} |
|
1079
|
|
|
|
|
|
|
return $self->{_overhang} |
|
1080
|
513
|
|
|
|
|
752
|
} |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
=head2 overhang_seq |
|
1083
|
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
Title : overhang_seq |
|
1085
|
|
|
|
|
|
|
Usage : $re->overhang_seq(); |
|
1086
|
|
|
|
|
|
|
Function : Determines the overhang sequence of the restriction enzyme |
|
1087
|
|
|
|
|
|
|
Returns : a Bio::LocatableSeq |
|
1088
|
|
|
|
|
|
|
Argument : n/a |
|
1089
|
|
|
|
|
|
|
Throws : n/a |
|
1090
|
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
I do not think it is necessary to create a seq object of these. (Heikki) |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
Note: returns empty string for blunt sequences and undef for ones that |
|
1094
|
|
|
|
|
|
|
we don't know. Compare these: |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
A blunt site in SmaI returns empty string |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
5' C C C^G G G 3' |
|
1099
|
|
|
|
|
|
|
3' G G G^C C C 5' |
|
1100
|
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
A 5' overhang in EcoRI returns C |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
5' G^A A T T C 3' |
|
1104
|
|
|
|
|
|
|
3' C T T A A^G 5' |
|
1105
|
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
A 3' overhang in KpnI returns C |
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
5' G G T A C^C 3' |
|
1109
|
|
|
|
|
|
|
3' C^C A T G G 5' |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
Note that you need to use method L to decide |
|
1112
|
|
|
|
|
|
|
whether it is a 5' or 3' overhang!!! |
|
1113
|
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
Note: The overhang stuff does not work if the site is asymmetric! Rethink! |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=cut |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
sub overhang_seq { |
|
1119
|
5
|
|
|
5
|
1
|
6
|
my $self = shift; |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
# my $overhang->Bio::PrimarySeq(-id=>$self->name . '-overhang', |
|
1122
|
|
|
|
|
|
|
# -verbose=>$self->verbose, |
|
1123
|
|
|
|
|
|
|
# -alphabet=>'dna'); |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
5
|
50
|
|
|
|
6
|
return '' if $self->overhang eq 'blunt' ; |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
5
|
50
|
33
|
|
|
16
|
unless ($self->{_cut} && $self->{_rc_cut}) { |
|
1128
|
|
|
|
|
|
|
# lets just check that we really can't figure it out |
|
1129
|
0
|
|
|
|
|
0
|
$self->cut; |
|
1130
|
0
|
|
|
|
|
0
|
$self->complementary_cut; |
|
1131
|
0
|
0
|
0
|
|
|
0
|
unless ($self->{_cut} && $self->{_rc_cut}) { |
|
1132
|
0
|
|
|
|
|
0
|
return; |
|
1133
|
|
|
|
|
|
|
} |
|
1134
|
|
|
|
|
|
|
} |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
# this is throwing an error for sequences outside the restriction |
|
1137
|
|
|
|
|
|
|
# site (eg ^NNNNGATCNNNN^) |
|
1138
|
|
|
|
|
|
|
# So if this is the case we need to fake these guys |
|
1139
|
5
|
50
|
33
|
|
|
20
|
if (($self->{_cut}<0) || |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
($self->{_rc_cut}<0) || |
|
1141
|
|
|
|
|
|
|
($self->{_cut}>$self->seq->length) || |
|
1142
|
|
|
|
|
|
|
($self->{_rc_cut}>$self->seq->length)) { |
|
1143
|
0
|
|
|
|
|
0
|
my $tempseq=$self->site; |
|
1144
|
0
|
|
|
|
|
0
|
my ($five, $three)=split /\^/, $tempseq; |
|
1145
|
0
|
0
|
|
|
|
0
|
if ($self->{_cut} > $self->{_rc_cut}) { |
|
|
|
0
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
return substr($five, $self->{_rc_cut}) |
|
1147
|
0
|
|
|
|
|
0
|
} elsif ($self->{_cut} < $self->{_rc_cut}) { |
|
1148
|
|
|
|
|
|
|
return substr($three, 0, $self->{_rc_cut}) |
|
1149
|
0
|
|
|
|
|
0
|
} else { |
|
1150
|
0
|
|
|
|
|
0
|
return ''; |
|
1151
|
|
|
|
|
|
|
} |
|
1152
|
|
|
|
|
|
|
} |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
5
|
50
|
|
|
|
12
|
if ($self->{_cut} > $self->{_rc_cut}) { |
|
|
|
50
|
|
|
|
|
|
|
1155
|
0
|
|
|
|
|
0
|
return $self->seq->subseq($self->{_rc_cut}+1,$self->{_cut}); |
|
1156
|
|
|
|
|
|
|
} elsif ($self->{_cut} < $self->{_rc_cut}) { |
|
1157
|
5
|
|
|
|
|
6
|
return $self->seq->subseq($self->{_cut}+1, $self->{_rc_cut}); |
|
1158
|
|
|
|
|
|
|
} else { |
|
1159
|
0
|
|
|
|
|
0
|
return ''; |
|
1160
|
|
|
|
|
|
|
} |
|
1161
|
|
|
|
|
|
|
} |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
=head2 compatible_ends |
|
1166
|
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
Title : compatible_ends |
|
1168
|
|
|
|
|
|
|
Usage : $re->compatible_ends($re2); |
|
1169
|
|
|
|
|
|
|
Function : Determines if the two restriction enzyme cut sites |
|
1170
|
|
|
|
|
|
|
have compatible ends. |
|
1171
|
|
|
|
|
|
|
Returns : 0 if not, 1 if only one pair ends match, 2 if both ends. |
|
1172
|
|
|
|
|
|
|
Argument : a Bio::Restriction::Enzyme |
|
1173
|
|
|
|
|
|
|
Throws : unless the argument is a Bio::Resriction::Enzyme and |
|
1174
|
|
|
|
|
|
|
if there are Ns in the ovarhangs |
|
1175
|
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
In case of type II enzymes which which cut symmetrically, this |
|
1177
|
|
|
|
|
|
|
function can be considered to return a boolean value. |
|
1178
|
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
=cut |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
sub compatible_ends { |
|
1183
|
1
|
|
|
1
|
1
|
2
|
my ($self, $re) = @_; |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
1
|
50
|
|
|
|
5
|
$self->throw("Need a Bio::Restriction::Enzyme as an argument, [$re]") |
|
1186
|
|
|
|
|
|
|
unless $re->isa('Bio::Restriction::Enzyme'); |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# $self->throw("Only type II enzymes work now") |
|
1189
|
|
|
|
|
|
|
# unless $self->type eq 'II'; |
|
1190
|
|
|
|
|
|
|
|
|
1191
|
1
|
50
|
33
|
|
|
3
|
$self->debug("N(s) in overhangs. Can not compare") |
|
1192
|
|
|
|
|
|
|
if $self->overhang_seq =~ /N/ or $re->overhang_seq =~ /N/; |
|
1193
|
|
|
|
|
|
|
|
|
1194
|
1
|
50
|
33
|
|
|
2
|
return 2 if $self->overhang_seq eq $re->overhang_seq and |
|
1195
|
|
|
|
|
|
|
$self->overhang eq $re->overhang; |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
0
|
return 0; |
|
1198
|
|
|
|
|
|
|
} |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head2 is_ambiguous |
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Title : is_ambiguous |
|
1203
|
|
|
|
|
|
|
Usage : $re->is_ambiguous(); |
|
1204
|
|
|
|
|
|
|
Function : Determines if the restriction enzyme contains ambiguous sequences |
|
1205
|
|
|
|
|
|
|
Returns : Boolean |
|
1206
|
|
|
|
|
|
|
Argument : n/a |
|
1207
|
|
|
|
|
|
|
Throws : n/a |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
=cut |
|
1210
|
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
sub is_ambiguous { |
|
1212
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
|
1213
|
1
|
50
|
|
|
|
2
|
return $self->string =~ m/[^AGCT]/ ? 1 : 0 ; |
|
1214
|
|
|
|
|
|
|
} |
|
1215
|
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
=head2 Additional methods from Rebase |
|
1217
|
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=cut |
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=head2 is_prototype |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
Title : is_prototype |
|
1223
|
|
|
|
|
|
|
Usage : $re->is_prototype |
|
1224
|
|
|
|
|
|
|
Function : Get/Set method for finding out if this enzyme is a prototype |
|
1225
|
|
|
|
|
|
|
Example : $re->is_prototype(1) |
|
1226
|
|
|
|
|
|
|
Returns : Boolean |
|
1227
|
|
|
|
|
|
|
Args : none |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
Prototype enzymes are the most commonly available and usually first |
|
1230
|
|
|
|
|
|
|
enzymes discoverd that have the same recognition site. Using only |
|
1231
|
|
|
|
|
|
|
prototype enzymes in restriction analysis avoids redundancy and |
|
1232
|
|
|
|
|
|
|
speeds things up. |
|
1233
|
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=cut |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
sub is_prototype { |
|
1237
|
7213
|
|
|
7213
|
1
|
5317
|
my ($self, $value) = @_; |
|
1238
|
7213
|
100
|
|
|
|
9293
|
if (defined $value) { |
|
1239
|
7210
|
|
|
|
|
10816
|
return $self->{'_is_prototype'} = $value ; |
|
1240
|
|
|
|
|
|
|
} |
|
1241
|
3
|
100
|
|
|
|
7
|
if (defined $self->{'_is_prototype'}) { |
|
1242
|
2
|
|
|
|
|
7
|
return $self->{'_is_prototype'} |
|
1243
|
|
|
|
|
|
|
} else { |
|
1244
|
1
|
|
|
|
|
7
|
$self->warn("Can't unequivocally assign prototype based on input format alone"); |
|
1245
|
|
|
|
|
|
|
return |
|
1246
|
0
|
|
|
|
|
0
|
} |
|
1247
|
|
|
|
|
|
|
} |
|
1248
|
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
=head2 is_neoschizomer |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
Title : is_neoschizomer |
|
1252
|
|
|
|
|
|
|
Usage : $re->is_neoschizomer |
|
1253
|
|
|
|
|
|
|
Function : Get/Set method for finding out if this enzyme is a neoschizomer |
|
1254
|
|
|
|
|
|
|
Example : $re->is_neoschizomer(1) |
|
1255
|
|
|
|
|
|
|
Returns : Boolean |
|
1256
|
|
|
|
|
|
|
Args : none |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
Neoschizomers are distinguishable from the prototype enzyme by having a |
|
1259
|
|
|
|
|
|
|
different cleavage pattern. Note that not all formats report this |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
=cut |
|
1262
|
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
sub is_neoschizomer { |
|
1264
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
|
1265
|
0
|
0
|
|
|
|
0
|
if (defined $value) { |
|
1266
|
0
|
|
|
|
|
0
|
return $self->{'_is_neoschizomer'} = $value ; |
|
1267
|
|
|
|
|
|
|
} |
|
1268
|
0
|
0
|
|
|
|
0
|
if (defined $self->{'_is_neoschizomer'}) { |
|
1269
|
0
|
|
|
|
|
0
|
return $self->{'_is_neoschizomer'} |
|
1270
|
|
|
|
|
|
|
} else { |
|
1271
|
0
|
|
|
|
|
0
|
$self->warn("Can't unequivocally assign neoschizomer based on input format alone"); |
|
1272
|
|
|
|
|
|
|
return |
|
1273
|
0
|
|
|
|
|
0
|
} |
|
1274
|
|
|
|
|
|
|
} |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=head2 prototype_name |
|
1277
|
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
Title : prototype_name |
|
1279
|
|
|
|
|
|
|
Alias : prototype |
|
1280
|
|
|
|
|
|
|
Usage : $re->prototype_name |
|
1281
|
|
|
|
|
|
|
Function : Get/Set method for the name of prototype for |
|
1282
|
|
|
|
|
|
|
this enzyme's recognition site |
|
1283
|
|
|
|
|
|
|
Example : $re->prototype_name(1) |
|
1284
|
|
|
|
|
|
|
Returns : prototype enzyme name string or an empty string |
|
1285
|
|
|
|
|
|
|
Args : optional prototype enzyme name string |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
If the enzyme itself is the prototype, its own name is returned. Not to |
|
1288
|
|
|
|
|
|
|
confuse the negative result with an unset value, use method |
|
1289
|
|
|
|
|
|
|
L. |
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
This method is called I rather than I, |
|
1292
|
|
|
|
|
|
|
because it returns a string rather than on object. |
|
1293
|
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=cut |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
sub prototype_name { |
|
1297
|
12
|
|
|
12
|
1
|
12
|
my $self = shift; |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
12
|
100
|
|
|
|
28
|
$self->{'_prototype'} = shift if @_; |
|
1300
|
12
|
100
|
|
|
|
29
|
return $self->name if $self->{'_is_prototype'}; |
|
1301
|
2
|
|
50
|
|
|
7
|
return $self->{'_prototype'} || ''; |
|
1302
|
|
|
|
|
|
|
} |
|
1303
|
|
|
|
|
|
|
|
|
1304
|
9
|
|
|
9
|
0
|
16
|
sub prototype { shift->prototype_name(@_) } |
|
1305
|
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
=head2 isoschizomers |
|
1307
|
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
Title : isoschizomers |
|
1309
|
|
|
|
|
|
|
Alias : isos |
|
1310
|
|
|
|
|
|
|
Usage : $re->isoschizomers(@list); |
|
1311
|
|
|
|
|
|
|
Function : Gets/Sets a list of known isoschizomers (enzymes that |
|
1312
|
|
|
|
|
|
|
recognize the same site, but don't necessarily cut at |
|
1313
|
|
|
|
|
|
|
the same position). |
|
1314
|
|
|
|
|
|
|
Arguments : A reference to an array that contains the isoschizomers |
|
1315
|
|
|
|
|
|
|
Returns : A reference to an array of the known isoschizomers or 0 |
|
1316
|
|
|
|
|
|
|
if not defined. |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
This has to be the hardest name to spell, so now you can use the alias |
|
1319
|
|
|
|
|
|
|
'isos'. Added for compatibility to REBASE |
|
1320
|
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=cut |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
sub isoschizomers { |
|
1324
|
7456
|
|
|
7456
|
1
|
5987
|
my ($self) = shift; |
|
1325
|
7456
|
100
|
|
|
|
10390
|
push @{$self->{_isoschizomers}}, @_ if @_; |
|
|
7454
|
|
|
|
|
11366
|
|
|
1326
|
|
|
|
|
|
|
# make sure that you don't dereference if null |
|
1327
|
|
|
|
|
|
|
# chad believes quite strongly that you should return |
|
1328
|
|
|
|
|
|
|
# a reference to an array anyway. don't bother dereferencing. |
|
1329
|
|
|
|
|
|
|
# i'll post that to the list. |
|
1330
|
7456
|
50
|
|
|
|
11081
|
if ($self->{'_isoschizomers'}) { |
|
1331
|
7456
|
|
|
|
|
4649
|
return @{$self->{_isoschizomers}}; |
|
|
7456
|
|
|
|
|
6976
|
|
|
1332
|
|
|
|
|
|
|
} |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
} |
|
1335
|
|
|
|
|
|
|
|
|
1336
|
0
|
|
|
0
|
0
|
0
|
sub isos { shift->isoschizomers(@_) } |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
=head2 purge_isoschizomers |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
Title : purge_isoschizomers |
|
1341
|
|
|
|
|
|
|
Alias : purge_isos |
|
1342
|
|
|
|
|
|
|
Usage : $re->purge_isoschizomers(); |
|
1343
|
|
|
|
|
|
|
Function : Purges the set of isoschizomers for this enzyme |
|
1344
|
|
|
|
|
|
|
Arguments : |
|
1345
|
|
|
|
|
|
|
Returns : 1 |
|
1346
|
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=cut |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
sub purge_isoschizomers { |
|
1350
|
1
|
|
|
1
|
1
|
3
|
my ($self) = shift; |
|
1351
|
1
|
|
|
|
|
4
|
$self->{_isoschizomers} = []; |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
} |
|
1354
|
|
|
|
|
|
|
|
|
1355
|
0
|
|
|
0
|
0
|
0
|
sub purge_isos { shift->purge_isoschizomers(@_) } |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=head2 methylation_sites |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
Title : methylation_sites |
|
1360
|
|
|
|
|
|
|
Usage : $re->methylation_sites(\%sites); |
|
1361
|
|
|
|
|
|
|
Function : Gets/Sets known methylation sites (positions on the sequence |
|
1362
|
|
|
|
|
|
|
that get modified to promote or prevent cleavage). |
|
1363
|
|
|
|
|
|
|
Arguments : A reference to a hash that contains the methylation sites |
|
1364
|
|
|
|
|
|
|
Returns : A reference to a hash of the methylation sites or |
|
1365
|
|
|
|
|
|
|
an empty string if not defined. |
|
1366
|
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
There are three types of methylation sites: |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=over 3 |
|
1370
|
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=item * (6) = N6-methyladenosine |
|
1372
|
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=item * (5) = 5-methylcytosine |
|
1374
|
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
=item * (4) = N4-methylcytosine |
|
1376
|
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=back |
|
1378
|
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
These are stored as 6, 5, and 4 respectively. The hash has the |
|
1380
|
|
|
|
|
|
|
sequence position as the key and the type of methylation as the value. |
|
1381
|
|
|
|
|
|
|
A negative number in the sequence position indicates that the DNA is |
|
1382
|
|
|
|
|
|
|
methylated on the complementary strand. |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
Note that in REBASE, the methylation positions are given |
|
1385
|
|
|
|
|
|
|
Added for compatibility to REBASE. |
|
1386
|
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=cut |
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
sub methylation_sites { |
|
1390
|
782
|
|
|
782
|
1
|
623
|
my $self = shift; |
|
1391
|
|
|
|
|
|
|
|
|
1392
|
782
|
|
|
|
|
1175
|
while (@_) { |
|
1393
|
829
|
|
|
|
|
584
|
my $key = shift; |
|
1394
|
829
|
|
|
|
|
2059
|
$self->{'_methylation_sites'}->{$key} = shift; |
|
1395
|
|
|
|
|
|
|
} |
|
1396
|
782
|
|
|
|
|
543
|
return %{$self->{_methylation_sites}}; |
|
|
782
|
|
|
|
|
1091
|
|
|
1397
|
|
|
|
|
|
|
} |
|
1398
|
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=head2 purge_methylation_sites |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
Title : purge_methylation_sites |
|
1403
|
|
|
|
|
|
|
Usage : $re->purge_methylation_sites(); |
|
1404
|
|
|
|
|
|
|
Function : Purges the set of methylation_sites for this enzyme |
|
1405
|
|
|
|
|
|
|
Arguments : |
|
1406
|
|
|
|
|
|
|
Returns : |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=cut |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
sub purge_methylation_sites { |
|
1411
|
23
|
|
|
23
|
1
|
27
|
my ($self) = shift; |
|
1412
|
23
|
|
|
|
|
38
|
$self->{_methylation_sites} = {}; |
|
1413
|
|
|
|
|
|
|
} |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=head2 microbe |
|
1416
|
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
Title : microbe |
|
1418
|
|
|
|
|
|
|
Usage : $re->microbe($microbe); |
|
1419
|
|
|
|
|
|
|
Function : Gets/Sets microorganism where the restriction enzyme was found |
|
1420
|
|
|
|
|
|
|
Arguments : A scalar containing the microbes name |
|
1421
|
|
|
|
|
|
|
Returns : A scalar containing the microbes name or 0 if not defined |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
Added for compatibility to REBASE |
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
=cut |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
sub microbe { |
|
1428
|
2
|
|
|
2
|
1
|
3
|
my ($self, $microbe) = @_; |
|
1429
|
2
|
100
|
|
|
|
5
|
if ($microbe) { |
|
1430
|
1
|
|
|
|
|
1
|
$self->{_microbe}=$microbe; |
|
1431
|
|
|
|
|
|
|
} |
|
1432
|
2
|
|
50
|
|
|
9
|
return $self->{_microbe} || ''; |
|
1433
|
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
} |
|
1435
|
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
=head2 source |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
Title : source |
|
1440
|
|
|
|
|
|
|
Usage : $re->source('Rob Edwards'); |
|
1441
|
|
|
|
|
|
|
Function : Gets/Sets the person who provided the enzyme |
|
1442
|
|
|
|
|
|
|
Arguments : A scalar containing the persons name |
|
1443
|
|
|
|
|
|
|
Returns : A scalar containing the persons name or 0 if not defined |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
Added for compatibility to REBASE |
|
1446
|
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
=cut |
|
1448
|
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
sub source { |
|
1450
|
7455
|
|
|
7455
|
1
|
5879
|
my ($self, $source) = @_; |
|
1451
|
7455
|
100
|
|
|
|
8554
|
if ($source) { |
|
1452
|
7454
|
|
|
|
|
8662
|
$self->{_source}=$source; |
|
1453
|
|
|
|
|
|
|
} |
|
1454
|
7455
|
|
50
|
|
|
10966
|
return $self->{_source} || ''; |
|
1455
|
|
|
|
|
|
|
} |
|
1456
|
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=head2 vendors |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
Title : vendors |
|
1461
|
|
|
|
|
|
|
Usage : $re->vendor(@list_of_companies); |
|
1462
|
|
|
|
|
|
|
Function : Gets/Sets the a list of companies that you can get the enzyme from. |
|
1463
|
|
|
|
|
|
|
Also sets the commercially_available boolean |
|
1464
|
|
|
|
|
|
|
Arguments : A reference to an array containing the names of companies |
|
1465
|
|
|
|
|
|
|
that you can get the enzyme from |
|
1466
|
|
|
|
|
|
|
Returns : A reference to an array containing the names of companies |
|
1467
|
|
|
|
|
|
|
that you can get the enzyme from |
|
1468
|
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
Added for compatibility to REBASE |
|
1470
|
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=cut |
|
1472
|
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
sub vendors { |
|
1474
|
7472
|
|
|
7472
|
1
|
6627
|
my $self = shift; |
|
1475
|
7472
|
100
|
|
|
|
8888
|
push @{$self->{_vendors}}, @_ if @_; |
|
|
7470
|
|
|
|
|
8439
|
|
|
1476
|
7472
|
50
|
|
|
|
10745
|
if ($self->{'_vendors'}) { |
|
1477
|
7472
|
|
|
|
|
4676
|
return @{$self->{'_vendors'}}; |
|
|
7472
|
|
|
|
|
8196
|
|
|
1478
|
|
|
|
|
|
|
} |
|
1479
|
|
|
|
|
|
|
} |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=head2 purge_vendors |
|
1483
|
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
Title : purge_vendors |
|
1485
|
|
|
|
|
|
|
Usage : $re->purge_references(); |
|
1486
|
|
|
|
|
|
|
Function : Purges the set of references for this enzyme |
|
1487
|
|
|
|
|
|
|
Arguments : |
|
1488
|
|
|
|
|
|
|
Returns : |
|
1489
|
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=cut |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
sub purge_vendors { |
|
1493
|
1
|
|
|
1
|
1
|
3
|
my ($self) = shift; |
|
1494
|
1
|
|
|
|
|
2
|
$self->{_vendors} = []; |
|
1495
|
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
} |
|
1497
|
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
=head2 vendor |
|
1499
|
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
Title : vendor |
|
1501
|
|
|
|
|
|
|
Usage : $re->vendor(@list_of_companies); |
|
1502
|
|
|
|
|
|
|
Function : Gets/Sets the a list of companies that you can get the enzyme from. |
|
1503
|
|
|
|
|
|
|
Also sets the commercially_available boolean |
|
1504
|
|
|
|
|
|
|
Arguments : A reference to an array containing the names of companies |
|
1505
|
|
|
|
|
|
|
that you can get the enzyme from |
|
1506
|
|
|
|
|
|
|
Returns : A reference to an array containing the names of companies |
|
1507
|
|
|
|
|
|
|
that you can get the enzyme from |
|
1508
|
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
Added for compatibility to REBASE |
|
1510
|
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=cut |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
sub vendor { |
|
1515
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
1516
|
1
|
|
|
|
|
2
|
return push @{$self->{_vendors}}, @_; |
|
|
1
|
|
|
|
|
5
|
|
|
1517
|
0
|
|
|
|
|
0
|
return $self->{_vendors}; |
|
1518
|
|
|
|
|
|
|
} |
|
1519
|
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
=head2 references |
|
1522
|
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
Title : references |
|
1524
|
|
|
|
|
|
|
Usage : $re->references(string); |
|
1525
|
|
|
|
|
|
|
Function : Gets/Sets the references for this enzyme |
|
1526
|
|
|
|
|
|
|
Arguments : an array of string reference(s) (optional) |
|
1527
|
|
|
|
|
|
|
Returns : an array of references |
|
1528
|
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
Use L to reset the list of references |
|
1530
|
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
This should be a L object, but its not (yet) |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
=cut |
|
1534
|
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
sub references { |
|
1536
|
7472
|
|
|
7472
|
1
|
6046
|
my ($self) = shift; |
|
1537
|
7472
|
100
|
|
|
|
8731
|
push @{$self->{_references}}, @_ if @_; |
|
|
7470
|
|
|
|
|
7602
|
|
|
1538
|
7472
|
|
|
|
|
5105
|
return @{$self->{_references}}; |
|
|
7472
|
|
|
|
|
6764
|
|
|
1539
|
|
|
|
|
|
|
} |
|
1540
|
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=head2 purge_references |
|
1543
|
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
Title : purge_references |
|
1545
|
|
|
|
|
|
|
Usage : $re->purge_references(); |
|
1546
|
|
|
|
|
|
|
Function : Purges the set of references for this enzyme |
|
1547
|
|
|
|
|
|
|
Arguments : |
|
1548
|
|
|
|
|
|
|
Returns : 1 |
|
1549
|
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
=cut |
|
1551
|
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
sub purge_references { |
|
1553
|
1
|
|
|
1
|
1
|
2
|
my ($self) = shift; |
|
1554
|
1
|
|
|
|
|
2
|
$self->{_references} = []; |
|
1555
|
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
} |
|
1557
|
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=head2 clone |
|
1559
|
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
Title : clone |
|
1561
|
|
|
|
|
|
|
Usage : $re->clone |
|
1562
|
|
|
|
|
|
|
Function : Deep copy of the object |
|
1563
|
|
|
|
|
|
|
Arguments : - |
|
1564
|
|
|
|
|
|
|
Returns : new Bio::Restriction::EnzymeI object |
|
1565
|
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
This works as long as the object is a clean in-memory object using |
|
1567
|
|
|
|
|
|
|
scalars, arrays and hashes. You have been warned. |
|
1568
|
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
If you have module Storable, it is used, otherwise local code is used. |
|
1570
|
|
|
|
|
|
|
Todo: local code cuts circular references. |
|
1571
|
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
=cut |
|
1573
|
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# there's some issue here; deprecating and rolling another below/maj |
|
1575
|
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
sub clone_depr { |
|
1577
|
0
|
|
|
0
|
0
|
0
|
my ($self, $this) = @_; |
|
1578
|
|
|
|
|
|
|
|
|
1579
|
0
|
|
|
|
|
0
|
eval { require Storable; }; |
|
|
0
|
|
|
|
|
0
|
|
|
1580
|
0
|
0
|
|
|
|
0
|
return Storable::dclone($self) unless $@; |
|
1581
|
|
|
|
|
|
|
# modified from deep_copy() @ http://www.stonehenge.com/merlyn/UnixReview/col30.html |
|
1582
|
0
|
0
|
|
|
|
0
|
unless ($this) { |
|
1583
|
0
|
|
|
|
|
0
|
my $new; |
|
1584
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$self) { |
|
1585
|
0
|
0
|
|
|
|
0
|
if (not ref $self->{$k}) { |
|
1586
|
0
|
|
|
|
|
0
|
$new->{$k} = $self->{$k}; |
|
1587
|
|
|
|
|
|
|
} else { |
|
1588
|
0
|
|
|
|
|
0
|
$new->{$k} = $self->clone($self->{$k}); |
|
1589
|
|
|
|
|
|
|
} |
|
1590
|
|
|
|
|
|
|
#print Dumper $new; |
|
1591
|
|
|
|
|
|
|
} |
|
1592
|
0
|
|
|
|
|
0
|
bless $new, ref($self); |
|
1593
|
0
|
|
|
|
|
0
|
return $new; |
|
1594
|
|
|
|
|
|
|
} |
|
1595
|
0
|
0
|
|
|
|
0
|
if (not ref $this) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1596
|
0
|
|
|
|
|
0
|
$this; |
|
1597
|
|
|
|
|
|
|
} |
|
1598
|
|
|
|
|
|
|
elsif (ref $this eq "ARRAY") { |
|
1599
|
0
|
|
|
|
|
0
|
[map $self->clone($_), @$this]; |
|
1600
|
|
|
|
|
|
|
} |
|
1601
|
|
|
|
|
|
|
elsif (ref $this eq "HASH") { |
|
1602
|
0
|
|
|
|
|
0
|
+{map { $_ => $self->clone($this->{$_}) } keys %$this}; |
|
|
0
|
|
|
|
|
0
|
|
|
1603
|
|
|
|
|
|
|
} else { # objects |
|
1604
|
0
|
0
|
|
|
|
0
|
return if $this->isa('Bio::Restriction::EnzymeI'); |
|
1605
|
0
|
0
|
|
|
|
0
|
return $this->clone if $this->can('clone'); |
|
1606
|
0
|
|
|
|
|
0
|
my $obj; |
|
1607
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$this) { |
|
1608
|
0
|
0
|
|
|
|
0
|
if (not ref $this->{$k}) { |
|
1609
|
0
|
|
|
|
|
0
|
$obj->{$k} = $this->{$k}; |
|
1610
|
|
|
|
|
|
|
} else { |
|
1611
|
0
|
|
|
|
|
0
|
$obj->{$k} = $this->clone($this->{$k}); |
|
1612
|
|
|
|
|
|
|
} |
|
1613
|
|
|
|
|
|
|
} |
|
1614
|
0
|
|
|
|
|
0
|
bless $obj, ref($this); |
|
1615
|
0
|
|
|
|
|
0
|
return $obj; |
|
1616
|
|
|
|
|
|
|
} |
|
1617
|
|
|
|
|
|
|
} |
|
1618
|
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
sub clone { |
|
1620
|
1501
|
|
|
1501
|
1
|
951
|
my $self = shift; |
|
1621
|
1501
|
|
|
|
|
1161
|
my ($this, $visited) = @_; |
|
1622
|
1501
|
100
|
|
|
|
1715
|
unless (defined $this) { |
|
1623
|
52
|
|
|
|
|
53
|
my %h; |
|
1624
|
52
|
|
|
|
|
291
|
tie %h, 'Tie::RefHash'; |
|
1625
|
52
|
|
|
|
|
407
|
my $visited = \%h; |
|
1626
|
52
|
|
|
|
|
106
|
return $self->clone($self, $visited); |
|
1627
|
|
|
|
|
|
|
} |
|
1628
|
1449
|
|
|
|
|
831
|
my $thing; |
|
1629
|
1449
|
|
|
|
|
1176
|
for ($this) { |
|
1630
|
1449
|
100
|
|
|
|
1696
|
if (ref) { |
|
1631
|
475
|
100
|
|
|
|
1183
|
return $visited->{$this} if $visited->{$this}; |
|
1632
|
|
|
|
|
|
|
} |
|
1633
|
|
|
|
|
|
|
# scalar |
|
1634
|
1447
|
100
|
|
|
|
4096
|
(!ref) && do { |
|
1635
|
974
|
|
|
|
|
628
|
$thing = $this; |
|
1636
|
974
|
|
|
|
|
716
|
last; |
|
1637
|
|
|
|
|
|
|
}; |
|
1638
|
|
|
|
|
|
|
# object |
|
1639
|
473
|
100
|
|
|
|
852
|
(ref =~ /^Bio::/) && do { |
|
1640
|
108
|
|
|
|
|
120
|
$thing = {}; |
|
1641
|
108
|
|
|
|
|
164
|
bless($thing, ref); |
|
1642
|
108
|
|
|
|
|
304
|
$visited->{$this} = $thing; |
|
1643
|
108
|
|
|
|
|
646
|
foreach my $attr (keys %{$_}) { |
|
|
108
|
|
|
|
|
344
|
|
|
1644
|
1196
|
100
|
|
|
|
1873
|
$thing->{$attr} = (defined $_->{$attr} ? $self->clone($_->{$attr},$visited) : undef ); |
|
1645
|
|
|
|
|
|
|
} |
|
1646
|
108
|
|
|
|
|
155
|
last; |
|
1647
|
|
|
|
|
|
|
}; |
|
1648
|
365
|
100
|
|
|
|
513
|
(ref eq 'ARRAY') && do { |
|
1649
|
311
|
|
|
|
|
267
|
$thing = []; |
|
1650
|
311
|
|
|
|
|
640
|
$visited->{$this} = $thing; |
|
1651
|
311
|
|
|
|
|
1426
|
foreach my $elt (@{$_}) { |
|
|
311
|
|
|
|
|
372
|
|
|
1652
|
309
|
50
|
|
|
|
534
|
push @$thing, (defined $elt ? $self->clone($elt,$visited) : undef); |
|
1653
|
|
|
|
|
|
|
} |
|
1654
|
311
|
|
|
|
|
289
|
last; |
|
1655
|
|
|
|
|
|
|
}; |
|
1656
|
54
|
50
|
|
|
|
119
|
(ref eq 'HASH') && do { |
|
1657
|
54
|
|
|
|
|
71
|
$thing = {}; |
|
1658
|
54
|
|
|
|
|
129
|
$visited->{$this} = $thing; |
|
1659
|
4
|
|
|
4
|
|
22
|
no warnings qw( uninitialized ); # avoid 'uninitialized value' warning against $key |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
242
|
|
|
1660
|
54
|
|
|
|
|
295
|
foreach my $key (%{$_}) { |
|
|
54
|
|
|
|
|
127
|
|
|
1661
|
0
|
0
|
|
|
|
0
|
$thing->{$key} = (defined $_->{key} ? $self->clone( $_->{$key},$visited) : undef ); |
|
1662
|
|
|
|
|
|
|
} |
|
1663
|
4
|
|
|
4
|
|
16
|
use warnings; |
|
|
4
|
|
|
|
|
4
|
|
|
|
4
|
|
|
|
|
851
|
|
|
1664
|
54
|
|
|
|
|
71
|
last; |
|
1665
|
|
|
|
|
|
|
}; |
|
1666
|
0
|
0
|
|
|
|
0
|
(ref eq 'SCALAR') && do { |
|
1667
|
0
|
|
|
|
|
0
|
$thing = ${$_}; |
|
|
0
|
|
|
|
|
0
|
|
|
1668
|
0
|
|
|
|
|
0
|
$visited->{$this} = $thing; |
|
1669
|
0
|
|
|
|
|
0
|
$thing = \$thing; |
|
1670
|
0
|
|
|
|
|
0
|
last; |
|
1671
|
|
|
|
|
|
|
}; |
|
1672
|
|
|
|
|
|
|
} |
|
1673
|
|
|
|
|
|
|
|
|
1674
|
1447
|
|
|
|
|
2253
|
return $thing; |
|
1675
|
|
|
|
|
|
|
} |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=head2 _expand |
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
Title : _expand |
|
1682
|
|
|
|
|
|
|
Function : Expand nucleotide ambiguity codes to their representative letters |
|
1683
|
|
|
|
|
|
|
Returns : The full length string |
|
1684
|
|
|
|
|
|
|
Arguments : The string to be expanded. |
|
1685
|
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
Stolen from the original RestrictionEnzyme.pm |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
=cut |
|
1689
|
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
sub _expand { |
|
1692
|
7774
|
|
|
7774
|
|
5816
|
my $str = shift; |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
7774
|
|
|
|
|
19514
|
$str =~ s/N|X/\./g; |
|
1695
|
7774
|
|
|
|
|
7603
|
$str =~ s/R/\[AG\]/g; |
|
1696
|
7774
|
|
|
|
|
6376
|
$str =~ s/Y/\[CT\]/g; |
|
1697
|
7774
|
|
|
|
|
5725
|
$str =~ s/S/\[GC\]/g; |
|
1698
|
7774
|
|
|
|
|
7020
|
$str =~ s/W/\[AT\]/g; |
|
1699
|
7774
|
|
|
|
|
5266
|
$str =~ s/M/\[AC\]/g; |
|
1700
|
7774
|
|
|
|
|
5397
|
$str =~ s/K/\[TG\]/g; |
|
1701
|
7774
|
|
|
|
|
4834
|
$str =~ s/B/\[CGT\]/g; |
|
1702
|
7774
|
|
|
|
|
5266
|
$str =~ s/D/\[AGT\]/g; |
|
1703
|
7774
|
|
|
|
|
5021
|
$str =~ s/H/\[ACT\]/g; |
|
1704
|
7774
|
|
|
|
|
4738
|
$str =~ s/V/\[ACG\]/g; |
|
1705
|
|
|
|
|
|
|
|
|
1706
|
7774
|
|
|
|
|
8158
|
return $str; |
|
1707
|
|
|
|
|
|
|
} |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
1; |
|
1710
|
|
|
|
|
|
|
|