line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# GeneDesign module for restriction enzyme handing |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Bio::GeneDesign::RestrictionEnzyme |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 VERSION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Version 5.56 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 DESCRIPTION |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
GeneDesign object that represents a type II restriction enzyme |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 AUTHOR |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Sarah Richardson |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=cut |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
package Bio::GeneDesign::RestrictionEnzyme; |
24
|
|
|
|
|
|
|
|
25
|
11
|
|
|
11
|
|
68
|
use Bio::GeneDesign::Basic qw(:GD); |
|
11
|
|
|
|
|
22
|
|
|
11
|
|
|
|
|
1674
|
|
26
|
11
|
|
|
11
|
|
68
|
use Carp; |
|
11
|
|
|
|
|
19
|
|
|
11
|
|
|
|
|
606
|
|
27
|
|
|
|
|
|
|
|
28
|
11
|
|
|
11
|
|
65
|
use strict; |
|
11
|
|
|
|
|
19
|
|
|
11
|
|
|
|
|
211
|
|
29
|
11
|
|
|
11
|
|
56
|
use warnings; |
|
11
|
|
|
|
|
25
|
|
|
11
|
|
|
|
|
370
|
|
30
|
|
|
|
|
|
|
|
31
|
11
|
|
|
11
|
|
60
|
use base qw(Bio::Root::Root); |
|
11
|
|
|
|
|
23
|
|
|
11
|
|
|
|
|
45664
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
our $VERSION = 5.56; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $IIPreg = qr/ ([A-Z]*) \^ ([A-Z]*) /x; |
36
|
|
|
|
|
|
|
my $IIAreg = qr/\A \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x; |
37
|
|
|
|
|
|
|
my $IIBreg = qr/\A\(([\-]*\d+) \/ ([\-]*\d+)\) \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my %RE_vendors = ( |
40
|
|
|
|
|
|
|
B => "Invitrogen", C => "Minotech", E => "Stratagene Agilent", |
41
|
|
|
|
|
|
|
F => "Thermo Scientific Fermentas", I => "SibEnzyme", J => "Nippon Gene Co.", |
42
|
|
|
|
|
|
|
K => "Takara", M => "Roche Applied Science", N => "New England Biolabs", |
43
|
|
|
|
|
|
|
O => "Toyobo Technologies", Q => "Molecular Biology Resources", |
44
|
|
|
|
|
|
|
R => "Promega", S => "Sigma Aldrich", U => "Bangalore Genei", V => "Vivantis", |
45
|
|
|
|
|
|
|
X => "EURx", Y => "CinnaGen" |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
my %methtrans = (b => "blocked", blocked => "blocked", |
49
|
|
|
|
|
|
|
i => "inhibited", inhibited => "inhibited", |
50
|
|
|
|
|
|
|
u => "unknown", unknown => "unknown" |
51
|
|
|
|
|
|
|
); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 CONSTRUCTOR METHODS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 new |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
You can create a new enzyme or clone an existing enzyme to create a new instance |
58
|
|
|
|
|
|
|
of an abstract enzyme definition. To do this, provide the -enzyme flag; the |
59
|
|
|
|
|
|
|
constructor will ignore every other argument except for -start. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Required arguments: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
EITHER |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
-enzyme : a Bio::GeneDesign::RestrictionEnzyme object to clone |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
OR |
68
|
|
|
|
|
|
|
-id : The name of the enzyme (i.e., BamHI) |
69
|
|
|
|
|
|
|
-cutseq : The string describing the enzyme's recognition and cleavage |
70
|
|
|
|
|
|
|
site |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Optional arguments: |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
-temp : The incubation temperature for the enzyme |
75
|
|
|
|
|
|
|
-tempin : The heat inactivation temperature for the enzyme |
76
|
|
|
|
|
|
|
-score : A float score, usually the price of the enzyme in dollars |
77
|
|
|
|
|
|
|
-methdam : Sensitivity to dam methylation; can take the values |
78
|
|
|
|
|
|
|
b or blocked, |
79
|
|
|
|
|
|
|
i or inhibited, |
80
|
|
|
|
|
|
|
u or unknown, |
81
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
82
|
|
|
|
|
|
|
-methdcm : Sensitivity to dcm methylation; can take the values |
83
|
|
|
|
|
|
|
b or blocked, |
84
|
|
|
|
|
|
|
i or inhibited, |
85
|
|
|
|
|
|
|
u or unknown, |
86
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
87
|
|
|
|
|
|
|
-methcpg : Sensitivity to cpg methylation; can take the values |
88
|
|
|
|
|
|
|
b or blocked, |
89
|
|
|
|
|
|
|
i or inhibited, |
90
|
|
|
|
|
|
|
u or unknown, |
91
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
92
|
|
|
|
|
|
|
-vendors : a string of single letter codes that represent vendor |
93
|
|
|
|
|
|
|
availability - no spaces. see vendor() for a list of the |
94
|
|
|
|
|
|
|
codes. |
95
|
|
|
|
|
|
|
-staract : Whether or not the enzyme exhibits star activity - 1 or 0. |
96
|
|
|
|
|
|
|
-buffers : a hash reference; keys are buffer names and values are the |
97
|
|
|
|
|
|
|
enzyme activity in that buffer. For example: |
98
|
|
|
|
|
|
|
NEB1 => 50, NEB2 => 100, etc. |
99
|
|
|
|
|
|
|
-start : An integer representing an offset; usually used only in |
100
|
|
|
|
|
|
|
cloned instances, as opposed to abstract instances. |
101
|
|
|
|
|
|
|
-exclude : An arrayref full of ids for enzymes that should be |
102
|
|
|
|
|
|
|
considered mutually exclusive to this enzyme - see exclude() |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=cut |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub new |
107
|
|
|
|
|
|
|
{ |
108
|
768
|
|
|
768
|
1
|
5338
|
my ($class, @args) = @_; |
109
|
768
|
|
|
|
|
2850
|
my $self = $class->SUPER::new(@args); |
110
|
|
|
|
|
|
|
|
111
|
768
|
|
|
|
|
24277
|
my ($object, $id, $cutseq, $temp, $tempin, $score, $methdam, $methdcm, |
112
|
|
|
|
|
|
|
$methcpg, $vendors, $staract, $buffers, $start, $exclude, $aggress) = |
113
|
|
|
|
|
|
|
$self->_rearrange([qw(ENZYME ID CUTSEQ TEMP TEMPIN SCORE METHDAM METHDCM |
114
|
|
|
|
|
|
|
METHCPG VENDORS STARACT BUFFERS START EXCLUDE AGGRESS)], @args); |
115
|
|
|
|
|
|
|
|
116
|
768
|
50
|
|
|
|
58195
|
if ($object) |
117
|
|
|
|
|
|
|
{ |
118
|
0
|
0
|
|
|
|
0
|
$self->throw("object of class " . ref($object) . " does not implement ". |
119
|
|
|
|
|
|
|
"Bio::GeneDesign::RestrictionEnzyme.") |
120
|
|
|
|
|
|
|
unless $object->isa("Bio::GeneDesign::RestrictionEnzyme"); |
121
|
0
|
|
|
|
|
0
|
$self = $object->clone(); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
else |
124
|
|
|
|
|
|
|
{ |
125
|
|
|
|
|
|
|
|
126
|
768
|
50
|
|
|
|
1987
|
$self->throw("No enzyme id defined") unless ($id); |
127
|
768
|
|
|
|
|
1744
|
$self->{id} = $id; |
128
|
|
|
|
|
|
|
|
129
|
768
|
50
|
|
|
|
1550
|
$self->throw("No cut sequence defined") unless ($cutseq); |
130
|
768
|
|
|
|
|
1444
|
$self->{cutseq} = $cutseq; |
131
|
|
|
|
|
|
|
|
132
|
768
|
|
|
|
|
1250
|
my $recseq = $cutseq; |
133
|
768
|
|
|
|
|
5965
|
$recseq =~ s/\W*\d*//xg; |
134
|
768
|
|
|
|
|
1771
|
$self->{recseq} = $recseq; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#Regular expression arrayref to use for enzyme searching |
137
|
|
|
|
|
|
|
#Should store as compiled regexes instead |
138
|
768
|
|
|
|
|
2097
|
$self->{regex} = _regarr($recseq); |
139
|
|
|
|
|
|
|
|
140
|
768
|
|
|
|
|
1514
|
my $sitelen = length($recseq); |
141
|
768
|
|
|
|
|
1390
|
$self->{length} = $sitelen; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#Enzyme Class and Palindromy |
144
|
768
|
|
|
|
|
1418
|
my ($lef, $rig) = (q{}, q{}); |
145
|
768
|
100
|
|
|
|
5489
|
if ($cutseq =~ $IIPreg) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
146
|
|
|
|
|
|
|
{ |
147
|
567
|
|
|
|
|
1238
|
$lef = length($1); |
148
|
567
|
|
|
|
|
810
|
$rig = length($2); |
149
|
567
|
|
|
|
|
937
|
$self->{class} = "IIP"; |
150
|
567
|
|
|
|
|
1169
|
$self->{classex} = $IIPreg; |
151
|
|
|
|
|
|
|
|
152
|
567
|
100
|
|
|
|
962
|
if ($lef == $rig) |
153
|
|
|
|
|
|
|
{ |
154
|
123
|
|
|
|
|
216
|
$self->{palindromy} = "unknown"; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
else |
157
|
|
|
|
|
|
|
{ |
158
|
444
|
|
|
|
|
584
|
my $inlef = $lef; |
159
|
444
|
100
|
|
|
|
1377
|
$inlef = length($recseq) - $inlef if ($inlef > (.5 * length($recseq))); |
160
|
444
|
|
|
|
|
1375
|
my $mattersbit = substr($recseq, $inlef, length($recseq) - (2 * $inlef)); |
161
|
444
|
100
|
66
|
|
|
3899
|
if ($mattersbit && $mattersbit =~ $ambnt && length($mattersbit) % 2 == 0) |
|
|
100
|
100
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
162
|
|
|
|
|
|
|
{ |
163
|
54
|
|
|
|
|
167
|
$self->{palindromy} = "pnon"; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
elsif ($mattersbit && $mattersbit eq _complement($mattersbit, 1)) |
166
|
|
|
|
|
|
|
{ |
167
|
384
|
|
|
|
|
1069
|
$self->{palindromy} = "pal"; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
elsif ($mattersbit) |
170
|
|
|
|
|
|
|
{ |
171
|
6
|
|
|
|
|
23
|
$self->{palindromy} = "nonpal"; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
elsif ($cutseq =~ $IIBreg) |
176
|
|
|
|
|
|
|
{ |
177
|
33
|
|
|
|
|
99
|
$lef = int($1); |
178
|
33
|
|
|
|
|
70
|
$rig = int($2); |
179
|
33
|
|
|
|
|
83
|
$self->{class} = "IIB"; |
180
|
33
|
|
|
|
|
78
|
$self->{classex} = $IIBreg; |
181
|
33
|
|
|
|
|
69
|
$self->{palindromy} = "pnon"; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
elsif ($cutseq =~ $IIAreg) |
184
|
|
|
|
|
|
|
{ |
185
|
168
|
|
|
|
|
501
|
$lef = int($1); |
186
|
168
|
|
|
|
|
250
|
$rig = int($2); |
187
|
168
|
|
|
|
|
318
|
$self->{class} = "IIA"; |
188
|
168
|
|
|
|
|
338
|
$self->{classex} = $IIAreg; |
189
|
168
|
|
|
|
|
262
|
$self->{palindromy} = "pnon"; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
else |
192
|
|
|
|
|
|
|
{ |
193
|
0
|
|
|
|
|
0
|
$self->{class} = "unknown"; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
#Enzyme type |
197
|
768
|
|
|
|
|
984
|
my $type; |
198
|
768
|
100
|
|
|
|
1535
|
if ($lef < $rig) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
199
|
|
|
|
|
|
|
{ |
200
|
438
|
|
|
|
|
662
|
$type .= "5'"; |
201
|
438
|
|
|
|
|
698
|
$self->{inside_cut} = $lef; |
202
|
438
|
|
|
|
|
668
|
$self->{outside_cut} = $rig; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
elsif ($lef > $rig) |
205
|
|
|
|
|
|
|
{ |
206
|
195
|
|
|
|
|
313
|
$type .= "3'"; |
207
|
195
|
|
|
|
|
292
|
$self->{inside_cut} = $rig; |
208
|
195
|
|
|
|
|
282
|
$self->{outside_cut} = $lef; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
elsif ($lef == $rig) |
211
|
|
|
|
|
|
|
{ |
212
|
135
|
|
|
|
|
207
|
$type .= 'b'; |
213
|
|
|
|
|
|
|
} |
214
|
768
|
100
|
|
|
|
1563
|
$self->{onebpoverhang} = 1 if (abs($lef - $rig) == 1); |
215
|
768
|
|
|
|
|
1225
|
$self->{type} = $type; |
216
|
|
|
|
|
|
|
|
217
|
768
|
100
|
|
|
|
1955
|
$self->{temp} = $temp if ($temp); |
218
|
768
|
100
|
|
|
|
1522
|
if ($tempin) |
219
|
|
|
|
|
|
|
{ |
220
|
630
|
|
|
|
|
2207
|
my ($intime, $intemp) = split q{@}, $tempin; |
221
|
630
|
|
|
|
|
1383
|
$self->{tempin} = $intemp; |
222
|
630
|
|
|
|
|
1567
|
$self->{timein} = $intime; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
768
|
50
|
|
|
|
2384
|
$self->{score} = $score if ($score); |
226
|
768
|
|
|
|
|
1504
|
$self->{aggress} = $aggress; |
227
|
|
|
|
|
|
|
|
228
|
768
|
100
|
|
|
|
1487
|
$self->{staract} = 1 if ($staract); |
229
|
|
|
|
|
|
|
|
230
|
768
|
100
|
|
|
|
1451
|
if (exists $methtrans{$methdam}) |
231
|
|
|
|
|
|
|
{ |
232
|
54
|
|
|
|
|
121
|
$self->{methdam} = $methtrans{$methdam}; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
else |
235
|
|
|
|
|
|
|
{ |
236
|
714
|
|
|
|
|
1368
|
$self->{methdam} = 'indifferent'; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
768
|
100
|
|
|
|
1339
|
if (exists $methtrans{$methdcm}) |
240
|
|
|
|
|
|
|
{ |
241
|
105
|
|
|
|
|
203
|
$self->{methdcm} = $methtrans{$methdcm}; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
else |
244
|
|
|
|
|
|
|
{ |
245
|
663
|
|
|
|
|
1040
|
$self->{methdcm} = 'indifferent'; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
768
|
100
|
|
|
|
1353
|
if (exists $methtrans{$methcpg}) |
249
|
|
|
|
|
|
|
{ |
250
|
390
|
|
|
|
|
792
|
$self->{methcpg} = $methtrans{$methcpg}; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
else |
253
|
|
|
|
|
|
|
{ |
254
|
378
|
|
|
|
|
605
|
$self->{methcpg} = 'indifferent'; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
768
|
100
|
|
|
|
1495
|
if ($vendors) |
258
|
|
|
|
|
|
|
{ |
259
|
765
|
|
|
|
|
1168
|
my %vhsh = (); |
260
|
765
|
|
|
|
|
2331
|
foreach my $v (split(q{}, $vendors)) |
261
|
|
|
|
|
|
|
{ |
262
|
2991
|
50
|
|
|
|
6771
|
$vhsh{$v} = $RE_vendors{$v} if (exists $RE_vendors{$v}); |
263
|
2991
|
50
|
|
|
|
5820
|
carp("$v not in vendor list!") unless (exists $RE_vendors{$v}); |
264
|
|
|
|
|
|
|
} |
265
|
765
|
|
|
|
|
1937
|
$self->{vendors} = \%vhsh; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
768
|
50
|
|
|
|
2778
|
$self->{buffers} = $buffers if ($buffers); |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
768
|
50
|
|
|
|
1222
|
$self->{start} = $start if ($start); |
272
|
|
|
|
|
|
|
|
273
|
768
|
50
|
|
|
|
1153
|
$self->{exclude} = $exclude if ($exclude); |
274
|
|
|
|
|
|
|
|
275
|
768
|
|
|
|
|
3797
|
return $self; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head1 FUNCTIONAL METHODS |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head2 clone |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
By default in GeneDesign code, RestrictionEnzyme objects are meant to stand as |
283
|
|
|
|
|
|
|
abstracts - that is, they stand for BamHI in general, and not for a particular |
284
|
|
|
|
|
|
|
instance of a BamHI recognition site. If you want to use the objects in the |
285
|
|
|
|
|
|
|
latter sense, you will need to clone the abstract object instantiated when the |
286
|
|
|
|
|
|
|
definition file is read in, thus generating an arbitrary number of BamHI |
287
|
|
|
|
|
|
|
instances that can then be differentiated by their start attributes. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=cut |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub clone |
292
|
|
|
|
|
|
|
{ |
293
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
294
|
0
|
|
|
|
|
0
|
my $copy; |
295
|
0
|
|
|
|
|
0
|
foreach my $key (keys %{$self}) |
|
0
|
|
|
|
|
0
|
|
296
|
|
|
|
|
|
|
{ |
297
|
0
|
|
|
|
|
0
|
$copy->{$key} = $self->{$key}; |
298
|
|
|
|
|
|
|
} |
299
|
0
|
|
|
|
|
0
|
bless $copy, ref $self; |
300
|
0
|
|
|
|
|
0
|
return $copy; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head2 positions |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Generates a hash describing the positions of the enzyme's recognition |
306
|
|
|
|
|
|
|
sites in a nucleotide sequence. Keys are offset in nucleotides, and values are |
307
|
|
|
|
|
|
|
the recognition site found at said offset as a string. |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub positions |
312
|
|
|
|
|
|
|
{ |
313
|
29
|
|
|
29
|
1
|
41
|
my ($self, $seq) = @_; |
314
|
29
|
|
|
|
|
39
|
my $total = {}; |
315
|
29
|
|
|
|
|
33
|
foreach my $sit (@{$self->{regex}}) |
|
29
|
|
|
|
|
65
|
|
316
|
|
|
|
|
|
|
{ |
317
|
46
|
|
|
|
|
792
|
while ($seq =~ /(?=($sit))/ixg) |
318
|
|
|
|
|
|
|
{ |
319
|
27
|
|
|
|
|
190
|
$total->{pos $seq} = $1; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
} |
322
|
29
|
|
|
|
|
72
|
return $total; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head2 overhang |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Given a nucleotide sequence context, what overhang does this enzyme leave, and |
328
|
|
|
|
|
|
|
how far away from the cutsite is it? |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Arguments: |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub overhang |
335
|
|
|
|
|
|
|
{ |
336
|
0
|
|
|
0
|
1
|
0
|
my ($self, $dna, $context, $strand) = @_; |
337
|
0
|
|
|
|
|
0
|
my ($ohangstart, $mattersbit) = (0, q{}); |
338
|
0
|
|
|
|
|
0
|
my $lef; |
339
|
|
|
|
|
|
|
my $rig; |
340
|
0
|
0
|
|
|
|
0
|
if ($self->{class} eq "IIP") |
|
|
0
|
|
|
|
|
|
341
|
|
|
|
|
|
|
{ |
342
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = (length($1), length($2)) if ($self->{cutseq} =~ $IIPreg); |
343
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($rig, $lef) if ($rig < $lef); |
344
|
0
|
|
|
|
|
0
|
$ohangstart = $lef + 1; |
345
|
0
|
|
|
|
|
0
|
$mattersbit = substr($dna, $ohangstart-1, $rig-$lef); |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
elsif ($self->{class} eq "IIA") |
348
|
|
|
|
|
|
|
{ |
349
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($1, $2) if ($self->{cutseq} =~ $IIAreg); |
350
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($rig, $lef) if ($rig < $lef); |
351
|
0
|
0
|
|
|
|
0
|
if ($strand == 1) |
352
|
|
|
|
|
|
|
{ |
353
|
0
|
|
|
|
|
0
|
$ohangstart = length($dna) + $lef + 1; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
else |
356
|
|
|
|
|
|
|
{ |
357
|
0
|
|
|
|
|
0
|
$ohangstart = length($context) - length($dna) - $rig + 1; |
358
|
|
|
|
|
|
|
} |
359
|
0
|
|
|
|
|
0
|
$mattersbit = substr($context, $ohangstart-1, $rig-$lef); |
360
|
0
|
0
|
|
|
|
0
|
$ohangstart = $strand == 1 ? length($dna) + $lef : 0 - ($rig); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
else |
363
|
|
|
|
|
|
|
{ |
364
|
0
|
|
|
|
|
0
|
return (); |
365
|
|
|
|
|
|
|
} |
366
|
0
|
|
|
|
|
0
|
return ($ohangstart, $mattersbit); |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head2 display |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Generates a tab delimited display string that can be used to print enzyme |
372
|
|
|
|
|
|
|
information out in a tabular format. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=cut |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub display |
377
|
|
|
|
|
|
|
{ |
378
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
379
|
0
|
0
|
|
|
|
0
|
my $staract = $self->{staract} ? "*" : q{}; |
380
|
0
|
|
|
|
|
0
|
my (@blocked, @inhibed) = ((), ()); |
381
|
0
|
0
|
|
|
|
0
|
push @blocked, "cpg" if ($self->{methcpg} eq "blocked"); |
382
|
0
|
0
|
|
|
|
0
|
push @blocked, "dam" if ($self->{methdam} eq "blocked"); |
383
|
0
|
0
|
|
|
|
0
|
push @blocked, "dcm" if ($self->{methdcm} eq "blocked"); |
384
|
0
|
0
|
|
|
|
0
|
push @inhibed, "cpg" if ($self->{methcpg} eq "inhibited"); |
385
|
0
|
0
|
|
|
|
0
|
push @inhibed, "dam" if ($self->{methdam} eq "inhibited"); |
386
|
0
|
0
|
|
|
|
0
|
push @inhibed, "dcm" if ($self->{methdcm} eq "inhibited"); |
387
|
0
|
|
|
|
|
0
|
my $buffstr = undef; |
388
|
0
|
|
|
|
|
0
|
foreach (sort keys %{$self->{buffers}}) |
|
0
|
|
|
|
|
0
|
|
389
|
|
|
|
|
|
|
{ |
390
|
0
|
0
|
|
|
|
0
|
$buffstr .= "$_ (" . $self->{buffers}->{$_} . ") " if ($self->{buffers}->{$_}); |
391
|
|
|
|
|
|
|
} |
392
|
0
|
|
|
|
|
0
|
my $vendstr = join(", ", values %{$self->{vendors}}); |
|
0
|
|
|
|
|
0
|
|
393
|
0
|
|
|
|
|
0
|
my $display = undef; |
394
|
0
|
0
|
|
|
|
0
|
my $inact = $self->{tempin} ? " (". $self->{timein} . q{@} . $self->{tempin} . ")" : q{}; |
395
|
0
|
|
|
|
|
0
|
$display .= $self->{id} . "\t"; |
396
|
0
|
|
|
|
|
0
|
$display .= $self->{cutseq} . $staract . "\t"; |
397
|
0
|
|
|
|
|
0
|
$display .= $self->{type} . "\t"; |
398
|
0
|
0
|
|
|
|
0
|
$display .= $self->{start} . "\t" if ($self->{start}); |
399
|
0
|
|
|
|
|
0
|
$display .= $self->{temp} . $inact . "\t"; |
400
|
0
|
|
|
|
|
0
|
$display .= join(", ", @blocked) . "\t"; |
401
|
0
|
|
|
|
|
0
|
$display .= join(", ", @inhibed) . "\t"; |
402
|
0
|
|
|
|
|
0
|
$display .= $self->{score} . "\t"; |
403
|
0
|
|
|
|
|
0
|
$display .= $buffstr . "\t"; |
404
|
0
|
|
|
|
|
0
|
$display .= $vendstr . "\t"; |
405
|
0
|
|
|
|
|
0
|
return $display; |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head2 common_buffers |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Returns an array reference listing the buffers, if any, in which two enzymes |
411
|
|
|
|
|
|
|
both have 100% activity. in boolean mode returns the number of buffers |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=cut |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub common_buffers |
416
|
|
|
|
|
|
|
{ |
417
|
0
|
|
|
0
|
1
|
0
|
my ($self, $buddy, $bool) = @_; |
418
|
0
|
0
|
|
|
|
0
|
$self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme") |
419
|
|
|
|
|
|
|
unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme"); |
420
|
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
0
|
my $sbuffs = $self->{buffers}; |
422
|
0
|
|
|
|
|
0
|
my $bbuffs = $buddy->{buffers}; |
423
|
0
|
|
|
|
|
0
|
my @answer; |
424
|
0
|
|
|
|
|
0
|
foreach my $skey (sort keys %{$sbuffs}) |
|
0
|
|
|
|
|
0
|
|
425
|
|
|
|
|
|
|
{ |
426
|
0
|
|
|
|
|
0
|
my $sval = $sbuffs->{$skey}; |
427
|
0
|
|
|
|
|
0
|
my $bval = $bbuffs->{$skey}; |
428
|
0
|
0
|
0
|
|
|
0
|
if ($skey eq "Other" && $sval && $bval && "$sval" eq "$bval") |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
429
|
|
|
|
|
|
|
{ |
430
|
0
|
|
|
|
|
0
|
push @answer, $skey; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
elsif ($sval && $bval && "$sval" == 100 && "$bval" == 100) |
433
|
|
|
|
|
|
|
{ |
434
|
0
|
|
|
|
|
0
|
push @answer, $skey; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
} |
437
|
0
|
0
|
|
|
|
0
|
return $bool ? scalar(@answer) : \@answer; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 acceptable_buffer |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Returns a buffer in which both enzymes will have at least a thresholded amount |
443
|
|
|
|
|
|
|
of activity. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=cut |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub acceptable_buffer |
448
|
|
|
|
|
|
|
{ |
449
|
0
|
|
|
0
|
1
|
0
|
my ($self, $buddy, $level) = @_; |
450
|
0
|
0
|
|
|
|
0
|
$self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme") |
451
|
|
|
|
|
|
|
unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme"); |
452
|
|
|
|
|
|
|
|
453
|
0
|
|
0
|
|
|
0
|
$level = $level || 75; |
454
|
0
|
|
|
|
|
0
|
my $sbuffs = $self->{buffers}; |
455
|
0
|
|
|
|
|
0
|
my $bbuffs = $buddy->{buffers}; |
456
|
0
|
|
|
|
|
0
|
my %answers; |
457
|
0
|
|
|
|
|
0
|
foreach my $skey (sort keys %{$sbuffs}) |
|
0
|
|
|
|
|
0
|
|
458
|
|
|
|
|
|
|
{ |
459
|
0
|
|
|
|
|
0
|
my $sval = $sbuffs->{$skey}; |
460
|
0
|
|
|
|
|
0
|
my $bval = $bbuffs->{$skey}; |
461
|
0
|
0
|
0
|
|
|
0
|
if ($skey eq "Other" && $sval && $bval && $sval == $bval) |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
462
|
|
|
|
|
|
|
{ |
463
|
0
|
|
|
|
|
0
|
$answers{$skey} = 200; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
elsif ($sval && $bval && $sval >= $level && $bval >= $level) |
466
|
|
|
|
|
|
|
{ |
467
|
0
|
|
|
|
|
0
|
$answers{$skey} = $sval + $bval; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
} |
470
|
0
|
0
|
|
|
|
0
|
my @keys = sort {$answers{$b} <=> $answers{$a} && $b cmp $a} keys %answers; |
|
0
|
|
|
|
|
0
|
|
471
|
0
|
0
|
|
|
|
0
|
return scalar @keys ? $keys[0] : undef; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head2 units |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Returns the number of units needed to cleave some sequence |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub units |
481
|
|
|
|
|
|
|
{ |
482
|
0
|
|
|
0
|
1
|
0
|
my ($self, @args) = @_; |
483
|
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
0
|
my ($buffer, $sequence) = $self->_rearrange([qw(buffer sequence)], @args); |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
my $poshsh = $self->positions($sequence); |
488
|
0
|
|
|
|
|
0
|
my $count = scalar keys %{$poshsh}; |
|
0
|
|
|
|
|
0
|
|
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
0
|
my $freq = $count / (length $sequence); |
491
|
|
|
|
|
|
|
|
492
|
0
|
|
0
|
|
|
0
|
my $aggr = $self->aggress() || .000001; |
493
|
0
|
|
|
|
|
0
|
$aggr = 1 / $aggr; |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
0
|
|
|
0
|
$buffer = $buffer || $self->acceptable_buffer($self, 100); |
496
|
0
|
|
0
|
|
|
0
|
my $buff = $self->buffers->{$buffer} || 1; |
497
|
0
|
|
|
|
|
0
|
my $jad = $buff / 100; |
498
|
0
|
0
|
|
|
|
0
|
my $adj = $jad > 0 ? 1 / $jad : 0; |
499
|
|
|
|
|
|
|
|
500
|
0
|
|
|
|
|
0
|
my $units = sprintf("%.1f", $freq * $aggr * $adj); |
501
|
|
|
|
|
|
|
|
502
|
0
|
|
|
|
|
0
|
return $units; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=head1 FILTERING METHODS |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=head2 filter_by_sequence |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Arguments: an arrayref of string nucleotide sequences (may be ambiguous) |
511
|
|
|
|
|
|
|
a flag indicating whether or not the sequences in the array are |
512
|
|
|
|
|
|
|
required (1 means they must NOT match; default 0 means they must |
513
|
|
|
|
|
|
|
match) |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Returns : 1 if the enzyme passes; |
516
|
|
|
|
|
|
|
0 if the enzyme fails. |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=cut |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
sub filter_by_sequence |
521
|
|
|
|
|
|
|
{ |
522
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref, $req) = @_; |
523
|
0
|
0
|
|
|
|
0
|
$req = 0 if (! $req); |
524
|
0
|
|
|
|
|
0
|
my $result = 1; |
525
|
0
|
|
|
|
|
0
|
foreach my $seq (@$arrref) |
526
|
|
|
|
|
|
|
{ |
527
|
0
|
|
|
|
|
0
|
my $regex = _regres($seq, 1); |
528
|
0
|
0
|
|
|
|
0
|
if ($regex =~ /\[ X \]/x) |
529
|
|
|
|
|
|
|
{ |
530
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse sequence $seq containing non-nucleotide " |
531
|
|
|
|
|
|
|
. "characters - ignoring.\n"; |
532
|
0
|
|
|
|
|
0
|
next; |
533
|
|
|
|
|
|
|
} |
534
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( $req == 1 && $self->{recseq} =~ $regex ); |
535
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( $req == 0 && $self->{recseq} !~ $regex ); |
536
|
|
|
|
|
|
|
} |
537
|
0
|
|
|
|
|
0
|
return $result; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head2 filter_by_score |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Arguments : a float |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
Returns : 1 if the enzyme's score is less than or equal to the argument, |
545
|
|
|
|
|
|
|
0 if the enzyme's score is higher. |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=cut |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub filter_by_score |
550
|
|
|
|
|
|
|
{ |
551
|
0
|
|
|
0
|
1
|
0
|
my ($self, $score) = @_; |
552
|
0
|
|
|
|
|
0
|
my $result = 1; |
553
|
0
|
0
|
|
|
|
0
|
$result = 0 if ($self->{score} > $score); |
554
|
0
|
|
|
|
|
0
|
return $result; |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=head2 filter_by_vendor |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Arguments : an arrayref of vendor abbreviations; see vendor(). |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Returns : 1 if the enzyme is supplied by any of the vendors queried, |
562
|
|
|
|
|
|
|
0 else. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=cut |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
sub filter_by_vendor |
567
|
|
|
|
|
|
|
{ |
568
|
0
|
|
|
0
|
1
|
0
|
my ($self, $vendlist) = @_; |
569
|
0
|
|
|
|
|
0
|
my $result = 1; |
570
|
0
|
|
|
|
|
0
|
my $flag = 0; |
571
|
0
|
|
|
|
|
0
|
foreach my $vend (@$vendlist) |
572
|
|
|
|
|
|
|
{ |
573
|
0
|
0
|
|
|
|
0
|
unless (exists($RE_vendors{$vend})) |
574
|
|
|
|
|
|
|
{ |
575
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse vendor argument $vend - ignoring.\n"; |
576
|
0
|
|
|
|
|
0
|
next; |
577
|
|
|
|
|
|
|
} |
578
|
0
|
0
|
|
|
|
0
|
$flag++ if ( exists( $self->{vendors}->{$vend} ) ); |
579
|
|
|
|
|
|
|
} |
580
|
0
|
0
|
|
|
|
0
|
$result = $flag == 0 ? 0 : 1; |
581
|
0
|
|
|
|
|
0
|
return $result; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=head2 filter_by_buffer_activity |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
Arguments : a hashref of buffer thresholds; the key is the buffer name, the |
587
|
|
|
|
|
|
|
value is an activity threshold. |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets all the buffer requirements, |
590
|
|
|
|
|
|
|
0 else. |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=cut |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
sub filter_by_buffer_activity |
595
|
|
|
|
|
|
|
{ |
596
|
0
|
|
|
0
|
1
|
0
|
my ($self, $hshref) = @_; |
597
|
0
|
|
|
|
|
0
|
my $result = 1; |
598
|
0
|
|
|
|
|
0
|
my $rebuff = $self->{buffers}; |
599
|
0
|
|
|
|
|
0
|
foreach my $buff (keys %$hshref) |
600
|
|
|
|
|
|
|
{ |
601
|
0
|
|
|
|
|
0
|
my $val = $hshref->{$buff}; |
602
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( ! exists($rebuff->{$buff}) || $rebuff->{$buff} < $val ); |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
|
605
|
0
|
|
|
|
|
0
|
return $result; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=head2 filter_by_dcm_sensitivity |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
611
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the dcm sensitivity requirements, |
614
|
|
|
|
|
|
|
0 else. |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=cut |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
sub filter_by_dcm_sensitivity |
619
|
|
|
|
|
|
|
{ |
620
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
621
|
0
|
|
|
|
|
0
|
my $result = 1; |
622
|
0
|
|
|
|
|
0
|
my %sensehsh; |
623
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
624
|
|
|
|
|
|
|
{ |
625
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
626
|
|
|
|
|
|
|
{ |
627
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
628
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse dcmsense argument $sense - ignoring.\n"; |
629
|
0
|
|
|
|
|
0
|
next; |
630
|
|
|
|
|
|
|
} |
631
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
632
|
|
|
|
|
|
|
} |
633
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methdcm}}) ); |
634
|
0
|
|
|
|
|
0
|
return $result; |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=head2 filter_by_dam_sensitivity |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
640
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the dam sensitivity requirements, |
643
|
|
|
|
|
|
|
0 else. |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=cut |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
sub filter_by_dam_sensitivity |
648
|
|
|
|
|
|
|
{ |
649
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
650
|
0
|
|
|
|
|
0
|
my $result = 1; |
651
|
0
|
|
|
|
|
0
|
my %sensehsh; |
652
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
653
|
|
|
|
|
|
|
{ |
654
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
655
|
|
|
|
|
|
|
{ |
656
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
657
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse damsense argument $sense - ignoring.\n"; |
658
|
0
|
|
|
|
|
0
|
next; |
659
|
|
|
|
|
|
|
} |
660
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
661
|
|
|
|
|
|
|
} |
662
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methdam}}) ); |
663
|
0
|
|
|
|
|
0
|
return $result; |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=head2 filter_by_cpg_sensitivity |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
669
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the cpg sensitivity requirements, |
672
|
|
|
|
|
|
|
0 else. |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub filter_by_cpg_sensitivity |
677
|
|
|
|
|
|
|
{ |
678
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
679
|
0
|
|
|
|
|
0
|
my $result = 1; |
680
|
0
|
|
|
|
|
0
|
my %sensehsh; |
681
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
682
|
|
|
|
|
|
|
{ |
683
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
684
|
|
|
|
|
|
|
{ |
685
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
686
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse cpgsense argument $sense - ignoring.\n"; |
687
|
0
|
|
|
|
|
0
|
next; |
688
|
|
|
|
|
|
|
} |
689
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
690
|
|
|
|
|
|
|
} |
691
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methcpg}}) ); |
692
|
0
|
|
|
|
|
0
|
return $result; |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=head2 filter_by_star_activity |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
Arguments : 1 if star activity required, 0 else |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the star activity requirements, |
700
|
|
|
|
|
|
|
0 else. |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=cut |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
sub filter_by_star_activity |
705
|
|
|
|
|
|
|
{ |
706
|
0
|
|
|
0
|
1
|
0
|
my ($self, $star) = @_; |
707
|
0
|
|
|
|
|
0
|
my $result = 1; |
708
|
0
|
0
|
|
|
|
0
|
$star = 0 unless ($star); |
709
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if (($star && ! $self->{staract}) || (! $star && $self->{staract})); |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
710
|
0
|
|
|
|
|
0
|
return $result; |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=head2 filter_by_incubation_temperature |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable integer incubation temperatures |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the incubation temperature requirements, |
718
|
|
|
|
|
|
|
0 else. |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub filter_by_incubation_temperature |
723
|
|
|
|
|
|
|
{ |
724
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
725
|
0
|
|
|
|
|
0
|
my $result = 1; |
726
|
0
|
|
|
|
|
0
|
my %temps; |
727
|
0
|
|
|
|
|
0
|
foreach my $temp (@$arrref) |
728
|
|
|
|
|
|
|
{ |
729
|
0
|
0
|
0
|
|
|
0
|
if ($temp !~ /\d/x || $temp <= 0) |
730
|
|
|
|
|
|
|
{ |
731
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse incubation argument $temp - ignoring.\n"; |
732
|
|
|
|
|
|
|
} |
733
|
0
|
|
|
|
|
0
|
$temps{$temp}++; |
734
|
|
|
|
|
|
|
} |
735
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists $temps{$self->{temp}}); |
736
|
0
|
|
|
|
|
0
|
return $result; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head2 filter_by_inactivation_temperature |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Arguments : an acceptable integer inactivation temperature maximum |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the inactivation temperature requirement, |
744
|
|
|
|
|
|
|
0 else. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=cut |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
sub filter_by_inactivation_temperature |
749
|
|
|
|
|
|
|
{ |
750
|
0
|
|
|
0
|
1
|
0
|
my ($self, $temp) = @_; |
751
|
0
|
|
|
|
|
0
|
my $result = 1; |
752
|
0
|
0
|
0
|
|
|
0
|
if ($temp !~ /\d/x || $temp <= 0) |
753
|
|
|
|
|
|
|
{ |
754
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse inactivation argument $temp - ignoring.\n"; |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
else |
757
|
|
|
|
|
|
|
{ |
758
|
0
|
0
|
|
|
|
0
|
$result = 0 if ($self->{tempin} > $temp); |
759
|
|
|
|
|
|
|
} |
760
|
0
|
|
|
|
|
0
|
return $result; |
761
|
|
|
|
|
|
|
} |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=head2 filter_by_base_ambiguity |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
Arguments : "nonNonly" if any non N bases are allowed; "ATCGonly" if only |
766
|
|
|
|
|
|
|
A, T, C, or G are allowed |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the ambiguous nucleotide requirement, |
769
|
|
|
|
|
|
|
0 else. |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=cut |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub filter_by_base_ambiguity |
774
|
|
|
|
|
|
|
{ |
775
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ambig) = @_; |
776
|
0
|
|
|
|
|
0
|
my $result = 1; |
777
|
0
|
0
|
0
|
|
|
0
|
if ($ambig ne "nonNonly" && $ambig ne "ATCGonly") |
778
|
|
|
|
|
|
|
{ |
779
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse ambiguity argument $ambig - ignoring.\n"; |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
else |
782
|
|
|
|
|
|
|
{ |
783
|
0
|
|
|
|
|
0
|
my $ambregex; |
784
|
0
|
0
|
|
|
|
0
|
$ambregex = qr/N/ if ($ambig eq "nonNonly"); |
785
|
0
|
0
|
|
|
|
0
|
$ambregex = $ambnt if ($ambig eq "ATCGonly"); |
786
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( $self->{recseq} =~ $ambregex ); |
787
|
|
|
|
|
|
|
} |
788
|
0
|
|
|
|
|
0
|
return $result; |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=head2 filter_by_length |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable recognition site lengths |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the recognition site length requirements, |
796
|
|
|
|
|
|
|
0 else. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub filter_by_length |
801
|
|
|
|
|
|
|
{ |
802
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
803
|
0
|
|
|
|
|
0
|
my $result = 1; |
804
|
0
|
|
|
|
|
0
|
my %lens; |
805
|
0
|
|
|
|
|
0
|
foreach my $len (@$arrref) |
806
|
|
|
|
|
|
|
{ |
807
|
0
|
0
|
0
|
|
|
0
|
if ($len =~ /\D/x || $len <= 0) |
808
|
|
|
|
|
|
|
{ |
809
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse length argument $len - ignoring.\n"; |
810
|
0
|
|
|
|
|
0
|
next; |
811
|
|
|
|
|
|
|
} |
812
|
0
|
|
|
|
|
0
|
$lens{$len}++; |
813
|
|
|
|
|
|
|
} |
814
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists $lens{length($self->{recseq})} ); |
815
|
0
|
|
|
|
|
0
|
return $result; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=head2 filter_by_overhang_palindromy |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable overhang palindromys, from the list |
821
|
|
|
|
|
|
|
pal (palindromic), |
822
|
|
|
|
|
|
|
nonpal (nonpalindromic), |
823
|
|
|
|
|
|
|
pnon (potentially nonpalindromic) |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the palindromy requirements, |
826
|
|
|
|
|
|
|
0 else. |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=cut |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
sub filter_by_overhang_palindromy |
831
|
|
|
|
|
|
|
{ |
832
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
833
|
0
|
|
|
|
|
0
|
my $result = 1; |
834
|
0
|
|
|
|
|
0
|
my %pals; |
835
|
0
|
|
|
|
|
0
|
foreach my $pal (@$arrref) |
836
|
|
|
|
|
|
|
{ |
837
|
0
|
0
|
0
|
|
|
0
|
if ($pal ne "pal" && $pal ne "pnon" && $pal ne "nonpal") |
|
|
|
0
|
|
|
|
|
838
|
|
|
|
|
|
|
{ |
839
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse palindromy argument $pal - ignoring.\n"; |
840
|
0
|
|
|
|
|
0
|
next; |
841
|
|
|
|
|
|
|
} |
842
|
0
|
|
|
|
|
0
|
$pals{$pal}++; |
843
|
|
|
|
|
|
|
} |
844
|
0
|
0
|
|
|
|
0
|
$result = 0 unless (exists $pals{$self->{palindromy}}); |
845
|
0
|
|
|
|
|
0
|
return $result; |
846
|
|
|
|
|
|
|
} |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=head2 filter_by_stickiness |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable overhang orientations, from the list |
851
|
|
|
|
|
|
|
1 (single basepair overhang), |
852
|
|
|
|
|
|
|
5 (five prime overhang), |
853
|
|
|
|
|
|
|
3 (three prime overhang), |
854
|
|
|
|
|
|
|
b (blunt ended) |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the overhang requirements, |
857
|
|
|
|
|
|
|
0 else. |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=cut |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
sub filter_by_stickiness |
862
|
|
|
|
|
|
|
{ |
863
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
864
|
0
|
|
|
|
|
0
|
my $result = 1; |
865
|
0
|
|
|
|
|
0
|
my %sticks; |
866
|
0
|
|
|
|
|
0
|
foreach my $stick (@$arrref) |
867
|
|
|
|
|
|
|
{ |
868
|
0
|
0
|
0
|
|
|
0
|
if ($stick ne "5" && $stick ne "3" && $stick ne "1" && $stick ne "b") |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
869
|
|
|
|
|
|
|
{ |
870
|
0
|
|
|
|
|
0
|
print "\tWARNING: Cannot parse sticky argument $stick - ignoring.\n"; |
871
|
0
|
|
|
|
|
0
|
next; |
872
|
|
|
|
|
|
|
} |
873
|
0
|
|
|
|
|
0
|
$sticks{$stick}++; |
874
|
|
|
|
|
|
|
} |
875
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ($self->{onebpoverhang} && ! exists $sticks{1}); |
876
|
0
|
|
|
|
|
0
|
my $type = $self->{type}; |
877
|
0
|
|
|
|
|
0
|
$type =~ s/\'//xg; |
878
|
0
|
0
|
|
|
|
0
|
$result = 0 unless (exists $sticks{$type}); |
879
|
0
|
|
|
|
|
0
|
return $result; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=head1 ACCESSOR METHODS |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Methods for setting and accessing enzyme attributes |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head2 id |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
The name of the enzyme. |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=cut |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub id |
893
|
|
|
|
|
|
|
{ |
894
|
98
|
|
|
98
|
1
|
194
|
my ($self) = @_; |
895
|
98
|
|
|
|
|
288
|
return $self->{id}; |
896
|
|
|
|
|
|
|
} |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
=head2 display_name |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
The name of the enzyme. |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=cut |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub display_name |
905
|
|
|
|
|
|
|
{ |
906
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
907
|
0
|
|
|
|
|
0
|
return $self->{id}; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
=head2 score |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
This attribute initially holds the price in dollars per unit of the enzyme |
913
|
|
|
|
|
|
|
(2009 US Dollars) but can be used to hold any score or cost value. |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=cut |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub score |
918
|
|
|
|
|
|
|
{ |
919
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
920
|
0
|
0
|
|
|
|
0
|
if (defined $value) |
921
|
|
|
|
|
|
|
{ |
922
|
0
|
|
|
|
|
0
|
$self->{score} = $value; |
923
|
|
|
|
|
|
|
} |
924
|
0
|
|
|
|
|
0
|
return $self->{score}; |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head2 aggress |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
Aggressiveness is the number of recognition sites in a template piece of DNA |
930
|
|
|
|
|
|
|
(usually lambda, but sometimes adeno2, pBR322, pUC19, pXba, etc) over the total |
931
|
|
|
|
|
|
|
length of that template piece of DNA. This number tells the manufacturer how |
932
|
|
|
|
|
|
|
much enzyme to sell as a "unit" - the amount of enzyme required to fully digest |
933
|
|
|
|
|
|
|
one microgram of template DNA under reaction conditions in an hour. |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=cut |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
sub aggress |
938
|
|
|
|
|
|
|
{ |
939
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
940
|
0
|
0
|
|
|
|
0
|
if (defined $value) |
941
|
|
|
|
|
|
|
{ |
942
|
0
|
|
|
|
|
0
|
$self->{aggress} = $value; |
943
|
|
|
|
|
|
|
} |
944
|
0
|
|
|
|
|
0
|
return $self->{aggress}; |
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=head2 len |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
The length in bases of the recognition sequence (recseq). |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=cut |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
sub len |
954
|
|
|
|
|
|
|
{ |
955
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
956
|
0
|
|
|
|
|
0
|
return $self->{length}; |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 methcpg |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
The effect of CpG methylation on the enzyme's efficacy. |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=cut |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
sub methcpg |
966
|
|
|
|
|
|
|
{ |
967
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
968
|
0
|
|
|
|
|
0
|
return $self->{methcpg}; |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=head2 methdcm |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
The effect of Dcm methylation on the enzyme's efficacy. |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=cut |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
sub methdcm |
978
|
|
|
|
|
|
|
{ |
979
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
980
|
0
|
|
|
|
|
0
|
return $self->{methdcm}; |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=head2 methdam |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
The effect of Dam methylation on the enzyme's efficacy. |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
=cut |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
sub methdam |
990
|
|
|
|
|
|
|
{ |
991
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
992
|
0
|
|
|
|
|
0
|
return $self->{methdam}; |
993
|
|
|
|
|
|
|
} |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
=head2 buffers |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
A hash reference where the keys are buffer names and the values are the activity |
998
|
|
|
|
|
|
|
level of the enzyme in that Buffer. Since most of the enzymes in the default |
999
|
|
|
|
|
|
|
GeneDesign list are NEB enzymes, this is usually full of NEB buffers. |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
=cut |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
sub buffers |
1004
|
|
|
|
|
|
|
{ |
1005
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1006
|
0
|
|
|
|
|
0
|
return $self->{buffers}; |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
=head2 vendors |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
A hash reference where the keys are abbreviations for and the values are names |
1012
|
|
|
|
|
|
|
of vendors that stock the enzyme. These are read in from the enzyme file. |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
B = Invitrogen |
1015
|
|
|
|
|
|
|
C = Minotech |
1016
|
|
|
|
|
|
|
E = Stratagene |
1017
|
|
|
|
|
|
|
F = Thermo Scientific Fermentas |
1018
|
|
|
|
|
|
|
I = SibEnzyme |
1019
|
|
|
|
|
|
|
J = Nippon Gene Co. |
1020
|
|
|
|
|
|
|
K = Takara |
1021
|
|
|
|
|
|
|
M = Roche Applied Science |
1022
|
|
|
|
|
|
|
N = New England Biolabs |
1023
|
|
|
|
|
|
|
O = Toyobo Technologies |
1024
|
|
|
|
|
|
|
Q = Molecular Biology Resources |
1025
|
|
|
|
|
|
|
R = Promega |
1026
|
|
|
|
|
|
|
S = Sigma Aldrich |
1027
|
|
|
|
|
|
|
U = Bangalore Genei |
1028
|
|
|
|
|
|
|
V = Vivantis |
1029
|
|
|
|
|
|
|
X = EURx |
1030
|
|
|
|
|
|
|
Y = CinnaGen |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=cut |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
sub vendors |
1035
|
|
|
|
|
|
|
{ |
1036
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1037
|
0
|
|
|
|
|
0
|
return $self->{vendors}; |
1038
|
|
|
|
|
|
|
} |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
=head2 tempin |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
The temperature in degrees Celsius that deactivates the enzyme. |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
=cut |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
sub tempin |
1047
|
|
|
|
|
|
|
{ |
1048
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1049
|
0
|
|
|
|
|
0
|
return $self->{tempin}; |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
=head2 timein |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
The time required at inactivation temperature to deactivate the enzyme. |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
=cut |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
sub timein |
1059
|
|
|
|
|
|
|
{ |
1060
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1061
|
0
|
|
|
|
|
0
|
return $self->{timein}; |
1062
|
|
|
|
|
|
|
} |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
=head2 temp |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
Incubation temperature for the best enzyme activity, in degrees Celsius. |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
=cut |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
sub temp |
1071
|
|
|
|
|
|
|
{ |
1072
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1073
|
0
|
|
|
|
|
0
|
return $self->{temp}; |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
=head2 recseq |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
This attribute is the "clean" description of the enzyme's recognition sequence - |
1079
|
|
|
|
|
|
|
that is, no information about cleavage site can be gained from this attribute. |
1080
|
|
|
|
|
|
|
This is determined automatically from the cleavage string (cutseq) at |
1081
|
|
|
|
|
|
|
instantiation. |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
=cut |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
sub recseq |
1086
|
|
|
|
|
|
|
{ |
1087
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1088
|
0
|
|
|
|
|
0
|
return $self->{recseq}; |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
=head2 seq |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
Synonym for recseq |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=cut |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
sub seq |
1098
|
|
|
|
|
|
|
{ |
1099
|
21
|
|
|
21
|
1
|
38
|
my ($self) = @_; |
1100
|
21
|
|
|
|
|
75
|
return $self->{recseq}; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=head2 cutseq |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
This attribute is the string description of the enzyme's recognition sequence. |
1106
|
|
|
|
|
|
|
It includes information about both the recognition and cleavage sites. |
1107
|
|
|
|
|
|
|
See http://rebase.neb.com/rebase/rebrec.html for help interpreting this field. |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=cut |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
sub cutseq |
1112
|
|
|
|
|
|
|
{ |
1113
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1114
|
0
|
|
|
|
|
0
|
return $self->{cutseq}; |
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
=head2 regex |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
This attribute stores a set of regular expressions as an array reference to |
1120
|
|
|
|
|
|
|
speed the search for recognition sites in sequence. The first entry in the |
1121
|
|
|
|
|
|
|
arrayref is the regular expression representing the forward orientation of |
1122
|
|
|
|
|
|
|
the recognition sequence; the second entry represents the reverse orientation |
1123
|
|
|
|
|
|
|
and is only defined if the recognition site is nonpalindromic. |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
This attribute is defined at instantiation. |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
=cut |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
sub regex |
1130
|
|
|
|
|
|
|
{ |
1131
|
1
|
|
|
1
|
1
|
3
|
my ($self) = @_; |
1132
|
1
|
|
|
|
|
3
|
return $self->{regex}; |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
=head2 class |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
Class describes the cutting behavior of an enzyme. The classes used by |
1138
|
|
|
|
|
|
|
GeneDesign uses a generalized subset of the classes as described at Rebase - for |
1139
|
|
|
|
|
|
|
the purposes of enzyme editing, three classes have so far proven to be enough. |
1140
|
|
|
|
|
|
|
See http://rebase.neb.com/cgi-bin/sublist for the full description of enzyme |
1141
|
|
|
|
|
|
|
classes. |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
IIP : This enzyme has a symmetric target and a symmetric cleavage site; this |
1144
|
|
|
|
|
|
|
usually means that the enzyme cleaves inside its own recognition site. |
1145
|
|
|
|
|
|
|
This is not the same as overhang palindromy! |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
IIA : This enzyme has an asymmetric recognition site and usually cleaves |
1148
|
|
|
|
|
|
|
outside of it. |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
IIB : This enzyme has one recognition site and two cleavage sites, one on |
1151
|
|
|
|
|
|
|
either side of the recognition site, and thus cuts itself out of |
1152
|
|
|
|
|
|
|
sequence. |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
=cut |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
sub class |
1157
|
|
|
|
|
|
|
{ |
1158
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1159
|
0
|
|
|
|
|
0
|
return $self->{class}; |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
=head2 classex |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
=cut |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
sub classex |
1167
|
|
|
|
|
|
|
{ |
1168
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1169
|
0
|
|
|
|
|
0
|
return $self->{classex}; |
1170
|
|
|
|
|
|
|
} |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
=head2 class_regexes |
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
Short cut to accessing class regular expressions |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
=cut |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
sub class_regexes |
1179
|
|
|
|
|
|
|
{ |
1180
|
0
|
|
|
0
|
1
|
0
|
return {'IIP' => $IIPreg, 'IIA' => $IIAreg, 'IIB' => $IIBreg}; |
1181
|
|
|
|
|
|
|
} |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=head2 type |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
Type describes the kind of overhang left by an enzyme. This is probably not a |
1186
|
|
|
|
|
|
|
good use of the word type. |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
Type may be 5', for a five prime overhang; 3', for a three prime overhang; |
1189
|
|
|
|
|
|
|
or b for blunt ends. |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
=cut |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
sub type |
1194
|
|
|
|
|
|
|
{ |
1195
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1196
|
0
|
|
|
|
|
0
|
return $self->{type}; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head2 onebpoverhang |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
One basepair overhangs can be harder to ligate than blunt ends. This attribute |
1203
|
|
|
|
|
|
|
returns 1 if an enzyme leaves a 1bp overhang and 0 else. |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
=cut |
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
sub onebpoverhang |
1208
|
|
|
|
|
|
|
{ |
1209
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1210
|
0
|
|
|
|
|
0
|
return $self->{onebpoverhang}; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
=head2 exclude |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
Some enzymes share overlapping recognition sites. If you are trying to ensure |
1216
|
|
|
|
|
|
|
the absence or uniqueness of a recognition site, you will want to be sure to |
1217
|
|
|
|
|
|
|
exclude isoschizomers and neoschizomers from consideration elsewhere. The |
1218
|
|
|
|
|
|
|
exclude attribute stores an array reference that lists the ids of neo- and |
1219
|
|
|
|
|
|
|
isoschizomers - or any arbitrary enzyme that is incompatible with this enzyme - |
1220
|
|
|
|
|
|
|
for easy lookup. |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=cut |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
sub exclude |
1225
|
|
|
|
|
|
|
{ |
1226
|
768
|
|
|
768
|
1
|
1742
|
my ($self, $value) = @_; |
1227
|
768
|
50
|
|
|
|
1365
|
if (defined $value) |
1228
|
|
|
|
|
|
|
{ |
1229
|
768
|
50
|
|
|
|
1699
|
$self->throw("$value is not a reference to an array") |
1230
|
|
|
|
|
|
|
unless (ref $value eq "ARRAY"); |
1231
|
768
|
|
|
|
|
1402
|
$self->{exclude} = $value; |
1232
|
|
|
|
|
|
|
} |
1233
|
768
|
|
|
|
|
3171
|
return $self->{exclude}; |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
=head2 palindromy |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
Information about the overhang the enzyme leaves. |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
pal = palindromic |
1241
|
|
|
|
|
|
|
nonpal = nonpalindromic |
1242
|
|
|
|
|
|
|
pnon = potentially nonpalindromic, or sometimes palindromic and sometimes |
1243
|
|
|
|
|
|
|
nonpalindromic |
1244
|
|
|
|
|
|
|
unknown = unknown |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=cut |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
sub palindromy |
1249
|
|
|
|
|
|
|
{ |
1250
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1251
|
0
|
|
|
|
|
|
return $self->{palindromy}; |
1252
|
|
|
|
|
|
|
} |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
=head2 staract |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
1 if the enzyme exhibits star activity, 0 else |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
=cut |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
sub staract |
1261
|
|
|
|
|
|
|
{ |
1262
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1263
|
0
|
|
|
|
|
|
return $self->{staract}; |
1264
|
|
|
|
|
|
|
} |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=head2 start |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
The offset in nucleotides of the enzymes recognition site in an ORF |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=cut |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
sub start |
1273
|
|
|
|
|
|
|
{ |
1274
|
0
|
|
|
0
|
1
|
|
my ($self, $value) = @_; |
1275
|
0
|
0
|
|
|
|
|
if (defined $value) |
1276
|
|
|
|
|
|
|
{ |
1277
|
0
|
|
|
|
|
|
$self->{start} = $value; |
1278
|
|
|
|
|
|
|
} |
1279
|
0
|
|
|
|
|
|
return $self->{start}; |
1280
|
|
|
|
|
|
|
} |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=head2 outside_cut |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=cut |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
sub outside_cut |
1287
|
|
|
|
|
|
|
{ |
1288
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1289
|
0
|
|
|
|
|
|
return $self->{outside_cut}; |
1290
|
|
|
|
|
|
|
} |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
=head2 inside_cut |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=cut |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
sub inside_cut |
1297
|
|
|
|
|
|
|
{ |
1298
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1299
|
0
|
|
|
|
|
|
return $self->{inside_cut}; |
1300
|
|
|
|
|
|
|
} |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
1; |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
__END__ |