line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# GeneDesign module for restriction enzyme handing |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Bio::GeneDesign::RestrictionEnzyme |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 VERSION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Version 5.52 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 DESCRIPTION |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
GeneDesign object that represents a type II restriction enzyme |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 AUTHOR |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Sarah Richardson <SMRichardson@lbl.gov> |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=cut |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
package Bio::GeneDesign::RestrictionEnzyme; |
24
|
|
|
|
|
|
|
|
25
|
11
|
|
|
11
|
|
63
|
use Bio::GeneDesign::Basic qw(:GD); |
|
11
|
|
|
|
|
26
|
|
|
11
|
|
|
|
|
2458
|
|
26
|
11
|
|
|
11
|
|
66
|
use Carp; |
|
11
|
|
|
|
|
26
|
|
|
11
|
|
|
|
|
742
|
|
27
|
|
|
|
|
|
|
|
28
|
11
|
|
|
11
|
|
74
|
use strict; |
|
11
|
|
|
|
|
23
|
|
|
11
|
|
|
|
|
361
|
|
29
|
11
|
|
|
11
|
|
63
|
use warnings; |
|
11
|
|
|
|
|
21
|
|
|
11
|
|
|
|
|
379
|
|
30
|
|
|
|
|
|
|
|
31
|
11
|
|
|
11
|
|
62
|
use base qw(Bio::Root::Root); |
|
11
|
|
|
|
|
22
|
|
|
11
|
|
|
|
|
61657
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
our $VERSION = 5.52; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $IIPreg = qr/ ([A-Z]*) \^ ([A-Z]*) /x; |
36
|
|
|
|
|
|
|
my $IIAreg = qr/\A \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x; |
37
|
|
|
|
|
|
|
my $IIBreg = qr/\A\(([\-]*\d+) \/ ([\-]*\d+)\) \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my %RE_vendors = ( |
40
|
|
|
|
|
|
|
B => "Invitrogen", C => "Minotech", E => "Stratagene Agilent", |
41
|
|
|
|
|
|
|
F => "Thermo Scientific Fermentas", I => "SibEnzyme", J => "Nippon Gene Co.", |
42
|
|
|
|
|
|
|
K => "Takara", M => "Roche Applied Science", N => "New England Biolabs", |
43
|
|
|
|
|
|
|
O => "Toyobo Technologies", Q => "Molecular Biology Resources", |
44
|
|
|
|
|
|
|
R => "Promega", S => "Sigma Aldrich", U => "Bangalore Genei", V => "Vivantis", |
45
|
|
|
|
|
|
|
X => "EURx", Y => "CinnaGen" |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
my %methtrans = (b => "blocked", blocked => "blocked", |
49
|
|
|
|
|
|
|
i => "inhibited", inhibited => "inhibited", |
50
|
|
|
|
|
|
|
u => "unknown", unknown => "unknown" |
51
|
|
|
|
|
|
|
); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 CONSTRUCTOR METHODS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 new |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
You can create a new enzyme or clone an existing enzyme to create a new instance |
58
|
|
|
|
|
|
|
of an abstract enzyme definition. To do this, provide the -enzyme flag; the |
59
|
|
|
|
|
|
|
constructor will ignore every other argument except for -start. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Required arguments: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
EITHER |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
-enzyme : a Bio::GeneDesign::RestrictionEnzyme object to clone |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
OR |
68
|
|
|
|
|
|
|
-id : The name of the enzyme (i.e., BamHI) |
69
|
|
|
|
|
|
|
-cutseq : The string describing the enzyme's recognition and cleavage |
70
|
|
|
|
|
|
|
site |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Optional arguments: |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
-temp : The incubation temperature for the enzyme |
75
|
|
|
|
|
|
|
-tempin : The heat inactivation temperature for the enzyme |
76
|
|
|
|
|
|
|
-score : A float score, usually the price of the enzyme in dollars |
77
|
|
|
|
|
|
|
-methdam : Sensitivity to dam methylation; can take the values |
78
|
|
|
|
|
|
|
b or blocked, |
79
|
|
|
|
|
|
|
i or inhibited, |
80
|
|
|
|
|
|
|
u or unknown, |
81
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
82
|
|
|
|
|
|
|
-methdcm : Sensitivity to dcm methylation; can take the values |
83
|
|
|
|
|
|
|
b or blocked, |
84
|
|
|
|
|
|
|
i or inhibited, |
85
|
|
|
|
|
|
|
u or unknown, |
86
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
87
|
|
|
|
|
|
|
-methcpg : Sensitivity to cpg methylation; can take the values |
88
|
|
|
|
|
|
|
b or blocked, |
89
|
|
|
|
|
|
|
i or inhibited, |
90
|
|
|
|
|
|
|
u or unknown, |
91
|
|
|
|
|
|
|
if undefined, will take the value indifferent. |
92
|
|
|
|
|
|
|
-vendors : a string of single letter codes that represent vendor |
93
|
|
|
|
|
|
|
availability - no spaces. see vendor() for a list of the |
94
|
|
|
|
|
|
|
codes. |
95
|
|
|
|
|
|
|
-staract : Whether or not the enzyme exhibits star activity - 1 or 0. |
96
|
|
|
|
|
|
|
-buffers : a hash reference; keys are buffer names and values are the |
97
|
|
|
|
|
|
|
enzyme activity in that buffer. For example: |
98
|
|
|
|
|
|
|
NEB1 => 50, NEB2 => 100, etc. |
99
|
|
|
|
|
|
|
-start : An integer representing an offset; usually used only in |
100
|
|
|
|
|
|
|
cloned instances, as opposed to abstract instances. |
101
|
|
|
|
|
|
|
-exclude : An arrayref full of ids for enzymes that should be |
102
|
|
|
|
|
|
|
considered mutually exclusive to this enzyme - see exclude() |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=cut |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub new |
107
|
|
|
|
|
|
|
{ |
108
|
768
|
|
|
768
|
1
|
5615
|
my ($class, @args) = @_; |
109
|
768
|
|
|
|
|
3193
|
my $self = $class->SUPER::new(@args); |
110
|
|
|
|
|
|
|
|
111
|
768
|
|
|
|
|
31095
|
my ($object, $id, $cutseq, $temp, $tempin, $score, $methdam, $methdcm, |
112
|
|
|
|
|
|
|
$methcpg, $vendors, $staract, $buffers, $start, $exclude, $aggress) = |
113
|
|
|
|
|
|
|
$self->_rearrange([qw(ENZYME ID CUTSEQ TEMP TEMPIN SCORE METHDAM METHDCM |
114
|
|
|
|
|
|
|
METHCPG VENDORS STARACT BUFFERS START EXCLUDE AGGRESS)], @args); |
115
|
|
|
|
|
|
|
|
116
|
768
|
50
|
|
|
|
71804
|
if ($object) |
117
|
|
|
|
|
|
|
{ |
118
|
0
|
0
|
|
|
|
0
|
$self->throw("object of class " . ref($object) . " does not implement ". |
119
|
|
|
|
|
|
|
"Bio::GeneDesign::RestrictionEnzyme.") |
120
|
|
|
|
|
|
|
unless $object->isa("Bio::GeneDesign::RestrictionEnzyme"); |
121
|
0
|
|
|
|
|
0
|
$self = $object->clone(); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
else |
124
|
|
|
|
|
|
|
{ |
125
|
|
|
|
|
|
|
|
126
|
768
|
50
|
|
|
|
2262
|
$self->throw("No enzyme id defined") unless ($id); |
127
|
768
|
|
|
|
|
1943
|
$self->{'id'} = $id; |
128
|
|
|
|
|
|
|
|
129
|
768
|
50
|
|
|
|
2032
|
$self->throw("No cut sequence defined") unless ($cutseq); |
130
|
768
|
|
|
|
|
1610
|
$self->{'cutseq'} = $cutseq; |
131
|
|
|
|
|
|
|
|
132
|
768
|
|
|
|
|
1265
|
my $recseq = $cutseq; |
133
|
768
|
|
|
|
|
7850
|
$recseq =~ s/\W*\d*//xg; |
134
|
768
|
|
|
|
|
2054
|
$self->{'recseq'} = $recseq; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#Regular expression arrayref to use for enzyme searching |
137
|
|
|
|
|
|
|
#Should store as compiled regexes instead |
138
|
768
|
|
|
|
|
2347
|
$self->{'regex'} = _regarr($recseq); |
139
|
|
|
|
|
|
|
|
140
|
768
|
|
|
|
|
1708
|
my $sitelen = length($recseq); |
141
|
768
|
|
|
|
|
1611
|
$self->{'length'} = $sitelen; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#Enzyme Class and Palindromy |
144
|
768
|
|
|
|
|
1428
|
my ($lef, $rig) = (q{}, q{}); |
145
|
768
|
100
|
|
|
|
6101
|
if ($cutseq =~ $IIPreg) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
146
|
|
|
|
|
|
|
{ |
147
|
567
|
|
|
|
|
1065
|
$lef = length($1); |
148
|
567
|
|
|
|
|
855
|
$rig = length($2); |
149
|
567
|
|
|
|
|
1082
|
$self->{'class'} = "IIP"; |
150
|
567
|
|
|
|
|
1340
|
$self->{'classex'} = $IIPreg; |
151
|
|
|
|
|
|
|
|
152
|
567
|
100
|
|
|
|
1064
|
if ($lef == $rig) |
153
|
|
|
|
|
|
|
{ |
154
|
123
|
|
|
|
|
263
|
$self->{'palindromy'} = "unknown"; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
else |
157
|
|
|
|
|
|
|
{ |
158
|
444
|
|
|
|
|
515
|
my $inlef = $lef; |
159
|
444
|
100
|
|
|
|
1603
|
$inlef = length($recseq) - $inlef if ($inlef > (.5 * length($recseq))); |
160
|
444
|
|
|
|
|
1544
|
my $mattersbit = substr($recseq, $inlef, length($recseq) - (2 * $inlef)); |
161
|
444
|
100
|
66
|
|
|
5648
|
if ($mattersbit && $mattersbit =~ $ambnt && length($mattersbit) % 2 == 0) |
|
|
100
|
100
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
162
|
|
|
|
|
|
|
{ |
163
|
54
|
|
|
|
|
207
|
$self->{'palindromy'} = "pnon"; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
elsif ($mattersbit && $mattersbit eq _complement($mattersbit, 1)) |
166
|
|
|
|
|
|
|
{ |
167
|
384
|
|
|
|
|
1328
|
$self->{'palindromy'} = "pal"; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
elsif ($mattersbit) |
170
|
|
|
|
|
|
|
{ |
171
|
6
|
|
|
|
|
28
|
$self->{'palindromy'} = "nonpal"; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
elsif ($cutseq =~ $IIBreg) |
176
|
|
|
|
|
|
|
{ |
177
|
33
|
|
|
|
|
107
|
$lef = int($1); |
178
|
33
|
|
|
|
|
69
|
$rig = int($2); |
179
|
33
|
|
|
|
|
68
|
$self->{'class'} = "IIB"; |
180
|
33
|
|
|
|
|
86
|
$self->{'classex'} = $IIBreg; |
181
|
33
|
|
|
|
|
101
|
$self->{'palindromy'} = "pnon"; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
elsif ($cutseq =~ $IIAreg) |
184
|
|
|
|
|
|
|
{ |
185
|
168
|
|
|
|
|
360
|
$lef = int($1); |
186
|
168
|
|
|
|
|
261
|
$rig = int($2); |
187
|
168
|
|
|
|
|
347
|
$self->{'class'} = "IIA"; |
188
|
168
|
|
|
|
|
395
|
$self->{'classex'} = $IIAreg; |
189
|
168
|
|
|
|
|
303
|
$self->{'palindromy'} = "pnon"; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
else |
192
|
|
|
|
|
|
|
{ |
193
|
0
|
|
|
|
|
0
|
$self->{'class'} = "unknown"; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
#Enzyme type |
197
|
768
|
|
|
|
|
972
|
my $type; |
198
|
768
|
100
|
|
|
|
1719
|
if ($lef < $rig) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
199
|
|
|
|
|
|
|
{ |
200
|
438
|
|
|
|
|
574
|
$type .= "5'"; |
201
|
438
|
|
|
|
|
763
|
$self->{'inside_cut'} = $lef; |
202
|
438
|
|
|
|
|
695
|
$self->{'outside_cut'} = $rig; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
elsif ($lef > $rig) |
205
|
|
|
|
|
|
|
{ |
206
|
195
|
|
|
|
|
250
|
$type .= "3'"; |
207
|
195
|
|
|
|
|
329
|
$self->{'inside_cut'} = $rig; |
208
|
195
|
|
|
|
|
465
|
$self->{'outside_cut'} = $lef; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
elsif ($lef == $rig) |
211
|
|
|
|
|
|
|
{ |
212
|
135
|
|
|
|
|
202
|
$type .= "b"; |
213
|
|
|
|
|
|
|
} |
214
|
768
|
100
|
|
|
|
1769
|
$self->{'onebpoverhang'} = 1 if (abs($lef - $rig) == 1); |
215
|
768
|
|
|
|
|
1335
|
$self->{'type'} = $type; |
216
|
|
|
|
|
|
|
|
217
|
768
|
100
|
|
|
|
2490
|
$self->{'temp'} = $temp if ($temp); |
218
|
768
|
100
|
|
|
|
1868
|
if ($tempin) |
219
|
|
|
|
|
|
|
{ |
220
|
630
|
|
|
|
|
2261
|
my ($intime, $intemp) = split q{@}, $tempin; |
221
|
630
|
|
|
|
|
1694
|
$self->{'tempin'} = $intemp; |
222
|
630
|
|
|
|
|
1832
|
$self->{'timein'} = $intime; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
768
|
50
|
|
|
|
3177
|
$self->{'score'} = $score if ($score); |
226
|
768
|
|
|
|
|
1557
|
$self->{'aggress'} = $aggress; |
227
|
|
|
|
|
|
|
|
228
|
768
|
100
|
|
|
|
1802
|
$self->{'staract'} = 1 if ($staract); |
229
|
|
|
|
|
|
|
|
230
|
768
|
100
|
|
|
|
1887
|
if (exists $methtrans{$methdam}) |
231
|
|
|
|
|
|
|
{ |
232
|
54
|
|
|
|
|
149
|
$self->{'methdam'} = $methtrans{$methdam}; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
else |
235
|
|
|
|
|
|
|
{ |
236
|
714
|
|
|
|
|
1704
|
$self->{'methdam'} = "indifferent"; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
768
|
100
|
|
|
|
1600
|
if (exists $methtrans{$methdcm}) |
240
|
|
|
|
|
|
|
{ |
241
|
105
|
|
|
|
|
247
|
$self->{'methdcm'} = $methtrans{$methdcm}; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
else |
244
|
|
|
|
|
|
|
{ |
245
|
663
|
|
|
|
|
1200
|
$self->{'methdcm'} = "indifferent"; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
768
|
100
|
|
|
|
1594
|
if (exists $methtrans{$methcpg}) |
249
|
|
|
|
|
|
|
{ |
250
|
390
|
|
|
|
|
925
|
$self->{'methcpg'} = $methtrans{$methcpg}; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
else |
253
|
|
|
|
|
|
|
{ |
254
|
378
|
|
|
|
|
723
|
$self->{'methcpg'} = "indifferent"; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
768
|
100
|
|
|
|
1770
|
if ($vendors) |
258
|
|
|
|
|
|
|
{ |
259
|
765
|
|
|
|
|
1260
|
my %vhsh = (); |
260
|
765
|
|
|
|
|
2730
|
foreach my $v (split(q{}, $vendors)) |
261
|
|
|
|
|
|
|
{ |
262
|
2991
|
50
|
|
|
|
9098
|
$vhsh{$v} = $RE_vendors{$v} if (exists $RE_vendors{$v}); |
263
|
2991
|
50
|
|
|
|
8451
|
carp("$v not in vendor list!") unless (exists $RE_vendors{$v}); |
264
|
|
|
|
|
|
|
} |
265
|
765
|
|
|
|
|
2447
|
$self->{'vendors'} = \%vhsh; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
768
|
50
|
|
|
|
3615
|
$self->{'buffers'} = $buffers if ($buffers); |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
768
|
50
|
|
|
|
1422
|
$self->{'start'} = $start if ($start); |
272
|
|
|
|
|
|
|
|
273
|
768
|
50
|
|
|
|
1348
|
$self->{'exclude'} = $exclude if ($exclude); |
274
|
|
|
|
|
|
|
|
275
|
768
|
|
|
|
|
5312
|
return $self; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head1 FUNCTIONAL METHODS |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head2 clone |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
By default in GeneDesign code, RestrictionEnzyme objects are meant to stand as |
283
|
|
|
|
|
|
|
abstracts - that is, they stand for BamHI in general, and not for a particular |
284
|
|
|
|
|
|
|
instance of a BamHI recognition site. If you want to use the objects in the |
285
|
|
|
|
|
|
|
latter sense, you will need to clone the abstract object instantiated when the |
286
|
|
|
|
|
|
|
definition file is read in, thus generating an arbitrary number of BamHI |
287
|
|
|
|
|
|
|
instances that can then be differentiated by their start attributes. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=cut |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub clone |
292
|
|
|
|
|
|
|
{ |
293
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
294
|
0
|
|
|
|
|
0
|
my $copy; |
295
|
0
|
|
|
|
|
0
|
foreach my $key (keys %$self) |
296
|
|
|
|
|
|
|
{ |
297
|
0
|
0
|
|
|
|
0
|
if (ref $self->{$key} eq "ARRAY") |
|
|
0
|
|
|
|
|
|
298
|
|
|
|
|
|
|
{ |
299
|
0
|
|
|
|
|
0
|
$copy->{$key} = [@{$self->{$key}}]; |
|
0
|
|
|
|
|
0
|
|
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
elsif (ref $self->{$key} eq "HASH") |
302
|
|
|
|
|
|
|
{ |
303
|
0
|
|
|
|
|
0
|
$copy->{$key} = {%{$self->{$key}}}; |
|
0
|
|
|
|
|
0
|
|
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
else |
306
|
|
|
|
|
|
|
{ |
307
|
0
|
|
|
|
|
0
|
$copy->{$key} = $self->{$key}; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
} |
310
|
0
|
|
|
|
|
0
|
bless $copy, ref $self; |
311
|
0
|
|
|
|
|
0
|
return $copy; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head2 positions |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Generates a hash describing the positions of the enzyme's recognition |
317
|
|
|
|
|
|
|
sites in a nucleotide sequence. Keys are offset in nucleotides, and values are |
318
|
|
|
|
|
|
|
the recognition site found at said offset as a string. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub positions |
323
|
|
|
|
|
|
|
{ |
324
|
29
|
|
|
29
|
1
|
44
|
my ($self, $seq) = @_; |
325
|
29
|
|
|
|
|
44
|
my $total = {}; |
326
|
29
|
|
|
|
|
36
|
foreach my $sit (@{$self->{regex}}) |
|
29
|
|
|
|
|
88
|
|
327
|
|
|
|
|
|
|
{ |
328
|
46
|
|
|
|
|
2006
|
while ($seq =~ /(?=($sit))/ixg) |
329
|
|
|
|
|
|
|
{ |
330
|
27
|
|
|
|
|
448
|
$total->{pos $seq} = $1; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
29
|
|
|
|
|
104
|
return $total; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=head2 overhang |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Given a nucleotide sequence context, what overhang does this enzyme leave, and |
339
|
|
|
|
|
|
|
how far away from the cutsite is it? |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Arguments: |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=cut |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
sub overhang |
346
|
|
|
|
|
|
|
{ |
347
|
0
|
|
|
0
|
1
|
0
|
my ($self, $dna, $context, $strand) = @_; |
348
|
0
|
|
|
|
|
0
|
my ($ohangstart, $mattersbit) = (0, q{}); |
349
|
0
|
|
|
|
|
0
|
my $lef; |
350
|
|
|
|
|
|
|
my $rig; |
351
|
0
|
0
|
|
|
|
0
|
if ($self->{class} eq "IIP") |
|
|
0
|
|
|
|
|
|
352
|
|
|
|
|
|
|
{ |
353
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = (length($1), length($2)) if ($self->{cutseq} =~ $IIPreg); |
354
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($rig, $lef) if ($rig < $lef); |
355
|
0
|
|
|
|
|
0
|
$ohangstart = $lef + 1; |
356
|
0
|
|
|
|
|
0
|
$mattersbit = substr($dna, $ohangstart-1, $rig-$lef); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
elsif ($self->{class} eq "IIA") |
359
|
|
|
|
|
|
|
{ |
360
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($1, $2) if ($self->{cutseq} =~ $IIAreg); |
361
|
0
|
0
|
|
|
|
0
|
($lef, $rig) = ($rig, $lef) if ($rig < $lef); |
362
|
0
|
0
|
|
|
|
0
|
if ($strand == 1) |
363
|
|
|
|
|
|
|
{ |
364
|
0
|
|
|
|
|
0
|
$ohangstart = length($dna) + $lef + 1; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
else |
367
|
|
|
|
|
|
|
{ |
368
|
0
|
|
|
|
|
0
|
$ohangstart = length($context) - length($dna) - $rig + 1; |
369
|
|
|
|
|
|
|
} |
370
|
0
|
|
|
|
|
0
|
$mattersbit = substr($context, $ohangstart-1, $rig-$lef); |
371
|
0
|
0
|
|
|
|
0
|
$ohangstart = $strand == 1 ? length($dna) + $lef : 0 - ($rig); |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
else |
374
|
|
|
|
|
|
|
{ |
375
|
0
|
|
|
|
|
0
|
return (); |
376
|
|
|
|
|
|
|
} |
377
|
0
|
|
|
|
|
0
|
return ($ohangstart, $mattersbit); |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 display |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Generates a tab delimited display string that can be used to print enzyme |
383
|
|
|
|
|
|
|
information out in a tabular format. |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=cut |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
sub display |
388
|
|
|
|
|
|
|
{ |
389
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
390
|
0
|
0
|
|
|
|
0
|
my $staract = $self->{staract} ? "*" : q{}; |
391
|
0
|
|
|
|
|
0
|
my (@blocked, @inhibed) = ((), ()); |
392
|
0
|
0
|
|
|
|
0
|
push @blocked, "cpg" if ($self->{methcpg} eq "blocked"); |
393
|
0
|
0
|
|
|
|
0
|
push @blocked, "dam" if ($self->{methdam} eq "blocked"); |
394
|
0
|
0
|
|
|
|
0
|
push @blocked, "dcm" if ($self->{methdcm} eq "blocked"); |
395
|
0
|
0
|
|
|
|
0
|
push @inhibed, "cpg" if ($self->{methcpg} eq "inhibited"); |
396
|
0
|
0
|
|
|
|
0
|
push @inhibed, "dam" if ($self->{methdam} eq "inhibited"); |
397
|
0
|
0
|
|
|
|
0
|
push @inhibed, "dcm" if ($self->{methdcm} eq "inhibited"); |
398
|
0
|
|
|
|
|
0
|
my $buffstr = undef; |
399
|
0
|
|
|
|
|
0
|
foreach (sort keys %{$self->{buffers}}) |
|
0
|
|
|
|
|
0
|
|
400
|
|
|
|
|
|
|
{ |
401
|
0
|
0
|
|
|
|
0
|
$buffstr .= "$_ (" . $self->{buffers}->{$_} . ") " if ($self->{buffers}->{$_}); |
402
|
|
|
|
|
|
|
} |
403
|
0
|
|
|
|
|
0
|
my $vendstr = join(", ", values %{$self->{vendors}}); |
|
0
|
|
|
|
|
0
|
|
404
|
0
|
|
|
|
|
0
|
my $display = undef; |
405
|
0
|
0
|
|
|
|
0
|
my $inact = $self->{tempin} ? " (". $self->{timein} . q{@} . $self->{tempin} . ")" : q{}; |
406
|
0
|
|
|
|
|
0
|
$display .= $self->{id} . "\t"; |
407
|
0
|
|
|
|
|
0
|
$display .= $self->{cutseq} . $staract . "\t"; |
408
|
0
|
|
|
|
|
0
|
$display .= $self->{type} . "\t"; |
409
|
0
|
0
|
|
|
|
0
|
$display .= $self->{start} . "\t" if ($self->{start}); |
410
|
0
|
|
|
|
|
0
|
$display .= $self->{temp} . $inact . "\t"; |
411
|
0
|
|
|
|
|
0
|
$display .= join(", ", @blocked) . "\t"; |
412
|
0
|
|
|
|
|
0
|
$display .= join(", ", @inhibed) . "\t"; |
413
|
0
|
|
|
|
|
0
|
$display .= $self->{score} . "\t"; |
414
|
0
|
|
|
|
|
0
|
$display .= $buffstr . "\t"; |
415
|
0
|
|
|
|
|
0
|
$display .= $vendstr . "\t"; |
416
|
0
|
|
|
|
|
0
|
return $display; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=head2 common_buffers |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Returns an array reference listing the buffers, if any, in which two enzymes |
422
|
|
|
|
|
|
|
both have 100% activity. in boolean mode returns the number of buffers |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=cut |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
sub common_buffers |
427
|
|
|
|
|
|
|
{ |
428
|
0
|
|
|
0
|
1
|
0
|
my ($self, $buddy, $bool) = @_; |
429
|
0
|
0
|
|
|
|
0
|
$self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme") |
430
|
|
|
|
|
|
|
unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme"); |
431
|
|
|
|
|
|
|
|
432
|
0
|
|
|
|
|
0
|
my $sbuffs = $self->{buffers}; |
433
|
0
|
|
|
|
|
0
|
my $bbuffs = $buddy->{buffers}; |
434
|
0
|
|
|
|
|
0
|
my @answer; |
435
|
0
|
|
|
|
|
0
|
foreach my $skey (sort keys %{$sbuffs}) |
|
0
|
|
|
|
|
0
|
|
436
|
|
|
|
|
|
|
{ |
437
|
0
|
|
|
|
|
0
|
my $sval = $sbuffs->{$skey}; |
438
|
0
|
|
|
|
|
0
|
my $bval = $bbuffs->{$skey}; |
439
|
0
|
0
|
0
|
|
|
0
|
if ($skey eq "Other" && $sval && $bval && "$sval" eq "$bval") |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
440
|
|
|
|
|
|
|
{ |
441
|
0
|
|
|
|
|
0
|
push @answer, $skey; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
elsif ($sval && $bval && "$sval" == 100 && "$bval" == 100) |
444
|
|
|
|
|
|
|
{ |
445
|
0
|
|
|
|
|
0
|
push @answer, $skey; |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
0
|
0
|
|
|
|
0
|
return $bool ? scalar(@answer) : \@answer; |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head2 acceptable_buffer |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Returns a buffer in which both enzymes will have at least a thresholded amount |
454
|
|
|
|
|
|
|
of activity. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=cut |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub acceptable_buffer |
459
|
|
|
|
|
|
|
{ |
460
|
0
|
|
|
0
|
1
|
0
|
my ($self, $buddy, $level) = @_; |
461
|
0
|
0
|
|
|
|
0
|
$self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme") |
462
|
|
|
|
|
|
|
unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme"); |
463
|
|
|
|
|
|
|
|
464
|
0
|
|
0
|
|
|
0
|
$level = $level || 75; |
465
|
0
|
|
|
|
|
0
|
my $sbuffs = $self->{buffers}; |
466
|
0
|
|
|
|
|
0
|
my $bbuffs = $buddy->{buffers}; |
467
|
0
|
|
|
|
|
0
|
my %answers; |
468
|
0
|
|
|
|
|
0
|
foreach my $skey (sort keys %{$sbuffs}) |
|
0
|
|
|
|
|
0
|
|
469
|
|
|
|
|
|
|
{ |
470
|
0
|
|
|
|
|
0
|
my $sval = $sbuffs->{$skey}; |
471
|
0
|
|
|
|
|
0
|
my $bval = $bbuffs->{$skey}; |
472
|
0
|
0
|
0
|
|
|
0
|
if ($skey eq "Other" && $sval && $bval && $sval == $bval) |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
473
|
|
|
|
|
|
|
{ |
474
|
0
|
|
|
|
|
0
|
$answers{$skey} = 200; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
elsif ($sval && $bval && $sval >= $level && $bval >= $level) |
477
|
|
|
|
|
|
|
{ |
478
|
0
|
|
|
|
|
0
|
$answers{$skey} = $sval + $bval; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
} |
481
|
0
|
0
|
|
|
|
0
|
my @keys = sort {$answers{$b} <=> $answers{$a} && $b cmp $a} keys %answers; |
|
0
|
|
|
|
|
0
|
|
482
|
0
|
0
|
|
|
|
0
|
return scalar @keys ? $keys[0] : undef; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head2 units |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Returns the number of units needed to cleave some sequence |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=cut |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
sub units |
492
|
|
|
|
|
|
|
{ |
493
|
0
|
|
|
0
|
1
|
0
|
my ($self, @args) = @_; |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
0
|
my ($buffer, $sequence) = $self->_rearrange([qw(buffer sequence)], @args); |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
|
498
|
0
|
|
|
|
|
0
|
my $poshsh = $self->positions($sequence); |
499
|
0
|
|
|
|
|
0
|
my $count = scalar keys %{$poshsh}; |
|
0
|
|
|
|
|
0
|
|
500
|
|
|
|
|
|
|
|
501
|
0
|
|
|
|
|
0
|
my $freq = $count / (length $sequence); |
502
|
|
|
|
|
|
|
|
503
|
0
|
|
0
|
|
|
0
|
my $aggr = $self->aggress() || .000001; |
504
|
0
|
|
|
|
|
0
|
$aggr = 1 / $aggr; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
0
|
|
|
0
|
$buffer = $buffer || $self->acceptable_buffer($self, 100); |
507
|
0
|
|
0
|
|
|
0
|
my $buff = $self->buffers->{$buffer} || 1; |
508
|
0
|
|
|
|
|
0
|
my $jad = $buff / 100; |
509
|
0
|
0
|
|
|
|
0
|
my $adj = $jad > 0 ? 1 / $jad : 0; |
510
|
|
|
|
|
|
|
|
511
|
0
|
|
|
|
|
0
|
my $units = sprintf("%.1f", $freq * $aggr * $adj); |
512
|
|
|
|
|
|
|
|
513
|
0
|
|
|
|
|
0
|
return $units; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head1 FILTERING METHODS |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=head2 filter_by_sequence |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
Arguments: an arrayref of string nucleotide sequences (may be ambiguous) |
522
|
|
|
|
|
|
|
a flag indicating whether or not the sequences in the array are |
523
|
|
|
|
|
|
|
required (1 means they must NOT match; default 0 means they must |
524
|
|
|
|
|
|
|
match) |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Returns : 1 if the enzyme passes; |
527
|
|
|
|
|
|
|
0 if the enzyme fails. |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=cut |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
sub filter_by_sequence |
532
|
|
|
|
|
|
|
{ |
533
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref, $req) = @_; |
534
|
0
|
0
|
|
|
|
0
|
$req = 0 if (! $req); |
535
|
0
|
|
|
|
|
0
|
my $result = 1; |
536
|
0
|
|
|
|
|
0
|
foreach my $seq (@$arrref) |
537
|
|
|
|
|
|
|
{ |
538
|
0
|
|
|
|
|
0
|
my $regex = _regres($seq, 1); |
539
|
0
|
0
|
|
|
|
0
|
if ($regex =~ /\[ X \]/x) |
540
|
|
|
|
|
|
|
{ |
541
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse sequence $seq containing non-nucleotide " |
542
|
|
|
|
|
|
|
. "characters - ignoring.\n"; |
543
|
0
|
|
|
|
|
0
|
next; |
544
|
|
|
|
|
|
|
} |
545
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( $req == 1 && $self->{recseq} =~ $regex ); |
546
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( $req == 0 && $self->{recseq} !~ $regex ); |
547
|
|
|
|
|
|
|
} |
548
|
0
|
|
|
|
|
0
|
return $result; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=head2 filter_by_score |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Arguments : a float |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Returns : 1 if the enzyme's score is less than or equal to the argument, |
556
|
|
|
|
|
|
|
0 if the enzyme's score is higher. |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=cut |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
sub filter_by_score |
561
|
|
|
|
|
|
|
{ |
562
|
0
|
|
|
0
|
1
|
0
|
my ($self, $score) = @_; |
563
|
0
|
|
|
|
|
0
|
my $result = 1; |
564
|
0
|
0
|
|
|
|
0
|
$result = 0 if ($self->{score} > $score); |
565
|
0
|
|
|
|
|
0
|
return $result; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head2 filter_by_vendor |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
Arguments : an arrayref of vendor abbreviations; see vendor(). |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
Returns : 1 if the enzyme is supplied by any of the vendors queried, |
573
|
|
|
|
|
|
|
0 else. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=cut |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
sub filter_by_vendor |
578
|
|
|
|
|
|
|
{ |
579
|
0
|
|
|
0
|
1
|
0
|
my ($self, $vendlist) = @_; |
580
|
0
|
|
|
|
|
0
|
my $result = 1; |
581
|
0
|
|
|
|
|
0
|
my $flag = 0; |
582
|
0
|
|
|
|
|
0
|
foreach my $vend (@$vendlist) |
583
|
|
|
|
|
|
|
{ |
584
|
0
|
0
|
|
|
|
0
|
unless (exists($RE_vendors{$vend})) |
585
|
|
|
|
|
|
|
{ |
586
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse vendor argument $vend - ignoring.\n"; |
587
|
0
|
|
|
|
|
0
|
next; |
588
|
|
|
|
|
|
|
} |
589
|
0
|
0
|
|
|
|
0
|
$flag++ if ( exists( $self->{vendors}->{$vend} ) ); |
590
|
|
|
|
|
|
|
} |
591
|
0
|
0
|
|
|
|
0
|
$result = $flag == 0 ? 0 : 1; |
592
|
0
|
|
|
|
|
0
|
return $result; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=head2 filter_by_buffer_activity |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Arguments : a hashref of buffer thresholds; the key is the buffer name, the |
598
|
|
|
|
|
|
|
value is an activity threshold. |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets all the buffer requirements, |
601
|
|
|
|
|
|
|
0 else. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=cut |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
sub filter_by_buffer_activity |
606
|
|
|
|
|
|
|
{ |
607
|
0
|
|
|
0
|
1
|
0
|
my ($self, $hshref) = @_; |
608
|
0
|
|
|
|
|
0
|
my $result = 1; |
609
|
0
|
|
|
|
|
0
|
my $rebuff = $self->{buffers}; |
610
|
0
|
|
|
|
|
0
|
foreach my $buff (keys %$hshref) |
611
|
|
|
|
|
|
|
{ |
612
|
0
|
|
|
|
|
0
|
my $val = $hshref->{$buff}; |
613
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ( ! exists($rebuff->{$buff}) || $rebuff->{$buff} < $val ); |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
0
|
|
|
|
|
0
|
return $result; |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=head2 filter_by_dcm_sensitivity |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
622
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the dcm sensitivity requirements, |
625
|
|
|
|
|
|
|
0 else. |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
sub filter_by_dcm_sensitivity |
630
|
|
|
|
|
|
|
{ |
631
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
632
|
0
|
|
|
|
|
0
|
my $result = 1; |
633
|
0
|
|
|
|
|
0
|
my %sensehsh; |
634
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
635
|
|
|
|
|
|
|
{ |
636
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
637
|
|
|
|
|
|
|
{ |
638
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
639
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse dcmsense argument $sense - ignoring.\n"; |
640
|
0
|
|
|
|
|
0
|
next; |
641
|
|
|
|
|
|
|
} |
642
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
643
|
|
|
|
|
|
|
} |
644
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methdcm}}) ); |
645
|
0
|
|
|
|
|
0
|
return $result; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=head2 filter_by_dam_sensitivity |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
651
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the dam sensitivity requirements, |
654
|
|
|
|
|
|
|
0 else. |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=cut |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub filter_by_dam_sensitivity |
659
|
|
|
|
|
|
|
{ |
660
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
661
|
0
|
|
|
|
|
0
|
my $result = 1; |
662
|
0
|
|
|
|
|
0
|
my %sensehsh; |
663
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
664
|
|
|
|
|
|
|
{ |
665
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
666
|
|
|
|
|
|
|
{ |
667
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
668
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse damsense argument $sense - ignoring.\n"; |
669
|
0
|
|
|
|
|
0
|
next; |
670
|
|
|
|
|
|
|
} |
671
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
672
|
|
|
|
|
|
|
} |
673
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methdam}}) ); |
674
|
0
|
|
|
|
|
0
|
return $result; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head2 filter_by_cpg_sensitivity |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Arguments : an arrayref of sensitivity values; the key is the sensitivity |
680
|
|
|
|
|
|
|
blocked, inhibited, or indifferent |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the cpg sensitivity requirements, |
683
|
|
|
|
|
|
|
0 else. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=cut |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
sub filter_by_cpg_sensitivity |
688
|
|
|
|
|
|
|
{ |
689
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
690
|
0
|
|
|
|
|
0
|
my $result = 1; |
691
|
0
|
|
|
|
|
0
|
my %sensehsh; |
692
|
0
|
|
|
|
|
0
|
foreach my $sense (@$arrref) |
693
|
|
|
|
|
|
|
{ |
694
|
0
|
0
|
0
|
|
|
0
|
if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent") |
|
|
|
0
|
|
|
|
|
695
|
|
|
|
|
|
|
{ |
696
|
0
|
|
|
|
|
0
|
$sense = lc $sense; |
697
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse cpgsense argument $sense - ignoring.\n"; |
698
|
0
|
|
|
|
|
0
|
next; |
699
|
|
|
|
|
|
|
} |
700
|
0
|
|
|
|
|
0
|
$sensehsh{$sense}++; |
701
|
|
|
|
|
|
|
} |
702
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists($sensehsh{$self->{methcpg}}) ); |
703
|
0
|
|
|
|
|
0
|
return $result; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=head2 filter_by_star_activity |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Arguments : 1 if star activity required, 0 else |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the star activity requirements, |
711
|
|
|
|
|
|
|
0 else. |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=cut |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
sub filter_by_star_activity |
716
|
|
|
|
|
|
|
{ |
717
|
0
|
|
|
0
|
1
|
0
|
my ($self, $star) = @_; |
718
|
0
|
|
|
|
|
0
|
my $result = 1; |
719
|
0
|
0
|
|
|
|
0
|
$star = 0 unless ($star); |
720
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if (($star && ! $self->{staract}) || (! $star && $self->{staract})); |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
721
|
0
|
|
|
|
|
0
|
return $result; |
722
|
|
|
|
|
|
|
} |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=head2 filter_by_incubation_temperature |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable integer incubation temperatures |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the incubation temperature requirements, |
729
|
|
|
|
|
|
|
0 else. |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=cut |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
sub filter_by_incubation_temperature |
734
|
|
|
|
|
|
|
{ |
735
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
736
|
0
|
|
|
|
|
0
|
my $result = 1; |
737
|
0
|
|
|
|
|
0
|
my %temps; |
738
|
0
|
|
|
|
|
0
|
foreach my $temp (@$arrref) |
739
|
|
|
|
|
|
|
{ |
740
|
0
|
0
|
0
|
|
|
0
|
if ($temp !~ /\d/x || $temp <= 0) |
741
|
|
|
|
|
|
|
{ |
742
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse incubation argument $temp - ignoring.\n"; |
743
|
|
|
|
|
|
|
} |
744
|
0
|
|
|
|
|
0
|
$temps{$temp}++; |
745
|
|
|
|
|
|
|
} |
746
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists $temps{$self->{temp}}); |
747
|
0
|
|
|
|
|
0
|
return $result; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 filter_by_inactivation_temperature |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Arguments : an acceptable integer inactivation temperature maximum |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the inactivation temperature requirement, |
755
|
|
|
|
|
|
|
0 else. |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
=cut |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
sub filter_by_inactivation_temperature |
760
|
|
|
|
|
|
|
{ |
761
|
0
|
|
|
0
|
1
|
0
|
my ($self, $temp) = @_; |
762
|
0
|
|
|
|
|
0
|
my $result = 1; |
763
|
0
|
0
|
0
|
|
|
0
|
if ($temp !~ /\d/x || $temp <= 0) |
764
|
|
|
|
|
|
|
{ |
765
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse inactivation argument $temp - ignoring.\n"; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
else |
768
|
|
|
|
|
|
|
{ |
769
|
0
|
0
|
|
|
|
0
|
$result = 0 if ($self->{tempin} > $temp); |
770
|
|
|
|
|
|
|
} |
771
|
0
|
|
|
|
|
0
|
return $result; |
772
|
|
|
|
|
|
|
} |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=head2 filter_by_base_ambiguity |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
Arguments : "nonNonly" if any non N bases are allowed; "ATCGonly" if only |
777
|
|
|
|
|
|
|
A, T, C, or G are allowed |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the ambiguous nucleotide requirement, |
780
|
|
|
|
|
|
|
0 else. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=cut |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub filter_by_base_ambiguity |
785
|
|
|
|
|
|
|
{ |
786
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ambig) = @_; |
787
|
0
|
|
|
|
|
0
|
my $result = 1; |
788
|
0
|
0
|
0
|
|
|
0
|
if ($ambig ne "nonNonly" && $ambig ne "ATCGonly") |
789
|
|
|
|
|
|
|
{ |
790
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse ambiguity argument $ambig - ignoring.\n"; |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
else |
793
|
|
|
|
|
|
|
{ |
794
|
0
|
|
|
|
|
0
|
my $ambregex; |
795
|
0
|
0
|
|
|
|
0
|
$ambregex = qr/N/ if ($ambig eq "nonNonly"); |
796
|
0
|
0
|
|
|
|
0
|
$ambregex = $ambnt if ($ambig eq "ATCGonly"); |
797
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( $self->{recseq} =~ $ambregex ); |
798
|
|
|
|
|
|
|
} |
799
|
0
|
|
|
|
|
0
|
return $result; |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=head2 filter_by_length |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable recognition site lengths |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the recognition site length requirements, |
807
|
|
|
|
|
|
|
0 else. |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
=cut |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
sub filter_by_length |
812
|
|
|
|
|
|
|
{ |
813
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
814
|
0
|
|
|
|
|
0
|
my $result = 1; |
815
|
0
|
|
|
|
|
0
|
my %lens; |
816
|
0
|
|
|
|
|
0
|
foreach my $len (@$arrref) |
817
|
|
|
|
|
|
|
{ |
818
|
0
|
0
|
0
|
|
|
0
|
if ($len =~ /\D/x || $len <= 0) |
819
|
|
|
|
|
|
|
{ |
820
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse length argument $len - ignoring.\n"; |
821
|
0
|
|
|
|
|
0
|
next; |
822
|
|
|
|
|
|
|
} |
823
|
0
|
|
|
|
|
0
|
$lens{$len}++; |
824
|
|
|
|
|
|
|
} |
825
|
0
|
0
|
|
|
|
0
|
$result = 0 unless ( exists $lens{length($self->{recseq})} ); |
826
|
0
|
|
|
|
|
0
|
return $result; |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
=head2 filter_by_overhang_palindromy |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable overhang palindromys, from the list |
832
|
|
|
|
|
|
|
pal (palindromic), |
833
|
|
|
|
|
|
|
nonpal (nonpalindromic), |
834
|
|
|
|
|
|
|
pnon (potentially nonpalindromic) |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the palindromy requirements, |
837
|
|
|
|
|
|
|
0 else. |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
=cut |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
sub filter_by_overhang_palindromy |
842
|
|
|
|
|
|
|
{ |
843
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
844
|
0
|
|
|
|
|
0
|
my $result = 1; |
845
|
0
|
|
|
|
|
0
|
my %pals; |
846
|
0
|
|
|
|
|
0
|
foreach my $pal (@$arrref) |
847
|
|
|
|
|
|
|
{ |
848
|
0
|
0
|
0
|
|
|
0
|
if ($pal ne "pal" && $pal ne "pnon" && $pal ne "nonpal") |
|
|
|
0
|
|
|
|
|
849
|
|
|
|
|
|
|
{ |
850
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse palindromy argument $pal - ignoring.\n"; |
851
|
0
|
|
|
|
|
0
|
next; |
852
|
|
|
|
|
|
|
} |
853
|
0
|
|
|
|
|
0
|
$pals{$pal}++; |
854
|
|
|
|
|
|
|
} |
855
|
0
|
0
|
|
|
|
0
|
$result = 0 unless (exists $pals{$self->{palindromy}}); |
856
|
0
|
|
|
|
|
0
|
return $result; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=head2 filter_by_stickiness |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
Arguments : an arrayref of acceptable overhang orientations, from the list |
862
|
|
|
|
|
|
|
1 (single basepair overhang), |
863
|
|
|
|
|
|
|
5 (five prime overhang), |
864
|
|
|
|
|
|
|
3 (three prime overhang), |
865
|
|
|
|
|
|
|
b (blunt ended) |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
Returns : 1 if the enzyme meets the overhang requirements, |
868
|
|
|
|
|
|
|
0 else. |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=cut |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
sub filter_by_stickiness |
873
|
|
|
|
|
|
|
{ |
874
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arrref) = @_; |
875
|
0
|
|
|
|
|
0
|
my $result = 1; |
876
|
0
|
|
|
|
|
0
|
my %sticks; |
877
|
0
|
|
|
|
|
0
|
foreach my $stick (@$arrref) |
878
|
|
|
|
|
|
|
{ |
879
|
0
|
0
|
0
|
|
|
0
|
if ($stick ne "5" && $stick ne "3" && $stick ne "1" && $stick ne "b") |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
880
|
|
|
|
|
|
|
{ |
881
|
0
|
|
|
|
|
0
|
print "\tWARNING: Can't parse sticky argument $stick - ignoring.\n"; |
882
|
0
|
|
|
|
|
0
|
next; |
883
|
|
|
|
|
|
|
} |
884
|
0
|
|
|
|
|
0
|
$sticks{$stick}++; |
885
|
|
|
|
|
|
|
} |
886
|
0
|
0
|
0
|
|
|
0
|
$result = 0 if ($self->{onebpoverhang} && ! exists $sticks{1}); |
887
|
0
|
|
|
|
|
0
|
my $type = $self->{type}; |
888
|
0
|
|
|
|
|
0
|
$type =~ s/\'//xg; |
889
|
0
|
0
|
|
|
|
0
|
$result = 0 unless (exists $sticks{$type}); |
890
|
0
|
|
|
|
|
0
|
return $result; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=head1 ACCESSOR METHODS |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
Methods for setting and accessing enzyme attributes |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=head2 id |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
The name of the enzyme. |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=cut |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
sub id |
904
|
|
|
|
|
|
|
{ |
905
|
393314
|
|
|
393314
|
1
|
557684
|
my ($self) = @_; |
906
|
393314
|
|
|
|
|
2242830
|
return $self->{'id'}; |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=head2 display_name |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
The name of the enzyme. |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=cut |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
sub display_name |
916
|
|
|
|
|
|
|
{ |
917
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
918
|
0
|
|
|
|
|
0
|
return $self->{'id'}; |
919
|
|
|
|
|
|
|
} |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=head2 score |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
This attribute initially holds the price in dollars per unit of the enzyme |
924
|
|
|
|
|
|
|
(2009 US Dollars) but can be used to hold any score or cost value. |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=cut |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
sub score |
929
|
|
|
|
|
|
|
{ |
930
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
931
|
0
|
0
|
|
|
|
0
|
if (defined $value) |
932
|
|
|
|
|
|
|
{ |
933
|
0
|
|
|
|
|
0
|
$self->{'score'} = $value; |
934
|
|
|
|
|
|
|
} |
935
|
0
|
|
|
|
|
0
|
return $self->{'score'}; |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
=head2 aggress |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Aggressiveness is the number of recognition sites in a template piece of DNA |
941
|
|
|
|
|
|
|
(usually lambda, but sometimes adeno2, pBR322, pUC19, pXba, etc) over the total |
942
|
|
|
|
|
|
|
length of that template piece of DNA. This number tells the manufacturer how |
943
|
|
|
|
|
|
|
much enzyme to sell as a "unit" - the amount of enzyme required to fully digest |
944
|
|
|
|
|
|
|
one microgram of template DNA under reaction conditions in an hour. |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
=cut |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
sub aggress |
949
|
|
|
|
|
|
|
{ |
950
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
951
|
0
|
0
|
|
|
|
0
|
if (defined $value) |
952
|
|
|
|
|
|
|
{ |
953
|
0
|
|
|
|
|
0
|
$self->{'aggress'} = $value; |
954
|
|
|
|
|
|
|
} |
955
|
0
|
|
|
|
|
0
|
return $self->{'aggress'}; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=head2 len |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
The length in bases of the recognition sequence (recseq). |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
=cut |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
sub len |
965
|
|
|
|
|
|
|
{ |
966
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
967
|
0
|
|
|
|
|
0
|
return $self->{'length'}; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=head2 methcpg |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
The effect of CpG methylation on the enzyme's efficacy. |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
=cut |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
sub methcpg |
977
|
|
|
|
|
|
|
{ |
978
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
979
|
0
|
|
|
|
|
0
|
return $self->{'methcpg'}; |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
=head2 methdcm |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
The effect of Dcm methylation on the enzyme's efficacy. |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
=cut |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
sub methdcm |
989
|
|
|
|
|
|
|
{ |
990
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
991
|
0
|
|
|
|
|
0
|
return $self->{'methdcm'}; |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
=head2 methdam |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
The effect of Dam methylation on the enzyme's efficacy. |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=cut |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
sub methdam |
1001
|
|
|
|
|
|
|
{ |
1002
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1003
|
0
|
|
|
|
|
0
|
return $self->{'methdam'}; |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
=head2 buffers |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
A hash reference where the keys are buffer names and the values are the activity |
1009
|
|
|
|
|
|
|
level of the enzyme in that Buffer. Since most of the enzymes in the default |
1010
|
|
|
|
|
|
|
GeneDesign list are NEB enzymes, this is usually full of NEB buffers. |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
=cut |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
sub buffers |
1015
|
|
|
|
|
|
|
{ |
1016
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1017
|
0
|
|
|
|
|
0
|
return $self->{'buffers'}; |
1018
|
|
|
|
|
|
|
} |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
=head2 vendors |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
A hash reference where the keys are abbreviations for and the values are names |
1023
|
|
|
|
|
|
|
of vendors that stock the enzyme. These are read in from the enzyme file. |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
B = Invitrogen |
1026
|
|
|
|
|
|
|
C = Minotech |
1027
|
|
|
|
|
|
|
E = Stratagene |
1028
|
|
|
|
|
|
|
F = Thermo Scientific Fermentas |
1029
|
|
|
|
|
|
|
I = SibEnzyme |
1030
|
|
|
|
|
|
|
J = Nippon Gene Co. |
1031
|
|
|
|
|
|
|
K = Takara |
1032
|
|
|
|
|
|
|
M = Roche Applied Science |
1033
|
|
|
|
|
|
|
N = New England Biolabs |
1034
|
|
|
|
|
|
|
O = Toyobo Technologies |
1035
|
|
|
|
|
|
|
Q = Molecular Biology Resources |
1036
|
|
|
|
|
|
|
R = Promega |
1037
|
|
|
|
|
|
|
S = Sigma Aldrich |
1038
|
|
|
|
|
|
|
U = Bangalore Genei |
1039
|
|
|
|
|
|
|
V = Vivantis |
1040
|
|
|
|
|
|
|
X = EURx |
1041
|
|
|
|
|
|
|
Y = CinnaGen |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
=cut |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
sub vendors |
1046
|
|
|
|
|
|
|
{ |
1047
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1048
|
0
|
|
|
|
|
0
|
return $self->{'vendors'}; |
1049
|
|
|
|
|
|
|
} |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=head2 tempin |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
The temperature in degrees Celsius that deactivates the enzyme. |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=cut |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
sub tempin |
1058
|
|
|
|
|
|
|
{ |
1059
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1060
|
0
|
|
|
|
|
0
|
return $self->{'tempin'}; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=head2 timein |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
The time required at inactivation temperature to deactivate the enzyme. |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
=cut |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
sub timein |
1070
|
|
|
|
|
|
|
{ |
1071
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1072
|
0
|
|
|
|
|
0
|
return $self->{'timein'}; |
1073
|
|
|
|
|
|
|
} |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
=head2 temp |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Incubation temperature for the best enzyme activity, in degrees Celsius. |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
=cut |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
sub temp |
1082
|
|
|
|
|
|
|
{ |
1083
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1084
|
0
|
|
|
|
|
0
|
return $self->{'temp'}; |
1085
|
|
|
|
|
|
|
} |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
=head2 recseq |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
This attribute is the "clean" description of the enzyme's recognition sequence - |
1090
|
|
|
|
|
|
|
that is, no information about cleavage site can be gained from this attribute. |
1091
|
|
|
|
|
|
|
This is determined automatically from the cleavage string (cutseq) at |
1092
|
|
|
|
|
|
|
instantiation. |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
=cut |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
sub recseq |
1097
|
|
|
|
|
|
|
{ |
1098
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1099
|
0
|
|
|
|
|
0
|
return $self->{'recseq'}; |
1100
|
|
|
|
|
|
|
} |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
=head2 seq |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
Synonym for recseq |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
=cut |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
sub seq |
1109
|
|
|
|
|
|
|
{ |
1110
|
21
|
|
|
21
|
1
|
40
|
my ($self) = @_; |
1111
|
21
|
|
|
|
|
131
|
return $self->{'recseq'}; |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
=head2 cutseq |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
This attribute is the string description of the enzyme's recognition sequence. |
1117
|
|
|
|
|
|
|
It includes information about both the recognition and cleavage sites. |
1118
|
|
|
|
|
|
|
See http://rebase.neb.com/rebase/rebrec.html for help interpreting this field. |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
=cut |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub cutseq |
1123
|
|
|
|
|
|
|
{ |
1124
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1125
|
0
|
|
|
|
|
0
|
return $self->{'cutseq'}; |
1126
|
|
|
|
|
|
|
} |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=head2 regex |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
This attribute stores a set of regular expressions as an array reference to |
1131
|
|
|
|
|
|
|
speed the search for recognition sites in sequence. The first entry in the |
1132
|
|
|
|
|
|
|
arrayref is the regular expression representing the forward orientation of |
1133
|
|
|
|
|
|
|
the recognition sequence; the second entry represents the reverse orientation |
1134
|
|
|
|
|
|
|
and is only defined if the recognition site is nonpalindromic. |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
This attribute is defined at instantiation. |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=cut |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
sub regex |
1141
|
|
|
|
|
|
|
{ |
1142
|
1
|
|
|
1
|
1
|
3
|
my ($self) = @_; |
1143
|
1
|
|
|
|
|
5
|
return $self->{'regex'}; |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
=head2 class |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
Class describes the cutting behavior of an enzyme. The classes used by |
1149
|
|
|
|
|
|
|
GeneDesign uses a generalized subset of the classes as described at Rebase - for |
1150
|
|
|
|
|
|
|
the purposes of enzyme editing, three classes have so far proven to be enough. |
1151
|
|
|
|
|
|
|
See http://rebase.neb.com/cgi-bin/sublist for the full description of enzyme |
1152
|
|
|
|
|
|
|
classes. |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
IIP : This enzyme has a symmetric target and a symmetric cleavage site; this |
1155
|
|
|
|
|
|
|
usually means that the enzyme cleaves inside its own recognition site. |
1156
|
|
|
|
|
|
|
This is not the same as overhang palindromy! |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
IIA : This enzyme has an asymmetric recognition site and usually cleaves |
1159
|
|
|
|
|
|
|
outside of it. |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
IIB : This enzyme has one recognition site and two cleavage sites, one on |
1162
|
|
|
|
|
|
|
either side of the recognition site, and thus cuts itself out of |
1163
|
|
|
|
|
|
|
sequence. |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
=cut |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
sub class |
1168
|
|
|
|
|
|
|
{ |
1169
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1170
|
0
|
|
|
|
|
0
|
return $self->{'class'}; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
=head2 classex |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
=cut |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
sub classex |
1178
|
|
|
|
|
|
|
{ |
1179
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1180
|
0
|
|
|
|
|
0
|
return $self->{'classex'}; |
1181
|
|
|
|
|
|
|
} |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=head2 class_regexes |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
Short cut to accessing class regular expressions |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
=cut |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
sub class_regexes |
1190
|
|
|
|
|
|
|
{ |
1191
|
0
|
|
|
0
|
1
|
0
|
return {"IIP" => $IIPreg, "IIA" => $IIAreg, "IIB" => $IIBreg}; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 type |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
Type describes the kind of overhang left by an enzyme. This is probably not a |
1197
|
|
|
|
|
|
|
good use of the word type. |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
Type may be 5', for a five prime overhang; 3', for a three prime overhang; |
1200
|
|
|
|
|
|
|
or b for blunt ends. |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
=cut |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
sub type |
1205
|
|
|
|
|
|
|
{ |
1206
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1207
|
0
|
|
|
|
|
0
|
return $self->{'type'}; |
1208
|
|
|
|
|
|
|
} |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
=head2 onebpoverhang |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
One basepair overhangs can be harder to ligate than blunt ends. This attribute |
1214
|
|
|
|
|
|
|
returns 1 if an enzyme leaves a 1bp overhang and 0 else. |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
=cut |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
sub onebpoverhang |
1219
|
|
|
|
|
|
|
{ |
1220
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
1221
|
0
|
|
|
|
|
0
|
return $self->{'onebpoverhang'}; |
1222
|
|
|
|
|
|
|
} |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
=head2 exclude |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
Some enzymes share overlapping recognition sites. If you are trying to ensure |
1227
|
|
|
|
|
|
|
the absence or uniqueness of a recognition site, you will want to be sure to |
1228
|
|
|
|
|
|
|
exclude isoschizomers and neoschizomers from consideration elsewhere. The |
1229
|
|
|
|
|
|
|
exclude attribute stores an array reference that lists the ids of neo- and |
1230
|
|
|
|
|
|
|
isoschizomers - or any arbitrary enzyme that is incompatible with this enzyme - |
1231
|
|
|
|
|
|
|
for easy lookup. |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
=cut |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
sub exclude |
1236
|
|
|
|
|
|
|
{ |
1237
|
768
|
|
|
768
|
1
|
1553
|
my ($self, $value) = @_; |
1238
|
768
|
50
|
|
|
|
2515
|
if (defined $value) |
1239
|
|
|
|
|
|
|
{ |
1240
|
768
|
50
|
|
|
|
2925
|
$self->throw("$value is not a reference to an array") |
1241
|
|
|
|
|
|
|
unless (ref $value eq "ARRAY"); |
1242
|
768
|
|
|
|
|
2841
|
$self->{'exclude'} = $value; |
1243
|
|
|
|
|
|
|
} |
1244
|
768
|
|
|
|
|
5616
|
return $self->{'exclude'}; |
1245
|
|
|
|
|
|
|
} |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=head2 palindromy |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
Information about the overhang the enzyme leaves. |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
pal = palindromic |
1252
|
|
|
|
|
|
|
nonpal = nonpalindromic |
1253
|
|
|
|
|
|
|
pnon = potentially nonpalindromic, or sometimes palindromic and sometimes |
1254
|
|
|
|
|
|
|
nonpalindromic |
1255
|
|
|
|
|
|
|
unknown = unknown |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
=cut |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
sub palindromy |
1260
|
|
|
|
|
|
|
{ |
1261
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1262
|
0
|
|
|
|
|
|
return $self->{'palindromy'}; |
1263
|
|
|
|
|
|
|
} |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=head2 staract |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
1 if the enzyme exhibits star activity, 0 else |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
=cut |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
sub staract |
1272
|
|
|
|
|
|
|
{ |
1273
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1274
|
0
|
|
|
|
|
|
return $self->{'staract'}; |
1275
|
|
|
|
|
|
|
} |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
=head2 start |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
The offset in nucleotides of the enzymes recognition site in an ORF |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=cut |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
sub start |
1284
|
|
|
|
|
|
|
{ |
1285
|
0
|
|
|
0
|
1
|
|
my ($self, $value) = @_; |
1286
|
0
|
0
|
|
|
|
|
if (defined $value) |
1287
|
|
|
|
|
|
|
{ |
1288
|
0
|
|
|
|
|
|
$self->{'start'} = $value; |
1289
|
|
|
|
|
|
|
} |
1290
|
0
|
|
|
|
|
|
return $self->{'start'}; |
1291
|
|
|
|
|
|
|
} |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=head2 outside_cut |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=cut |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
sub outside_cut |
1298
|
|
|
|
|
|
|
{ |
1299
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1300
|
0
|
|
|
|
|
|
return $self->{'outside_cut'}; |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=head2 inside_cut |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=cut |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
sub inside_cut |
1308
|
|
|
|
|
|
|
{ |
1309
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
1310
|
0
|
|
|
|
|
|
return $self->{'inside_cut'}; |
1311
|
|
|
|
|
|
|
} |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
1; |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
__END__ |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
Copyright (c) 2013, GeneDesign developers |
1320
|
|
|
|
|
|
|
All rights reserved. |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without modification, |
1323
|
|
|
|
|
|
|
are permitted provided that the following conditions are met: |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice, this |
1326
|
|
|
|
|
|
|
list of conditions and the following disclaimer. |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice, this |
1329
|
|
|
|
|
|
|
list of conditions and the following disclaimer in the documentation and/or |
1330
|
|
|
|
|
|
|
other materials provided with the distribution. |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley |
1333
|
|
|
|
|
|
|
National Laboratory, the Department of Energy, and the GeneDesign developers may |
1334
|
|
|
|
|
|
|
not be used to endorse or promote products derived from this software without |
1335
|
|
|
|
|
|
|
specific prior written permission. |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
1338
|
|
|
|
|
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
1339
|
|
|
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
1340
|
|
|
|
|
|
|
DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, |
1341
|
|
|
|
|
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
1342
|
|
|
|
|
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
1343
|
|
|
|
|
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
1344
|
|
|
|
|
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
1345
|
|
|
|
|
|
|
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
1346
|
|
|
|
|
|
|
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=cut |