File Coverage

blib/lib/Bio/GeneDesign/RestrictionEnzyme.pm
Criterion Covered Total %
statement 110 388 28.3
branch 46 188 24.4
condition 7 120 5.8
subroutine 11 55 20.0
pod 50 50 100.0
total 224 801 27.9


line stmt bran cond sub pod time code
1             #
2             # GeneDesign module for restriction enzyme handing
3             #
4              
5             =head1 NAME
6              
7             Bio::GeneDesign::RestrictionEnzyme
8              
9             =head1 VERSION
10              
11             Version 5.56
12              
13             =head1 DESCRIPTION
14              
15             GeneDesign object that represents a type II restriction enzyme
16              
17             =head1 AUTHOR
18              
19             Sarah Richardson
20              
21             =cut
22              
23             package Bio::GeneDesign::RestrictionEnzyme;
24              
25 11     11   68 use Bio::GeneDesign::Basic qw(:GD);
  11         22  
  11         1674  
26 11     11   68 use Carp;
  11         19  
  11         606  
27              
28 11     11   65 use strict;
  11         19  
  11         211  
29 11     11   56 use warnings;
  11         25  
  11         370  
30              
31 11     11   60 use base qw(Bio::Root::Root);
  11         23  
  11         45664  
32              
33             our $VERSION = 5.56;
34              
35             my $IIPreg = qr/ ([A-Z]*) \^ ([A-Z]*) /x;
36             my $IIAreg = qr/\A \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x;
37             my $IIBreg = qr/\A\(([\-]*\d+) \/ ([\-]*\d+)\) \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x;
38              
39             my %RE_vendors = (
40             B => "Invitrogen", C => "Minotech", E => "Stratagene Agilent",
41             F => "Thermo Scientific Fermentas", I => "SibEnzyme", J => "Nippon Gene Co.",
42             K => "Takara", M => "Roche Applied Science", N => "New England Biolabs",
43             O => "Toyobo Technologies", Q => "Molecular Biology Resources",
44             R => "Promega", S => "Sigma Aldrich", U => "Bangalore Genei", V => "Vivantis",
45             X => "EURx", Y => "CinnaGen"
46             );
47              
48             my %methtrans = (b => "blocked", blocked => "blocked",
49             i => "inhibited", inhibited => "inhibited",
50             u => "unknown", unknown => "unknown"
51             );
52              
53             =head1 CONSTRUCTOR METHODS
54              
55             =head2 new
56              
57             You can create a new enzyme or clone an existing enzyme to create a new instance
58             of an abstract enzyme definition. To do this, provide the -enzyme flag; the
59             constructor will ignore every other argument except for -start.
60              
61             Required arguments:
62              
63             EITHER
64              
65             -enzyme : a Bio::GeneDesign::RestrictionEnzyme object to clone
66              
67             OR
68             -id : The name of the enzyme (i.e., BamHI)
69             -cutseq : The string describing the enzyme's recognition and cleavage
70             site
71              
72             Optional arguments:
73              
74             -temp : The incubation temperature for the enzyme
75             -tempin : The heat inactivation temperature for the enzyme
76             -score : A float score, usually the price of the enzyme in dollars
77             -methdam : Sensitivity to dam methylation; can take the values
78             b or blocked,
79             i or inhibited,
80             u or unknown,
81             if undefined, will take the value indifferent.
82             -methdcm : Sensitivity to dcm methylation; can take the values
83             b or blocked,
84             i or inhibited,
85             u or unknown,
86             if undefined, will take the value indifferent.
87             -methcpg : Sensitivity to cpg methylation; can take the values
88             b or blocked,
89             i or inhibited,
90             u or unknown,
91             if undefined, will take the value indifferent.
92             -vendors : a string of single letter codes that represent vendor
93             availability - no spaces. see vendor() for a list of the
94             codes.
95             -staract : Whether or not the enzyme exhibits star activity - 1 or 0.
96             -buffers : a hash reference; keys are buffer names and values are the
97             enzyme activity in that buffer. For example:
98             NEB1 => 50, NEB2 => 100, etc.
99             -start : An integer representing an offset; usually used only in
100             cloned instances, as opposed to abstract instances.
101             -exclude : An arrayref full of ids for enzymes that should be
102             considered mutually exclusive to this enzyme - see exclude()
103              
104             =cut
105              
106             sub new
107             {
108 768     768 1 5338 my ($class, @args) = @_;
109 768         2850 my $self = $class->SUPER::new(@args);
110              
111 768         24277 my ($object, $id, $cutseq, $temp, $tempin, $score, $methdam, $methdcm,
112             $methcpg, $vendors, $staract, $buffers, $start, $exclude, $aggress) =
113             $self->_rearrange([qw(ENZYME ID CUTSEQ TEMP TEMPIN SCORE METHDAM METHDCM
114             METHCPG VENDORS STARACT BUFFERS START EXCLUDE AGGRESS)], @args);
115              
116 768 50       58195 if ($object)
117             {
118 0 0       0 $self->throw("object of class " . ref($object) . " does not implement ".
119             "Bio::GeneDesign::RestrictionEnzyme.")
120             unless $object->isa("Bio::GeneDesign::RestrictionEnzyme");
121 0         0 $self = $object->clone();
122             }
123             else
124             {
125              
126 768 50       1987 $self->throw("No enzyme id defined") unless ($id);
127 768         1744 $self->{id} = $id;
128              
129 768 50       1550 $self->throw("No cut sequence defined") unless ($cutseq);
130 768         1444 $self->{cutseq} = $cutseq;
131              
132 768         1250 my $recseq = $cutseq;
133 768         5965 $recseq =~ s/\W*\d*//xg;
134 768         1771 $self->{recseq} = $recseq;
135              
136             #Regular expression arrayref to use for enzyme searching
137             #Should store as compiled regexes instead
138 768         2097 $self->{regex} = _regarr($recseq);
139              
140 768         1514 my $sitelen = length($recseq);
141 768         1390 $self->{length} = $sitelen;
142              
143             #Enzyme Class and Palindromy
144 768         1418 my ($lef, $rig) = (q{}, q{});
145 768 100       5489 if ($cutseq =~ $IIPreg)
    100          
    50          
146             {
147 567         1238 $lef = length($1);
148 567         810 $rig = length($2);
149 567         937 $self->{class} = "IIP";
150 567         1169 $self->{classex} = $IIPreg;
151              
152 567 100       962 if ($lef == $rig)
153             {
154 123         216 $self->{palindromy} = "unknown";
155             }
156             else
157             {
158 444         584 my $inlef = $lef;
159 444 100       1377 $inlef = length($recseq) - $inlef if ($inlef > (.5 * length($recseq)));
160 444         1375 my $mattersbit = substr($recseq, $inlef, length($recseq) - (2 * $inlef));
161 444 100 66     3899 if ($mattersbit && $mattersbit =~ $ambnt && length($mattersbit) % 2 == 0)
    100 100        
    50 66        
162             {
163 54         167 $self->{palindromy} = "pnon";
164             }
165             elsif ($mattersbit && $mattersbit eq _complement($mattersbit, 1))
166             {
167 384         1069 $self->{palindromy} = "pal";
168             }
169             elsif ($mattersbit)
170             {
171 6         23 $self->{palindromy} = "nonpal";
172             }
173             }
174             }
175             elsif ($cutseq =~ $IIBreg)
176             {
177 33         99 $lef = int($1);
178 33         70 $rig = int($2);
179 33         83 $self->{class} = "IIB";
180 33         78 $self->{classex} = $IIBreg;
181 33         69 $self->{palindromy} = "pnon";
182             }
183             elsif ($cutseq =~ $IIAreg)
184             {
185 168         501 $lef = int($1);
186 168         250 $rig = int($2);
187 168         318 $self->{class} = "IIA";
188 168         338 $self->{classex} = $IIAreg;
189 168         262 $self->{palindromy} = "pnon";
190             }
191             else
192             {
193 0         0 $self->{class} = "unknown";
194             }
195              
196             #Enzyme type
197 768         984 my $type;
198 768 100       1535 if ($lef < $rig)
    100          
    50          
199             {
200 438         662 $type .= "5'";
201 438         698 $self->{inside_cut} = $lef;
202 438         668 $self->{outside_cut} = $rig;
203             }
204             elsif ($lef > $rig)
205             {
206 195         313 $type .= "3'";
207 195         292 $self->{inside_cut} = $rig;
208 195         282 $self->{outside_cut} = $lef;
209             }
210             elsif ($lef == $rig)
211             {
212 135         207 $type .= 'b';
213             }
214 768 100       1563 $self->{onebpoverhang} = 1 if (abs($lef - $rig) == 1);
215 768         1225 $self->{type} = $type;
216              
217 768 100       1955 $self->{temp} = $temp if ($temp);
218 768 100       1522 if ($tempin)
219             {
220 630         2207 my ($intime, $intemp) = split q{@}, $tempin;
221 630         1383 $self->{tempin} = $intemp;
222 630         1567 $self->{timein} = $intime;
223             }
224              
225 768 50       2384 $self->{score} = $score if ($score);
226 768         1504 $self->{aggress} = $aggress;
227              
228 768 100       1487 $self->{staract} = 1 if ($staract);
229              
230 768 100       1451 if (exists $methtrans{$methdam})
231             {
232 54         121 $self->{methdam} = $methtrans{$methdam};
233             }
234             else
235             {
236 714         1368 $self->{methdam} = 'indifferent';
237             }
238              
239 768 100       1339 if (exists $methtrans{$methdcm})
240             {
241 105         203 $self->{methdcm} = $methtrans{$methdcm};
242             }
243             else
244             {
245 663         1040 $self->{methdcm} = 'indifferent';
246             }
247              
248 768 100       1353 if (exists $methtrans{$methcpg})
249             {
250 390         792 $self->{methcpg} = $methtrans{$methcpg};
251             }
252             else
253             {
254 378         605 $self->{methcpg} = 'indifferent';
255             }
256              
257 768 100       1495 if ($vendors)
258             {
259 765         1168 my %vhsh = ();
260 765         2331 foreach my $v (split(q{}, $vendors))
261             {
262 2991 50       6771 $vhsh{$v} = $RE_vendors{$v} if (exists $RE_vendors{$v});
263 2991 50       5820 carp("$v not in vendor list!") unless (exists $RE_vendors{$v});
264             }
265 765         1937 $self->{vendors} = \%vhsh;
266             }
267              
268 768 50       2778 $self->{buffers} = $buffers if ($buffers);
269             }
270              
271 768 50       1222 $self->{start} = $start if ($start);
272              
273 768 50       1153 $self->{exclude} = $exclude if ($exclude);
274              
275 768         3797 return $self;
276             }
277              
278             =head1 FUNCTIONAL METHODS
279              
280             =head2 clone
281              
282             By default in GeneDesign code, RestrictionEnzyme objects are meant to stand as
283             abstracts - that is, they stand for BamHI in general, and not for a particular
284             instance of a BamHI recognition site. If you want to use the objects in the
285             latter sense, you will need to clone the abstract object instantiated when the
286             definition file is read in, thus generating an arbitrary number of BamHI
287             instances that can then be differentiated by their start attributes.
288              
289             =cut
290              
291             sub clone
292             {
293 0     0 1 0 my ($self) = @_;
294 0         0 my $copy;
295 0         0 foreach my $key (keys %{$self})
  0         0  
296             {
297 0         0 $copy->{$key} = $self->{$key};
298             }
299 0         0 bless $copy, ref $self;
300 0         0 return $copy;
301             }
302              
303             =head2 positions
304              
305             Generates a hash describing the positions of the enzyme's recognition
306             sites in a nucleotide sequence. Keys are offset in nucleotides, and values are
307             the recognition site found at said offset as a string.
308              
309             =cut
310              
311             sub positions
312             {
313 29     29 1 41 my ($self, $seq) = @_;
314 29         39 my $total = {};
315 29         33 foreach my $sit (@{$self->{regex}})
  29         65  
316             {
317 46         792 while ($seq =~ /(?=($sit))/ixg)
318             {
319 27         190 $total->{pos $seq} = $1;
320             }
321             }
322 29         72 return $total;
323             }
324              
325             =head2 overhang
326              
327             Given a nucleotide sequence context, what overhang does this enzyme leave, and
328             how far away from the cutsite is it?
329              
330             Arguments:
331              
332             =cut
333              
334             sub overhang
335             {
336 0     0 1 0 my ($self, $dna, $context, $strand) = @_;
337 0         0 my ($ohangstart, $mattersbit) = (0, q{});
338 0         0 my $lef;
339             my $rig;
340 0 0       0 if ($self->{class} eq "IIP")
    0          
341             {
342 0 0       0 ($lef, $rig) = (length($1), length($2)) if ($self->{cutseq} =~ $IIPreg);
343 0 0       0 ($lef, $rig) = ($rig, $lef) if ($rig < $lef);
344 0         0 $ohangstart = $lef + 1;
345 0         0 $mattersbit = substr($dna, $ohangstart-1, $rig-$lef);
346             }
347             elsif ($self->{class} eq "IIA")
348             {
349 0 0       0 ($lef, $rig) = ($1, $2) if ($self->{cutseq} =~ $IIAreg);
350 0 0       0 ($lef, $rig) = ($rig, $lef) if ($rig < $lef);
351 0 0       0 if ($strand == 1)
352             {
353 0         0 $ohangstart = length($dna) + $lef + 1;
354             }
355             else
356             {
357 0         0 $ohangstart = length($context) - length($dna) - $rig + 1;
358             }
359 0         0 $mattersbit = substr($context, $ohangstart-1, $rig-$lef);
360 0 0       0 $ohangstart = $strand == 1 ? length($dna) + $lef : 0 - ($rig);
361             }
362             else
363             {
364 0         0 return ();
365             }
366 0         0 return ($ohangstart, $mattersbit);
367             }
368              
369             =head2 display
370              
371             Generates a tab delimited display string that can be used to print enzyme
372             information out in a tabular format.
373              
374             =cut
375              
376             sub display
377             {
378 0     0 1 0 my ($self) = @_;
379 0 0       0 my $staract = $self->{staract} ? "*" : q{};
380 0         0 my (@blocked, @inhibed) = ((), ());
381 0 0       0 push @blocked, "cpg" if ($self->{methcpg} eq "blocked");
382 0 0       0 push @blocked, "dam" if ($self->{methdam} eq "blocked");
383 0 0       0 push @blocked, "dcm" if ($self->{methdcm} eq "blocked");
384 0 0       0 push @inhibed, "cpg" if ($self->{methcpg} eq "inhibited");
385 0 0       0 push @inhibed, "dam" if ($self->{methdam} eq "inhibited");
386 0 0       0 push @inhibed, "dcm" if ($self->{methdcm} eq "inhibited");
387 0         0 my $buffstr = undef;
388 0         0 foreach (sort keys %{$self->{buffers}})
  0         0  
389             {
390 0 0       0 $buffstr .= "$_ (" . $self->{buffers}->{$_} . ") " if ($self->{buffers}->{$_});
391             }
392 0         0 my $vendstr = join(", ", values %{$self->{vendors}});
  0         0  
393 0         0 my $display = undef;
394 0 0       0 my $inact = $self->{tempin} ? " (". $self->{timein} . q{@} . $self->{tempin} . ")" : q{};
395 0         0 $display .= $self->{id} . "\t";
396 0         0 $display .= $self->{cutseq} . $staract . "\t";
397 0         0 $display .= $self->{type} . "\t";
398 0 0       0 $display .= $self->{start} . "\t" if ($self->{start});
399 0         0 $display .= $self->{temp} . $inact . "\t";
400 0         0 $display .= join(", ", @blocked) . "\t";
401 0         0 $display .= join(", ", @inhibed) . "\t";
402 0         0 $display .= $self->{score} . "\t";
403 0         0 $display .= $buffstr . "\t";
404 0         0 $display .= $vendstr . "\t";
405 0         0 return $display;
406             }
407              
408             =head2 common_buffers
409              
410             Returns an array reference listing the buffers, if any, in which two enzymes
411             both have 100% activity. in boolean mode returns the number of buffers
412              
413             =cut
414              
415             sub common_buffers
416             {
417 0     0 1 0 my ($self, $buddy, $bool) = @_;
418 0 0       0 $self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme")
419             unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme");
420              
421 0         0 my $sbuffs = $self->{buffers};
422 0         0 my $bbuffs = $buddy->{buffers};
423 0         0 my @answer;
424 0         0 foreach my $skey (sort keys %{$sbuffs})
  0         0  
425             {
426 0         0 my $sval = $sbuffs->{$skey};
427 0         0 my $bval = $bbuffs->{$skey};
428 0 0 0     0 if ($skey eq "Other" && $sval && $bval && "$sval" eq "$bval")
    0 0        
      0        
      0        
      0        
      0        
429             {
430 0         0 push @answer, $skey;
431             }
432             elsif ($sval && $bval && "$sval" == 100 && "$bval" == 100)
433             {
434 0         0 push @answer, $skey;
435             }
436             }
437 0 0       0 return $bool ? scalar(@answer) : \@answer;
438             }
439              
440             =head2 acceptable_buffer
441              
442             Returns a buffer in which both enzymes will have at least a thresholded amount
443             of activity.
444              
445             =cut
446              
447             sub acceptable_buffer
448             {
449 0     0 1 0 my ($self, $buddy, $level) = @_;
450 0 0       0 $self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme")
451             unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme");
452              
453 0   0     0 $level = $level || 75;
454 0         0 my $sbuffs = $self->{buffers};
455 0         0 my $bbuffs = $buddy->{buffers};
456 0         0 my %answers;
457 0         0 foreach my $skey (sort keys %{$sbuffs})
  0         0  
458             {
459 0         0 my $sval = $sbuffs->{$skey};
460 0         0 my $bval = $bbuffs->{$skey};
461 0 0 0     0 if ($skey eq "Other" && $sval && $bval && $sval == $bval)
    0 0        
      0        
      0        
      0        
      0        
462             {
463 0         0 $answers{$skey} = 200;
464             }
465             elsif ($sval && $bval && $sval >= $level && $bval >= $level)
466             {
467 0         0 $answers{$skey} = $sval + $bval;
468             }
469             }
470 0 0       0 my @keys = sort {$answers{$b} <=> $answers{$a} && $b cmp $a} keys %answers;
  0         0  
471 0 0       0 return scalar @keys ? $keys[0] : undef;
472             }
473              
474             =head2 units
475              
476             Returns the number of units needed to cleave some sequence
477              
478             =cut
479              
480             sub units
481             {
482 0     0 1 0 my ($self, @args) = @_;
483              
484 0         0 my ($buffer, $sequence) = $self->_rearrange([qw(buffer sequence)], @args);
485              
486              
487 0         0 my $poshsh = $self->positions($sequence);
488 0         0 my $count = scalar keys %{$poshsh};
  0         0  
489              
490 0         0 my $freq = $count / (length $sequence);
491              
492 0   0     0 my $aggr = $self->aggress() || .000001;
493 0         0 $aggr = 1 / $aggr;
494              
495 0   0     0 $buffer = $buffer || $self->acceptable_buffer($self, 100);
496 0   0     0 my $buff = $self->buffers->{$buffer} || 1;
497 0         0 my $jad = $buff / 100;
498 0 0       0 my $adj = $jad > 0 ? 1 / $jad : 0;
499              
500 0         0 my $units = sprintf("%.1f", $freq * $aggr * $adj);
501              
502 0         0 return $units;
503             }
504              
505              
506             =head1 FILTERING METHODS
507              
508             =head2 filter_by_sequence
509              
510             Arguments: an arrayref of string nucleotide sequences (may be ambiguous)
511             a flag indicating whether or not the sequences in the array are
512             required (1 means they must NOT match; default 0 means they must
513             match)
514              
515             Returns : 1 if the enzyme passes;
516             0 if the enzyme fails.
517              
518             =cut
519              
520             sub filter_by_sequence
521             {
522 0     0 1 0 my ($self, $arrref, $req) = @_;
523 0 0       0 $req = 0 if (! $req);
524 0         0 my $result = 1;
525 0         0 foreach my $seq (@$arrref)
526             {
527 0         0 my $regex = _regres($seq, 1);
528 0 0       0 if ($regex =~ /\[ X \]/x)
529             {
530 0         0 print "\tWARNING: Cannot parse sequence $seq containing non-nucleotide "
531             . "characters - ignoring.\n";
532 0         0 next;
533             }
534 0 0 0     0 $result = 0 if ( $req == 1 && $self->{recseq} =~ $regex );
535 0 0 0     0 $result = 0 if ( $req == 0 && $self->{recseq} !~ $regex );
536             }
537 0         0 return $result;
538             }
539              
540             =head2 filter_by_score
541              
542             Arguments : a float
543              
544             Returns : 1 if the enzyme's score is less than or equal to the argument,
545             0 if the enzyme's score is higher.
546              
547             =cut
548              
549             sub filter_by_score
550             {
551 0     0 1 0 my ($self, $score) = @_;
552 0         0 my $result = 1;
553 0 0       0 $result = 0 if ($self->{score} > $score);
554 0         0 return $result;
555             }
556              
557             =head2 filter_by_vendor
558              
559             Arguments : an arrayref of vendor abbreviations; see vendor().
560              
561             Returns : 1 if the enzyme is supplied by any of the vendors queried,
562             0 else.
563              
564             =cut
565              
566             sub filter_by_vendor
567             {
568 0     0 1 0 my ($self, $vendlist) = @_;
569 0         0 my $result = 1;
570 0         0 my $flag = 0;
571 0         0 foreach my $vend (@$vendlist)
572             {
573 0 0       0 unless (exists($RE_vendors{$vend}))
574             {
575 0         0 print "\tWARNING: Cannot parse vendor argument $vend - ignoring.\n";
576 0         0 next;
577             }
578 0 0       0 $flag++ if ( exists( $self->{vendors}->{$vend} ) );
579             }
580 0 0       0 $result = $flag == 0 ? 0 : 1;
581 0         0 return $result;
582             }
583              
584             =head2 filter_by_buffer_activity
585              
586             Arguments : a hashref of buffer thresholds; the key is the buffer name, the
587             value is an activity threshold.
588              
589             Returns : 1 if the enzyme meets all the buffer requirements,
590             0 else.
591              
592             =cut
593              
594             sub filter_by_buffer_activity
595             {
596 0     0 1 0 my ($self, $hshref) = @_;
597 0         0 my $result = 1;
598 0         0 my $rebuff = $self->{buffers};
599 0         0 foreach my $buff (keys %$hshref)
600             {
601 0         0 my $val = $hshref->{$buff};
602 0 0 0     0 $result = 0 if ( ! exists($rebuff->{$buff}) || $rebuff->{$buff} < $val );
603             }
604              
605 0         0 return $result;
606             }
607              
608             =head2 filter_by_dcm_sensitivity
609              
610             Arguments : an arrayref of sensitivity values; the key is the sensitivity
611             blocked, inhibited, or indifferent
612              
613             Returns : 1 if the enzyme meets the dcm sensitivity requirements,
614             0 else.
615              
616             =cut
617              
618             sub filter_by_dcm_sensitivity
619             {
620 0     0 1 0 my ($self, $arrref) = @_;
621 0         0 my $result = 1;
622 0         0 my %sensehsh;
623 0         0 foreach my $sense (@$arrref)
624             {
625 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
626             {
627 0         0 $sense = lc $sense;
628 0         0 print "\tWARNING: Cannot parse dcmsense argument $sense - ignoring.\n";
629 0         0 next;
630             }
631 0         0 $sensehsh{$sense}++;
632             }
633 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methdcm}}) );
634 0         0 return $result;
635             }
636              
637             =head2 filter_by_dam_sensitivity
638              
639             Arguments : an arrayref of sensitivity values; the key is the sensitivity
640             blocked, inhibited, or indifferent
641              
642             Returns : 1 if the enzyme meets the dam sensitivity requirements,
643             0 else.
644              
645             =cut
646              
647             sub filter_by_dam_sensitivity
648             {
649 0     0 1 0 my ($self, $arrref) = @_;
650 0         0 my $result = 1;
651 0         0 my %sensehsh;
652 0         0 foreach my $sense (@$arrref)
653             {
654 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
655             {
656 0         0 $sense = lc $sense;
657 0         0 print "\tWARNING: Cannot parse damsense argument $sense - ignoring.\n";
658 0         0 next;
659             }
660 0         0 $sensehsh{$sense}++;
661             }
662 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methdam}}) );
663 0         0 return $result;
664             }
665              
666             =head2 filter_by_cpg_sensitivity
667              
668             Arguments : an arrayref of sensitivity values; the key is the sensitivity
669             blocked, inhibited, or indifferent
670              
671             Returns : 1 if the enzyme meets the cpg sensitivity requirements,
672             0 else.
673              
674             =cut
675              
676             sub filter_by_cpg_sensitivity
677             {
678 0     0 1 0 my ($self, $arrref) = @_;
679 0         0 my $result = 1;
680 0         0 my %sensehsh;
681 0         0 foreach my $sense (@$arrref)
682             {
683 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
684             {
685 0         0 $sense = lc $sense;
686 0         0 print "\tWARNING: Cannot parse cpgsense argument $sense - ignoring.\n";
687 0         0 next;
688             }
689 0         0 $sensehsh{$sense}++;
690             }
691 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methcpg}}) );
692 0         0 return $result;
693             }
694              
695             =head2 filter_by_star_activity
696              
697             Arguments : 1 if star activity required, 0 else
698              
699             Returns : 1 if the enzyme meets the star activity requirements,
700             0 else.
701              
702             =cut
703              
704             sub filter_by_star_activity
705             {
706 0     0 1 0 my ($self, $star) = @_;
707 0         0 my $result = 1;
708 0 0       0 $star = 0 unless ($star);
709 0 0 0     0 $result = 0 if (($star && ! $self->{staract}) || (! $star && $self->{staract}));
      0        
      0        
710 0         0 return $result;
711             }
712              
713             =head2 filter_by_incubation_temperature
714              
715             Arguments : an arrayref of acceptable integer incubation temperatures
716              
717             Returns : 1 if the enzyme meets the incubation temperature requirements,
718             0 else.
719              
720             =cut
721              
722             sub filter_by_incubation_temperature
723             {
724 0     0 1 0 my ($self, $arrref) = @_;
725 0         0 my $result = 1;
726 0         0 my %temps;
727 0         0 foreach my $temp (@$arrref)
728             {
729 0 0 0     0 if ($temp !~ /\d/x || $temp <= 0)
730             {
731 0         0 print "\tWARNING: Cannot parse incubation argument $temp - ignoring.\n";
732             }
733 0         0 $temps{$temp}++;
734             }
735 0 0       0 $result = 0 unless ( exists $temps{$self->{temp}});
736 0         0 return $result;
737             }
738              
739             =head2 filter_by_inactivation_temperature
740              
741             Arguments : an acceptable integer inactivation temperature maximum
742              
743             Returns : 1 if the enzyme meets the inactivation temperature requirement,
744             0 else.
745              
746             =cut
747              
748             sub filter_by_inactivation_temperature
749             {
750 0     0 1 0 my ($self, $temp) = @_;
751 0         0 my $result = 1;
752 0 0 0     0 if ($temp !~ /\d/x || $temp <= 0)
753             {
754 0         0 print "\tWARNING: Cannot parse inactivation argument $temp - ignoring.\n";
755             }
756             else
757             {
758 0 0       0 $result = 0 if ($self->{tempin} > $temp);
759             }
760 0         0 return $result;
761             }
762              
763             =head2 filter_by_base_ambiguity
764              
765             Arguments : "nonNonly" if any non N bases are allowed; "ATCGonly" if only
766             A, T, C, or G are allowed
767              
768             Returns : 1 if the enzyme meets the ambiguous nucleotide requirement,
769             0 else.
770              
771             =cut
772              
773             sub filter_by_base_ambiguity
774             {
775 0     0 1 0 my ($self, $ambig) = @_;
776 0         0 my $result = 1;
777 0 0 0     0 if ($ambig ne "nonNonly" && $ambig ne "ATCGonly")
778             {
779 0         0 print "\tWARNING: Cannot parse ambiguity argument $ambig - ignoring.\n";
780             }
781             else
782             {
783 0         0 my $ambregex;
784 0 0       0 $ambregex = qr/N/ if ($ambig eq "nonNonly");
785 0 0       0 $ambregex = $ambnt if ($ambig eq "ATCGonly");
786 0 0       0 $result = 0 unless ( $self->{recseq} =~ $ambregex );
787             }
788 0         0 return $result;
789             }
790              
791             =head2 filter_by_length
792              
793             Arguments : an arrayref of acceptable recognition site lengths
794              
795             Returns : 1 if the enzyme meets the recognition site length requirements,
796             0 else.
797              
798             =cut
799              
800             sub filter_by_length
801             {
802 0     0 1 0 my ($self, $arrref) = @_;
803 0         0 my $result = 1;
804 0         0 my %lens;
805 0         0 foreach my $len (@$arrref)
806             {
807 0 0 0     0 if ($len =~ /\D/x || $len <= 0)
808             {
809 0         0 print "\tWARNING: Cannot parse length argument $len - ignoring.\n";
810 0         0 next;
811             }
812 0         0 $lens{$len}++;
813             }
814 0 0       0 $result = 0 unless ( exists $lens{length($self->{recseq})} );
815 0         0 return $result;
816             }
817              
818             =head2 filter_by_overhang_palindromy
819              
820             Arguments : an arrayref of acceptable overhang palindromys, from the list
821             pal (palindromic),
822             nonpal (nonpalindromic),
823             pnon (potentially nonpalindromic)
824              
825             Returns : 1 if the enzyme meets the palindromy requirements,
826             0 else.
827              
828             =cut
829              
830             sub filter_by_overhang_palindromy
831             {
832 0     0 1 0 my ($self, $arrref) = @_;
833 0         0 my $result = 1;
834 0         0 my %pals;
835 0         0 foreach my $pal (@$arrref)
836             {
837 0 0 0     0 if ($pal ne "pal" && $pal ne "pnon" && $pal ne "nonpal")
      0        
838             {
839 0         0 print "\tWARNING: Cannot parse palindromy argument $pal - ignoring.\n";
840 0         0 next;
841             }
842 0         0 $pals{$pal}++;
843             }
844 0 0       0 $result = 0 unless (exists $pals{$self->{palindromy}});
845 0         0 return $result;
846             }
847              
848             =head2 filter_by_stickiness
849              
850             Arguments : an arrayref of acceptable overhang orientations, from the list
851             1 (single basepair overhang),
852             5 (five prime overhang),
853             3 (three prime overhang),
854             b (blunt ended)
855              
856             Returns : 1 if the enzyme meets the overhang requirements,
857             0 else.
858              
859             =cut
860              
861             sub filter_by_stickiness
862             {
863 0     0 1 0 my ($self, $arrref) = @_;
864 0         0 my $result = 1;
865 0         0 my %sticks;
866 0         0 foreach my $stick (@$arrref)
867             {
868 0 0 0     0 if ($stick ne "5" && $stick ne "3" && $stick ne "1" && $stick ne "b")
      0        
      0        
869             {
870 0         0 print "\tWARNING: Cannot parse sticky argument $stick - ignoring.\n";
871 0         0 next;
872             }
873 0         0 $sticks{$stick}++;
874             }
875 0 0 0     0 $result = 0 if ($self->{onebpoverhang} && ! exists $sticks{1});
876 0         0 my $type = $self->{type};
877 0         0 $type =~ s/\'//xg;
878 0 0       0 $result = 0 unless (exists $sticks{$type});
879 0         0 return $result;
880             }
881              
882             =head1 ACCESSOR METHODS
883              
884             Methods for setting and accessing enzyme attributes
885              
886             =head2 id
887              
888             The name of the enzyme.
889              
890             =cut
891              
892             sub id
893             {
894 98     98 1 194 my ($self) = @_;
895 98         288 return $self->{id};
896             }
897              
898             =head2 display_name
899              
900             The name of the enzyme.
901              
902             =cut
903              
904             sub display_name
905             {
906 0     0 1 0 my ($self) = @_;
907 0         0 return $self->{id};
908             }
909              
910             =head2 score
911              
912             This attribute initially holds the price in dollars per unit of the enzyme
913             (2009 US Dollars) but can be used to hold any score or cost value.
914              
915             =cut
916              
917             sub score
918             {
919 0     0 1 0 my ($self, $value) = @_;
920 0 0       0 if (defined $value)
921             {
922 0         0 $self->{score} = $value;
923             }
924 0         0 return $self->{score};
925             }
926              
927             =head2 aggress
928              
929             Aggressiveness is the number of recognition sites in a template piece of DNA
930             (usually lambda, but sometimes adeno2, pBR322, pUC19, pXba, etc) over the total
931             length of that template piece of DNA. This number tells the manufacturer how
932             much enzyme to sell as a "unit" - the amount of enzyme required to fully digest
933             one microgram of template DNA under reaction conditions in an hour.
934              
935             =cut
936              
937             sub aggress
938             {
939 0     0 1 0 my ($self, $value) = @_;
940 0 0       0 if (defined $value)
941             {
942 0         0 $self->{aggress} = $value;
943             }
944 0         0 return $self->{aggress};
945             }
946              
947             =head2 len
948              
949             The length in bases of the recognition sequence (recseq).
950              
951             =cut
952              
953             sub len
954             {
955 0     0 1 0 my ($self) = @_;
956 0         0 return $self->{length};
957             }
958              
959             =head2 methcpg
960              
961             The effect of CpG methylation on the enzyme's efficacy.
962              
963             =cut
964              
965             sub methcpg
966             {
967 0     0 1 0 my ($self) = @_;
968 0         0 return $self->{methcpg};
969             }
970              
971             =head2 methdcm
972              
973             The effect of Dcm methylation on the enzyme's efficacy.
974              
975             =cut
976              
977             sub methdcm
978             {
979 0     0 1 0 my ($self) = @_;
980 0         0 return $self->{methdcm};
981             }
982              
983             =head2 methdam
984              
985             The effect of Dam methylation on the enzyme's efficacy.
986              
987             =cut
988              
989             sub methdam
990             {
991 0     0 1 0 my ($self) = @_;
992 0         0 return $self->{methdam};
993             }
994              
995             =head2 buffers
996              
997             A hash reference where the keys are buffer names and the values are the activity
998             level of the enzyme in that Buffer. Since most of the enzymes in the default
999             GeneDesign list are NEB enzymes, this is usually full of NEB buffers.
1000              
1001             =cut
1002              
1003             sub buffers
1004             {
1005 0     0 1 0 my ($self) = @_;
1006 0         0 return $self->{buffers};
1007             }
1008              
1009             =head2 vendors
1010              
1011             A hash reference where the keys are abbreviations for and the values are names
1012             of vendors that stock the enzyme. These are read in from the enzyme file.
1013              
1014             B = Invitrogen
1015             C = Minotech
1016             E = Stratagene
1017             F = Thermo Scientific Fermentas
1018             I = SibEnzyme
1019             J = Nippon Gene Co.
1020             K = Takara
1021             M = Roche Applied Science
1022             N = New England Biolabs
1023             O = Toyobo Technologies
1024             Q = Molecular Biology Resources
1025             R = Promega
1026             S = Sigma Aldrich
1027             U = Bangalore Genei
1028             V = Vivantis
1029             X = EURx
1030             Y = CinnaGen
1031              
1032             =cut
1033              
1034             sub vendors
1035             {
1036 0     0 1 0 my ($self) = @_;
1037 0         0 return $self->{vendors};
1038             }
1039              
1040             =head2 tempin
1041              
1042             The temperature in degrees Celsius that deactivates the enzyme.
1043              
1044             =cut
1045              
1046             sub tempin
1047             {
1048 0     0 1 0 my ($self) = @_;
1049 0         0 return $self->{tempin};
1050             }
1051              
1052             =head2 timein
1053              
1054             The time required at inactivation temperature to deactivate the enzyme.
1055              
1056             =cut
1057              
1058             sub timein
1059             {
1060 0     0 1 0 my ($self) = @_;
1061 0         0 return $self->{timein};
1062             }
1063              
1064             =head2 temp
1065              
1066             Incubation temperature for the best enzyme activity, in degrees Celsius.
1067              
1068             =cut
1069              
1070             sub temp
1071             {
1072 0     0 1 0 my ($self) = @_;
1073 0         0 return $self->{temp};
1074             }
1075              
1076             =head2 recseq
1077              
1078             This attribute is the "clean" description of the enzyme's recognition sequence -
1079             that is, no information about cleavage site can be gained from this attribute.
1080             This is determined automatically from the cleavage string (cutseq) at
1081             instantiation.
1082              
1083             =cut
1084              
1085             sub recseq
1086             {
1087 0     0 1 0 my ($self) = @_;
1088 0         0 return $self->{recseq};
1089             }
1090              
1091             =head2 seq
1092              
1093             Synonym for recseq
1094              
1095             =cut
1096              
1097             sub seq
1098             {
1099 21     21 1 38 my ($self) = @_;
1100 21         75 return $self->{recseq};
1101             }
1102              
1103             =head2 cutseq
1104              
1105             This attribute is the string description of the enzyme's recognition sequence.
1106             It includes information about both the recognition and cleavage sites.
1107             See http://rebase.neb.com/rebase/rebrec.html for help interpreting this field.
1108              
1109             =cut
1110              
1111             sub cutseq
1112             {
1113 0     0 1 0 my ($self) = @_;
1114 0         0 return $self->{cutseq};
1115             }
1116              
1117             =head2 regex
1118              
1119             This attribute stores a set of regular expressions as an array reference to
1120             speed the search for recognition sites in sequence. The first entry in the
1121             arrayref is the regular expression representing the forward orientation of
1122             the recognition sequence; the second entry represents the reverse orientation
1123             and is only defined if the recognition site is nonpalindromic.
1124              
1125             This attribute is defined at instantiation.
1126              
1127             =cut
1128              
1129             sub regex
1130             {
1131 1     1 1 3 my ($self) = @_;
1132 1         3 return $self->{regex};
1133             }
1134              
1135             =head2 class
1136              
1137             Class describes the cutting behavior of an enzyme. The classes used by
1138             GeneDesign uses a generalized subset of the classes as described at Rebase - for
1139             the purposes of enzyme editing, three classes have so far proven to be enough.
1140             See http://rebase.neb.com/cgi-bin/sublist for the full description of enzyme
1141             classes.
1142              
1143             IIP : This enzyme has a symmetric target and a symmetric cleavage site; this
1144             usually means that the enzyme cleaves inside its own recognition site.
1145             This is not the same as overhang palindromy!
1146              
1147             IIA : This enzyme has an asymmetric recognition site and usually cleaves
1148             outside of it.
1149              
1150             IIB : This enzyme has one recognition site and two cleavage sites, one on
1151             either side of the recognition site, and thus cuts itself out of
1152             sequence.
1153              
1154             =cut
1155              
1156             sub class
1157             {
1158 0     0 1 0 my ($self) = @_;
1159 0         0 return $self->{class};
1160             }
1161              
1162             =head2 classex
1163              
1164             =cut
1165              
1166             sub classex
1167             {
1168 0     0 1 0 my ($self) = @_;
1169 0         0 return $self->{classex};
1170             }
1171              
1172             =head2 class_regexes
1173              
1174             Short cut to accessing class regular expressions
1175              
1176             =cut
1177              
1178             sub class_regexes
1179             {
1180 0     0 1 0 return {'IIP' => $IIPreg, 'IIA' => $IIAreg, 'IIB' => $IIBreg};
1181             }
1182              
1183             =head2 type
1184              
1185             Type describes the kind of overhang left by an enzyme. This is probably not a
1186             good use of the word type.
1187              
1188             Type may be 5', for a five prime overhang; 3', for a three prime overhang;
1189             or b for blunt ends.
1190              
1191             =cut
1192              
1193             sub type
1194             {
1195 0     0 1 0 my ($self) = @_;
1196 0         0 return $self->{type};
1197             }
1198              
1199              
1200             =head2 onebpoverhang
1201              
1202             One basepair overhangs can be harder to ligate than blunt ends. This attribute
1203             returns 1 if an enzyme leaves a 1bp overhang and 0 else.
1204              
1205             =cut
1206              
1207             sub onebpoverhang
1208             {
1209 0     0 1 0 my ($self) = @_;
1210 0         0 return $self->{onebpoverhang};
1211             }
1212              
1213             =head2 exclude
1214              
1215             Some enzymes share overlapping recognition sites. If you are trying to ensure
1216             the absence or uniqueness of a recognition site, you will want to be sure to
1217             exclude isoschizomers and neoschizomers from consideration elsewhere. The
1218             exclude attribute stores an array reference that lists the ids of neo- and
1219             isoschizomers - or any arbitrary enzyme that is incompatible with this enzyme -
1220             for easy lookup.
1221              
1222             =cut
1223              
1224             sub exclude
1225             {
1226 768     768 1 1742 my ($self, $value) = @_;
1227 768 50       1365 if (defined $value)
1228             {
1229 768 50       1699 $self->throw("$value is not a reference to an array")
1230             unless (ref $value eq "ARRAY");
1231 768         1402 $self->{exclude} = $value;
1232             }
1233 768         3171 return $self->{exclude};
1234             }
1235              
1236             =head2 palindromy
1237              
1238             Information about the overhang the enzyme leaves.
1239              
1240             pal = palindromic
1241             nonpal = nonpalindromic
1242             pnon = potentially nonpalindromic, or sometimes palindromic and sometimes
1243             nonpalindromic
1244             unknown = unknown
1245              
1246             =cut
1247              
1248             sub palindromy
1249             {
1250 0     0 1   my ($self) = @_;
1251 0           return $self->{palindromy};
1252             }
1253              
1254             =head2 staract
1255              
1256             1 if the enzyme exhibits star activity, 0 else
1257              
1258             =cut
1259              
1260             sub staract
1261             {
1262 0     0 1   my ($self) = @_;
1263 0           return $self->{staract};
1264             }
1265              
1266             =head2 start
1267              
1268             The offset in nucleotides of the enzymes recognition site in an ORF
1269              
1270             =cut
1271              
1272             sub start
1273             {
1274 0     0 1   my ($self, $value) = @_;
1275 0 0         if (defined $value)
1276             {
1277 0           $self->{start} = $value;
1278             }
1279 0           return $self->{start};
1280             }
1281              
1282             =head2 outside_cut
1283              
1284             =cut
1285              
1286             sub outside_cut
1287             {
1288 0     0 1   my ($self) = @_;
1289 0           return $self->{outside_cut};
1290             }
1291              
1292             =head2 inside_cut
1293              
1294             =cut
1295              
1296             sub inside_cut
1297             {
1298 0     0 1   my ($self) = @_;
1299 0           return $self->{inside_cut};
1300             }
1301              
1302             1;
1303              
1304             __END__