|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # BioPerl module Bio::Restriction::Analysis  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Please direct questions and support issues to    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Cared for by Rob Edwards   | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # You may distribute this module under the same terms as perl itself  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## POD Documentation:  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Restriction::Analysis - cutting sequences with restriction  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 enzymes  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # analyze a DNA sequence for restriction enzymes  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::Restriction::Analysis;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::PrimarySeq;  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Data::Dumper;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get a DNA sequence from somewhere  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $seq = Bio::PrimarySeq->new  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTC',  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        -primary_id => 'synopsis',  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        -molecule => 'dna');  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # now start an analysis.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # this is using the default set of enzymes  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # find unique cutters. This returns a  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Bio::Restriction::EnzymeCollection object  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $enzymes = $ra->unique_cutters;  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "Unique cutters: ", join (', ',   | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       map {$_->name} $enzymes->unique_cutters), "\n";  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # AluI is one them. Where does it cut?  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # This is will return an array of the sequence strings  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $enz = 'AluI';  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @frags = $ra->fragments($enz);  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # how big are the fragments?  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "AluI fragment lengths: ", join(' & ', map {length $_} @frags), "\n";  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # You can also bypass fragments and call sizes directly:  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # to see all the fragment sizes  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "All sizes: ", join " ", $ra->sizes($enz), "\n";  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # to see all the fragment sizes sorted by size like on a gel  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "All sizes, sorted ", join (" ", $ra->sizes($enz, 0, 1)), "\n";  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # how many times does each enzyme cut  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $cuts = $ra->cuts_by_enzyme('BamHI');  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "BamHI cuts $cuts times\n";  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # How many enzymes do not cut at all?  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "There are ", scalar $ra->zero_cutters->each_enzyme,  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         " enzymes that do not cut\n";  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # what about enzymes that cut twice?  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $two_cutters = $ra->cutters(2);  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print join (" ", map {$_->name} $two_cutters->each_enzyme),  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       " cut the sequence twice\n";  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # what are all the enzymes that cut, and how often do they cut  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   printf "\n%-10s%s\n", 'Enzyme', 'Number of Cuts';  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $all_cutters = $ra->cutters;  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   map {  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       printf "%-10s%s\n", $_->name, $ra->cuts_by_enzyme($_->name)  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } $all_cutters->each_enzyme;  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Finally, we can interact the restriction enzyme object by  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # retrieving it from the collection object see the docs for  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Bio::Restriction::Enzyme.pm  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $enzobj = $enzymes->get_enzyme($enz);  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Restriction::Analysis describes the results of cutting a DNA  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence with restriction enzymes.  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 To use this module you can pass a sequence object and optionally a  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Restriction::EnzymeCollection that contains the enzyme(s) to cut the  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequences with. There is a default set of enzymes that will be loaded  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 if you do not pass in a Bio::Restriction::EnzymeCollection.  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 To cut a sequence, set up a Restriction::Analysis object with a sequence  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 like this:  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::Restriction::Analysis;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Bio::Restriction::Analysis->new(-seq=>$seqobj);  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Bio::Restriction::Analysis->new  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (-seq=>$seqobj, -enzymes=>$enzs);  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Then, to get the fragments for a particular enzyme use this:  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   @fragments = $ra->fragments('EcoRI');  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that the naming of restriction enzymes is that the last numbers  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are usually Roman numbers (I, II, III, etc). You may want to use  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 something like this:  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get a reference to an array of unique (single) cutters  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $singles = $re->unique_cutters;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   foreach my $enz ($singles->each_enzyme) {  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       @fragments = $re->fragments($enz);  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ... do something here ...  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that if your sequence is circular, the first and last fragment  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will be joined so that they are the appropriate length and sequence  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for further analysis. This fragment will also be checked for cuts  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by the enzyme(s).  However, this will change the start of the  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence!  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There are two separate algorithms used depending on whether your  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 enzyme has ambiguity. The non-ambiguous algorithm is a lot faster,  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and if you are using very large sequences you should try and use  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this algorithm. If you have a large sequence (e.g. genome) and   | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 want to use ambgiuous enzymes you may want to make separate  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Restriction::Enzyme objects for each of the possible  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 alternatives and make sure that you do not set is_ambiguous!  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This version should correctly deal with overlapping cut sites  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in both ambiguous and non-ambiguous enzymes.  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I have tried to write this module with speed and memory in mind  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 so that it can be effectively used for large (e.g. genome sized)  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence. This module only stores the cut positions internally,  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and calculates everything else on an as-needed basis. Therefore  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 when you call fragment_maps (for example), there may be another  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 delay while these are generated.  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 FEEDBACK  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Mailing Lists   | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 User feedback is an integral part of the evolution of this and other  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bioperl modules. Send your comments and suggestions preferably to one  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the Bioperl mailing lists. Your participation is much appreciated.  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bioperl-l@bioperl.org                  - General discussion  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Support   | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please direct usage questions or support issues to the mailing list:  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rather than to the module maintainer directly. Many experienced and   | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reponsive experts will be able look at the problem and quickly   | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 address it. Please include a thorough description of the problem   | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with code and data examples if at all possible.  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Reporting Bugs  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Report bugs to the Bioperl bug tracking system to help us keep track  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the bugs and their resolution. Bug reports can be submitted via the  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 web:  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   https://github.com/bioperl/bioperl-live/issues  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Rob Edwards, redwards@utmem.edu,   | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Steve Chervitz, sac@bioperl.org  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CONTRIBUTORS  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mark A. Jensen, maj-at-fortinbras-dot-us  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (c) 2003 Rob Edwards.  Some of this work is Copyright (c)  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1997-2002 Steve A. Chervitz. All Rights Reserved.  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module is free software; you can redistribute it and/or modify it  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 under the same terms as Perl itself.  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L,   | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 APPENDIX  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Methods beginning with a leading underscore are considered private and  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are intended for internal use by this module. They are not considered  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 part of the public interface and are described here for documentation  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 purposes only.  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::Restriction::Analysis;  | 
| 
203
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
3680
 | 
 use Bio::Restriction::EnzymeCollection;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
    | 
| 
204
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
13
 | 
 use strict;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
    | 
| 
205
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
8
 | 
 use Data::Dumper;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105
 | 
    | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
207
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
10
 | 
 use base qw(Bio::Root::Root);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
167
 | 
    | 
| 
208
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
11
 | 
 use Scalar::Util qw(blessed);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6085
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 new  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : new  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Initializes the restriction enzyme object  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : The Restriction::Analysis object   | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments :   | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     $re_anal->new(-seq=$seqobj,   | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  -enzymes=>Restriction::EnzymeCollection object)  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     -seq requires a Bio::PrimarySeq object  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     -enzymes is optional.  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               If omitted it will use the default set of enzymes  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is the place to start. Pass in a sequence, and you will be able  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to get the fragments back out.  Several other things are available  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 like the number of zero cutters or single cutters.  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
230
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
790
 | 
     my($class, @args) = @_;  | 
| 
231
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $self = $class->SUPER::new(@args);  | 
| 
232
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     my ($seq,$enzymes) =  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->_rearrange([qw(  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               SEQ  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               ENZYMES  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                              )], @args);  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $seq && $self->seq($seq);  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $enzymes ?  $self->enzymes($enzymes)  | 
| 
241
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
         :  ($self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new );  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # keep track of status  | 
| 
244
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->{'_cut'} = 0;  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # left these here because we want to reforce a _cut if someone  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # just calls new  | 
| 
248
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $self->{maximum_cuts} = 0;  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $self->{'_number_of_cuts_by_enzyme'} = {};  | 
| 
251
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $self->{'_number_of_cuts_by_cuts'} = {};  | 
| 
252
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $self->{'_fragments'} = {};  | 
| 
253
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->{'_cut_positions'} = {}; # cut position is the real position   | 
| 
254
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $self->{'_frag_map_list'} = {};  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
256
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     return $self;  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Methods to set parameters  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 seq  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : seq  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage    : $ranalysis->seq($newval);  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : get/set method for the  sequence to be cut  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Example  : $re->seq($seq);  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : value of seq  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args     : A Bio::PrimarySeqI dna object (optional)  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub seq {  | 
| 
276
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
  
1
  
 | 
31
 | 
      my $self = shift;  | 
| 
277
 | 
16
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
53
 | 
      if (@_) {  | 
| 
278
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
          my $seq = shift;  | 
| 
279
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42
 | 
          $self->throw('Need a sequence object ['. ref $seq.  ']')  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              unless $seq->isa('Bio::PrimarySeqI');  | 
| 
281
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
          $self->throw('Need a DNA sequence object ['. $seq->alphabet.  ']')  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              unless $seq->alphabet eq 'dna';  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
          $self->{'_seq'} = $seq;  | 
| 
285
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
          $self->{'_cut'} = 0;  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
287
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
      return $self->{'_seq'};  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 enzymes  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : enzymes  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage    : $re->enzymes($newval)  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : gets/Set the restriction enzyme enzymes  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Example  : $re->enzymes('EcoRI')  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : reference to the collection  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args     : an array of Bio::Restriction::EnzymeCollection and/or  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             Bio::Restriction::Enzyme objects  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The default object for this method is  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Restriction::EnzymeCollection.  However, you can also pass it a  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 list of Bio::Restriction::Enzyme objects - even mixed with Collection  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 objects.  They will all be stored into one collection.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub enzymes {  | 
| 
309
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
4
 | 
      my $self = shift;  | 
| 
310
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
      if (@_) {  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new (-empty => 1)  | 
| 
312
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
              unless $self->{'_enzymes'};  | 
| 
313
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
          $self->{'_enzymes'}->enzymes(@_);  | 
| 
314
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
          $self->{'_cut'} = 0;  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
316
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
      return $self->{'_enzymes'};  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Perform the analysis  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cut  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : cut  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage    : $re->cut()  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : Cut the sequence with the enzymes  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Example  : $re->cut(); $re->cut('single'); or $re->cut('multiple', $enzymecollection);  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : $self  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args     : 'single' (optional), 'multiple' with enzyme collection.  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An explicit cut method is needed to pass arguments to it.   | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There are two varieties of cut. Single is the default, and need  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not be explicitly called. This cuts the sequence with each  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 enzyme separately.  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Multiple cuts a sequence with more than one enzyme. You must pass  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it a Bio::Restriction::EnzymeCollection object of the set  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of enzymes that you want to use in the double digest. The results  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will be stored as an enzyme named "multiple_digest", so you can  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use all the retrieval methods to get the data.  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you want to use the default setting there is no need to call cut  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 directly. Every method in the class that needs output checks the  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object's internal status and recalculates the cuts if needed.  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note: cut doesn't now re-initialize everything before figuring  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 out cuts. This is so that you can do multiple digests, or add more  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data or whatever. You'll have to use new to reset everything.  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also the comments in above about ambiguous and non-ambiguous  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequences.  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cut {  | 
| 
359
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
  
1
  
 | 
18
 | 
     my ($self, $opt, $ec) = @_;  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # for the moment I have left this as a separate routine so  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the user calls cut rather than _cuts. This also initializes  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # some stuff we need to use.  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
365
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $self->throw("A sequence must be supplied")  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $self->seq;  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
10
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
49
 | 
     if ($opt && uc($opt) eq "MULTIPLE") {  | 
| 
369
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
       $self->throw("You must supply a separate enzyme collection for multiple digests") unless $ec;  | 
| 
370
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
       $self->_multiple_cuts($ec); # multiple digests  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # reset some of the things that we save  | 
| 
373
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
        $self->{maximum_cuts} = 0;  | 
| 
374
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
        $self->{'_number_of_cuts_by_enzyme'} = {};  | 
| 
375
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1141
 | 
        $self->{'_number_of_cuts_by_cuts'} = {};  | 
| 
376
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
288
 | 
        $self->{'_fragments'} = {};  | 
| 
377
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
        $self->{'_cut_positions'} = {}; # cut position is the real position   | 
| 
378
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
861
 | 
        $self->{'_frag_map_list'} = {};  | 
| 
379
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
        $self->_cuts;  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }   | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
382
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1358
 | 
     $self->{'_cut'} = 1;  | 
| 
383
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     return $self;  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mulitple_digest  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : multiple_digest  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : perform a multiple digest on a sequence  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : $self so you can go and get any of the other methods  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : An enzyme collection  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Multiple digests can use 1 or more enzymes, and the data is stored  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  in as if it were an enzyme called multiple_digest. You can then  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  retrieve information about multiple digests from any of the other  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  methods.  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  You can use this method in place of $re->cut('multiple', $enz_coll);  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub multiple_digest {  | 
| 
403
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
  my ($self, $ec)=@_;  | 
| 
404
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
  return $self->cut('multiple', $ec);  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Query the results of the analysis  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 positions  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Title    : positions  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Function : Retrieve the positions that an enzyme cuts at  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Returns  : An array of the positions that an enzyme cuts at  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            : or an empty array if the enzyme doesn't cut  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Arguments: An enzyme name to retrieve the positions for  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Comments : The cut occurs after the base specified.  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub positions {  | 
| 
423
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
329
 | 
     my ($self, $enz) = @_;  | 
| 
424
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
425
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $self->throw('no enzyme selected to get positions for')  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $enz;  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return defined $self->{'_cut_positions'}->{$enz} ?  | 
| 
429
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
         @{$self->{'_cut_positions'}->{$enz}} :   | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ();  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 fragments  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Title    : fragments  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Function : Retrieve the fragments that we cut  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Returns  : An array of the fragments retrieved.   | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Arguments: An enzyme name to retrieve the fragments for  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For example this code will retrieve the fragments for all enzymes that  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cut your sequence  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $all_cutters = $analysis->cutters;  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   foreach my $enz ($$all_cutters->each_enzyme}) {  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       @fragments=$analysis->fragments($enz);  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub fragments {  | 
| 
451
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
  
1
  
 | 
4327
 | 
     my ($self, $enz) = @_;  | 
| 
452
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
57
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
453
 | 
18
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     $self->throw('no enzyme selected to get fragments for')  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $enz;  | 
| 
455
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my @fragments;  | 
| 
456
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     for ($self->fragment_maps($enz)) {push @fragments, $_->{seq}}  | 
| 
 
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
457
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
     return @fragments;  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 fragment_maps  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Title     : fragment_maps  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Function  : Retrieves fragment sequences with start and end  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               points. Useful for feature construction.  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Returns   : An array containing a hash reference for each fragment,  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               containing the start point, end point and DNA  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               sequence. The hash keys are 'start', 'end' and  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               'seq'. Returns an empty array if not defined.  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Arguments : An enzyme name, enzyme object,   | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               or enzyme collection to retrieve the fragments for.  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If passes an enzyme collection it will return the result of a multiple  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 digest. This : will also cause the special enzyme 'multiple_digest' to  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be created so you can get : other information about this multiple  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 digest. (TMTOWTDI).  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is a minor problem with this and $self-Efragments that I  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 haven't got a good answer for (at the moment). If the sequence is not  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cut, do we return undef, or the whole sequence?  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For linear fragments it would be good to return the whole  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence. For circular fragments I am not sure.  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 At the moment it returns the whole sequence with start of 1 and end of  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 length of the sequence.  For example:  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::Restriction::Analysis;  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::Restriction::EnzymeCollection;  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::PrimarySeq;  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $seq = Bio::PrimarySeq->new  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATCCAAAAAAGAGTGAGCTTCTGAT',  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        -primary_id => 'synopsis',  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        -molecule => 'dna');  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @gel;  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @bam_maps = $ra->fragment_maps('BamHI');  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   foreach my $i (@bam_maps) {  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      my $start = $i->{start};  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      my $end = $i->{end};  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      my $sequence = $i->{seq};  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      push @gel, "$start--$sequence--$end";  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      @gel = sort {length $b <=> length $a} @gel;  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print join("\n", @gel) . "\n";  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub fragment_maps {  | 
| 
514
 | 
21
 | 
 
 | 
 
 | 
  
21
  
 | 
  
1
  
 | 
24
 | 
     my ($self, $enz) = @_;  | 
| 
515
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
516
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     $self->throw('no enzyme selected to get fragment maps for')  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $enz;  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we are going to generate this on an as-needed basis rather than  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # for every enzyme this should cut down on the amount of  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # duplicated data we are trying to save in memory and make this  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # faster and easier for large sequences, e.g. genome analysis  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
524
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my @cut_positions;  | 
| 
525
 | 
21
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
144
 | 
     if (ref $enz eq '' && exists $self->{'_cut_positions'}->{$enz}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
526
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         @cut_positions=@{$self->{'_cut_positions'}->{$enz}};  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
    | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($enz->isa("Bio::Restriction::EnzymeI")) {  | 
| 
528
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         @cut_positions=@{$self->{'_cut_positions'}->{$enz->name}};  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($enz->isa("Bio::Restriction::EnzymeCollection")) {  | 
| 
530
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         $self->cut('multiple', $enz);  | 
| 
531
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         @cut_positions=@{$self->{'_cut_positions'}->{'multiple_digest'}};  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     unless (defined($cut_positions[0])) {  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # it doesn't cut  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # return the whole sequence  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # this should probably have the is_circular command  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my %map=(  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  'start'  => 1,  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  'end'    => $self->{'_seq'}->length,  | 
| 
541
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                  'seq'    => $self->{'_seq'}->seq  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
543
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         push (@{$self->{'_frag_map_list'}->{$enz}}, \%map);  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return defined $self->{'_frag_map_list'}->{$enz} ?  | 
| 
545
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             @{$self->{'_frag_map_list'}->{$enz}} : ();  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
548
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     @cut_positions=sort {$a <=> $b} @cut_positions;  | 
| 
 
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
    | 
| 
549
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     push my @cuts, $cut_positions[0];  | 
| 
550
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     foreach my $i (@cut_positions) {  | 
| 
551
 | 
72
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
123
 | 
         push @cuts, $i if $i != $cuts[$#cuts];  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
554
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my $start=1; my $stop; my %seq; my %stop;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
555
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     foreach $stop (@cuts) {  | 
| 
556
 | 
67
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
93
 | 
         next if !$stop; # cuts at beginning of sequence  | 
| 
557
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
         $seq{$start}=$self->{'_seq'}->subseq($start, $stop);  | 
| 
558
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86
 | 
         $stop{$start}=$stop;  | 
| 
559
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
         $start=$stop+1;  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
561
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     $stop=$self->{'_seq'}->length;  | 
| 
562
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     if ($start > $stop) {  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # borderline case. The enzyme cleaved at the end of the sequence  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # what do I do now?  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
567
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
          $seq{$start}=$self->{'_seq'}->subseq($start, $stop);  | 
| 
568
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
          $stop{$start}=$stop;  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
571
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     if ($self->{'_seq'}->is_circular) {  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # join the first and last fragments  | 
| 
573
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         $seq{$start}.=$seq{'1'};  | 
| 
574
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         delete $seq{'1'};  | 
| 
575
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
         $stop{$start}=$stop{'1'};  | 
| 
576
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         delete $stop{'1'};  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
579
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
     foreach my $start (sort {$a <=> $b} keys %seq) {  | 
| 
 
 | 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
    | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my %map=(  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  'start'  => $start,  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  'end'    => $stop{$start},  | 
| 
583
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
166
 | 
                  'seq'    => $seq{$start}  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
585
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
         push (@{$self->{'_frag_map_list'}->{$enz}}, \%map);  | 
| 
 
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
201
 | 
    | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return defined $self->{'_frag_map_list'}->{$enz} ?  | 
| 
589
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
         @{$self->{'_frag_map_list'}->{$enz}} : ();  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86
 | 
    | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 sizes  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Title    : sizes  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Function : Retrieves an array with the sizes of the fragments  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Returns  : Array that has the sizes of the fragments ordered from   | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              largest to smallest like they would appear in a gel.  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Arguments: An enzyme name to retrieve the sizes for is required and  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              kilobases to the nearest 0.1 kb, else it will be in  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              bp. If the optional third entry is set the results will  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              be sorted.  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is designed to make it easy to see what fragments you should get  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 on a gel!  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You should be able to do these:  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # to see all the fragment sizes,  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print join "\n", $re->sizes($enz), "\n";  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # to see all the fragment sizes sorted  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print join "\n", $re->sizes($enz, 0, 1), "\n";  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # to see all the fragment sizes in kb sorted  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print join "\n", $re->sizes($enz, 1, 1), "\n";  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sizes {  | 
| 
619
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
7
 | 
     my ($self, $enz, $kb, $sort) = @_;  | 
| 
620
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $self->throw('no enzyme selected to get fragments for')  | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $enz;  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
623
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     if (blessed($enz)) {  | 
| 
624
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $self->throw("Enzyme must be enzyme name or a Bio::Restriction::EnzymeI, not ".ref($enz))  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if !$enz->isa('Bio::Restriction::EnzymeI');  | 
| 
626
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $enz = $enz->name;  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
628
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
629
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my @frag; my $lastsite=0;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
631
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     foreach my $site (@{$self->{'_cut_positions'}->{$enz}}) {  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
632
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
       $kb ? push (@frag, (int($site-($lastsite))/100)/10)  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           : push (@frag, $site-($lastsite));  | 
| 
634
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
       $lastsite=$site;  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $kb ? push (@frag, (int($self->{'_seq'}->length-($lastsite))/100)/10)  | 
| 
637
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         : push (@frag, $self->{'_seq'}->length-($lastsite));  | 
| 
638
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     if ($self->{'_seq'}->is_circular) {  | 
| 
639
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
        my $first=shift @frag;  | 
| 
640
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
        my $last=pop @frag;  | 
| 
641
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
        push @frag, ($first+$last);  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
643
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     $sort ? @frag = sort {$b <=> $a} @frag : 1;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
645
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     return @frag;  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 How many times does enzymes X cut?  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cuts_by_enzyme  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : cuts_by_enzyme  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Return the number of cuts for an enzyme  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : An integer with the number of times each enzyme cuts.  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              Returns 0 if doesn't cut or undef if not defined  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : An enzyme name string  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cuts_by_enzyme {  | 
| 
664
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
6
 | 
     my ($self, $enz)=@_;  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
666
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $self->throw("Need an enzyme name")  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless defined $enz;  | 
| 
668
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
669
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     return $self->{'_number_of_cuts_by_enzyme'}->{$enz};  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Which enzymes cut the sequence N times?  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cutters  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : cutters  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Find enzymes that cut a given number of times  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : a Bio::Restriction::EnzymeCollection  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : 1. exact time or lower limit,  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 non-negative integer, optional  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              2. upper limit, non-negative integer,  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 larger or equalthan first, optional  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no arguments are given, the method returns all enzymes that do cut  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the sequence. The argument zero, '0', is same as method  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 zero_cutters().  The argument one, '1', corresponds to unique_cutters.  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If either of the limits is larger than number of cuts any enzyme cuts the  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence, the that limit is automagically lowered. The method max_cuts()  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 gives the largest number of cuts.  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See Also : L,  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, L  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cutters {  | 
| 
700
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
7
 | 
     my ($self, $a, $z) = @_;  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
702
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
704
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     my ($start, $end);  | 
| 
705
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     if (defined $a) {  | 
| 
706
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
         $self->throw("Need a non-zero integer [$a]")  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             unless $a =~ /^[+]?\d+$/;  | 
| 
708
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $start = $a;  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
710
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $start = 1;  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
712
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $start = $self->{'maximum_cuts'} if $start > $self->{'maximum_cuts'};  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
714
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     if (defined $z) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
715
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
         $self->throw("Need a non-zero integer no smaller than start [0]")  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             unless $z =~ /^[+]?\d+$/ and $z >= $a;  | 
| 
717
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $end = $z;  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (defined $a) {  | 
| 
720
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $end = $start;  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
722
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $end = $self->{'maximum_cuts'};  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
724
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $end = $self->{'maximum_cuts'} if $end > $self->{'maximum_cuts'};  | 
| 
725
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my $set = Bio::Restriction::EnzymeCollection->new(-empty => 1);  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #return an empty set if nothing cuts  | 
| 
728
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     return $set unless $self->{'maximum_cuts'};  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
730
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     for (my $i=$start; $i<=$end; $i++) {  | 
| 
731
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
         $set->enzymes( @{$self->{_number_of_cuts_by_cuts}->{$i}} )  | 
| 
732
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
             if defined $self->{_number_of_cuts_by_cuts}->{$i};  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
735
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     return $set;  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 unique_cutters  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : unique_cutters  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : A special case if cutters() where enzymes only cut once  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : a Bio::Restriction::EnzymeCollection  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : -  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also:  L, L  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unique_cutters {  | 
| 
752
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
8
 | 
     shift->cutters(1);  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 zero_cutters  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : zero_cutters  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : A special case if cutters() where enzymes don't cut the sequence  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : a Bio::Restriction::EnzymeCollection  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : -  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also:  L, L  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub zero_cutters {  | 
| 
767
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
     shift->cutters(0);  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 max_cuts  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : max_cuts  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Find the most number of cuts  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : The number of times the enzyme that cuts most cuts.  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : None  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not a very practical method, but if you are curious...  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
781
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 sub max_cuts { return shift->{maximum_cuts} }  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Internal methods  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _cuts  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _cuts  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Figures out which enzymes we know about and cuts the sequence.  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : Nothing.  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : None.  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Comments  : An internal method. This will figure out where the sequence   | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              should be cut, and provide the appropriate results.  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _cuts {  | 
| 
799
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
11
 | 
     my $self = shift;  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
801
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     my $target_seq=uc $self->{'_seq'}->seq; # I have been burned on this before :)  | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # first, find out all the enzymes that we have  | 
| 
805
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     foreach my $enz ($self->{'_enzymes'}->each_enzyme) {  | 
| 
806
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6150
 | 
         my @all_cuts;  | 
| 
807
 | 
9575
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22804
 | 
         my @others = $enz->others if $enz->can("others");  | 
| 
808
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8692
 | 
         foreach my $enzyme ($enz, @others) {  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # cut the sequence  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # _make_cuts handles all cases (amibiguous, non-ambiguous) X  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # (palindromic X non-palindromic)  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #   | 
| 
813
 | 
9641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11358
 | 
             my $cut_positions = $self->_make_cuts($target_seq, $enzyme);  | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
815
 | 
9641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7988
 | 
             push @all_cuts, @$cut_positions;  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #### need to refactor circular handling....  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ####  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # deal with is_circular sequences  | 
| 
821
 | 
9641
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18008
 | 
             if ($self->{'_seq'}->is_circular) {  | 
| 
822
 | 
4286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6352
 | 
                 $cut_positions=$self->_circular($target_seq, $enzyme);  | 
| 
823
 | 
4286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4811
 | 
                push @all_cuts, @$cut_positions;  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # non-symmetric cutters (most external cutters, e.g.) need   | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # special handling  | 
| 
827
 | 
9641
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14507
 | 
             unless ($enzyme->is_symmetric) {  | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# do all of above with explicit use of the   | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# enzyme's 'complementary_cut'...  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
831
 | 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1304
 | 
 		$cut_positions = $self->_make_cuts($target_seq, $enzyme, 'COMP');  | 
| 
832
 | 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
991
 | 
                 push @all_cuts, @$cut_positions;  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # deal with is_circular sequences  | 
| 
834
 | 
868
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1714
 | 
 		if ($self->{'_seq'}->is_circular) {  | 
| 
835
 | 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
870
 | 
 		    $cut_positions=$self->_circular($target_seq, $enzyme, 'COMP');  | 
| 
836
 | 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
842
 | 
 		    push @all_cuts, @$cut_positions;  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
9575
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11060
 | 
 	if (defined $all_cuts[0]) {  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # now just remove any duplicate cut sites  | 
| 
843
 | 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2843
 | 
             @all_cuts = sort {$a <=> $b} @all_cuts;  | 
| 
 
 | 
1350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1668
 | 
    | 
| 
844
 | 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1225
 | 
             push  @{$self->{'_cut_positions'}->{$enz->name}},  $all_cuts[0];  | 
| 
 
 | 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2952
 | 
    | 
| 
845
 | 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2054
 | 
             foreach my $i (@all_cuts) {  | 
| 
846
 | 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
840
 | 
                 push @{$self->{'_cut_positions'}->{$enz->name}}, $i   | 
| 
847
 | 
2636
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1690
 | 
                     if $i != ${$self->{'_cut_positions'}->{$enz->name}}[$#{$self->{'_cut_positions'}->{$enz->name}}];  | 
| 
 
 | 
2636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3198
 | 
    | 
| 
 
 | 
2636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3456
 | 
    | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # this just fixes an eror when @all_cuts is not defined!  | 
| 
851
 | 
7858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5521
 | 
             @{$self->{'_cut_positions'}->{$enz->name}}=();  | 
| 
 
 | 
7858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12019
 | 
    | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # note I have removed saving any other information except the  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # cut_positions this should significantly decrease the amount  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # of memory that is required for large sequences. It should  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # also speed things up dramatically, because fragments and  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # fragment maps are only calculated for those enzymes they are  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # needed for.  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # finally, save minimal information about each enzyme  | 
| 
862
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7517
 | 
 	my $number_of_cuts=scalar @{$self->{'_cut_positions'}->{$enz->name}};  | 
| 
 
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12827
 | 
    | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # now just store the number of cuts  | 
| 
864
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14128
 | 
 	$self->{_number_of_cuts_by_enzyme}->{$enz->name}=$number_of_cuts;  | 
| 
865
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7675
 | 
         push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, $enz);  | 
| 
 
 | 
9575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13517
 | 
    | 
| 
866
 | 
9575
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17667
 | 
         if ($number_of_cuts > $self->{maximum_cuts}) {  | 
| 
867
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
             $self->{maximum_cuts}=$number_of_cuts;  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _enzyme_sites  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _enzyme_sites  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : An internal method to figure out the two sides of an enzyme  | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : The sequence before the cut and the sequence after the cut  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : A Bio::Restriction::Enzyme object,  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              $comp : boolean, calculate based on $enz->complementary_cut()  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      if true, $enz->cut() if false  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Status    : NOW DEPRECATED - maj  | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _enzyme_sites {  | 
| 
886
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($self, $enz, $comp )=@_;  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # get the cut site  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # I have reworked this so that it uses $enz->cut to get the site  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
890
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $site= ( $comp ? $enz->complementary_cut : $enz->cut );  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # split it into the two fragments for the sequence before and after.  | 
| 
892
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $site=0 unless defined $site;  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the default values just stop an error from an undefined  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # string. But they don't affect the split.  | 
| 
896
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ($beforeseq, $afterseq)= ('.', '.');  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # extra-site cutting  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the before seq is going to be the entire site  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the after seq is empty  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # BUT, need to communicate how to cut within the sample sequence  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  relative to the end of the site (do through $enz->cut), and  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ALSO, need to check length of sample seq so that if cut falls  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  outside the input sequence, we have a warning/throw. /maj  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # pre-site cutting  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # need to handle negative site numbers  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($site <= 0) { # <= to handle pre-site cutting  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
910
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $afterseq=$enz->string;  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($site >= $enz->seq->length) { # >= to handle extrasite cutters/maj  | 
| 
913
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $beforeseq=$enz->string;  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  # $site < $enz->seq->length  | 
| 
916
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $beforeseq=$enz->seq->subseq(1, $site);  | 
| 
917
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $afterseq=$enz->seq->subseq($site+1, $enz->seq->length);  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # if the enzyme is ambiguous we need to convert this into a perl string  | 
| 
920
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($enz->is_ambiguous) {  | 
| 
921
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $beforeseq=$self->_expanded_string($beforeseq);  | 
| 
922
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $afterseq =$self->_expanded_string($afterseq);  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
925
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return ($beforeseq, $afterseq);  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _non_pal_enz  | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Title    : _non_pal_enz  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Function : Analyses non_palindromic enzymes for cuts in both ways  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              (in fact, delivers only minus strand cut positions in the   | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               plus strand coordinates/maj)  | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Returns  : A reference to an array of cut positions  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Arguments: The sequence to check and the enzyme object  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   NOW DEPRECATED/maj  | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _non_pal_enz {  | 
| 
942
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($self, $target_seq, $enz) =@_;  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # add support for non-palindromic sequences  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the enzyme is not the same forwards and backwards  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
946
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $site=$enz->complementary_cut;  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # complementary_cut is in plus strand coordinates  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we are going to rc the sequence, so complementary_cut becomes length-complementary_cut  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # I think this is wrong; cut sites are a matter of position with respect  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # to the plus strand: the recognition site is double stranded and   | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # directly identifiable on the plus strand sequence. /maj  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # what really needs doing is to keep track of plus strand and minus strand  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # nicks separately./maj  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
958
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my ($beforeseq, $afterseq)=('.', '.');  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now, for extra-site cuts, $site > length...so...?/maj  | 
| 
961
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $new_left_cut=$enz->seq->length-$site;  | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # there is a problem when this is actually zero  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
964
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($new_left_cut == 0) {$afterseq=$enz->seq->revcom->seq}  | 
| 
 
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
965
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     elsif ($new_left_cut == $enz->seq->length) {$beforeseq=$enz->seq->revcom->seq}  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# this can't be right./maj  | 
| 
968
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $beforeseq=$enz->seq->revcom->subseq(1, ($enz->seq->length-$site));  | 
| 
969
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $afterseq=$enz->seq->revcom->subseq(($enz->seq->length-$site), $enz->seq->length);  | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # do this correctly, in the context of the current code design,  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # by providing a "complement" argument to _ambig_cuts and _nonambig_cuts,  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # use these explicitly rather than this wrapper./maj  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
976
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $results=[];  | 
| 
977
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($enz->is_ambiguous) {  | 
| 
978
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $results= $self->_ambig_cuts($beforeseq, $afterseq, $target_seq, $enz);  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
980
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $results= $self->_nonambig_cuts($beforeseq, $afterseq, $target_seq, $enz);  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # deal with is_circular  | 
| 
984
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $more_results=[];  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $more_results=$self->_circular($beforeseq, $afterseq, $enz)   | 
| 
986
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($self->{'_seq'}->is_circular);  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
988
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return [@$more_results, @$results];  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _ambig_cuts  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _ambig_cuts  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : An internal method to localize the cuts in the sequence  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : A reference to an array of cut positions  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : The separated enzyme site, the target sequence, and the enzyme object  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Comments  : This is a slow implementation but works for ambiguous sequences.  | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              Whenever possible, _nonambig_cuts should be used as it is a lot faster.  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # we have problems here when the cut is extrasite: $beforeseq/$afterseq do  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # not define the cut site then! I am renaming this to _ambig_cuts_depr,  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # providing a more compact method that correctly handles extrasite cuts  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # below /maj  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _ambig_cuts_depr {  | 
| 
1008
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($self, $beforeseq, $afterseq, $target_seq, $enz) = @_;  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # cut the sequence. This is done with split so we can use  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # regexp.   | 
| 
1012
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $target_seq = uc $target_seq;  | 
| 
1013
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @cuts = split /($beforeseq)($afterseq)/i, $target_seq;  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now the array has extra elements --- the before and after!  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we have:  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # element 0 sequence  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # element 1 3' end  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # element 2 5' end of next sequence  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # element 3 sequence  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ....  | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we need to loop through the array and add the ends to the  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # appropriate parts of the sequence  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1025
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $i=0;  | 
| 
1026
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @re_frags;  | 
| 
1027
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($#cuts) {           # there is >1 element  | 
| 
1028
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         while ($i<$#cuts) {  | 
| 
1029
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             my $joinedseq;  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # the first sequence is a special case  | 
| 
1031
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ($i == 0) {  | 
| 
1032
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $joinedseq=$cuts[$i].$cuts[$i+1];  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
1034
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $joinedseq=$cuts[$i-1].$cuts[$i].$cuts[$i+1];  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # now deal with overlapping sequences  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # we can do this through a regular regexp as we only  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # have a short fragment to look through  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1040
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    while ($joinedseq =~ /$beforeseq$afterseq/) {  | 
| 
1041
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $joinedseq =~ s/^(.*?$beforeseq)($afterseq)/$2/;  | 
| 
1042
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push @re_frags, $1;  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1044
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             push @re_frags, $joinedseq;  | 
| 
1045
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $i+=3;  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # I don't think we want the last fragment in. It is messing up the _circular  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # part of things. So I deleted this part of the code :)  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # if we don't cut, leave the array empty  | 
| 
1053
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    return [];  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } # the sequence was not cut.  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now @re_frags has the fragments of all the sequences  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # but some people want to have this return the lengths  | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # of the fragments.  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # in theory the actual cut sites should be the length  | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # of the fragments in @re_frags  | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # note, that now this is the only data that we are saving. We  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # will have to go back add regenerate re_frags. The reason is  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # that we can use this in _circular easier  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1067
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @cut_positions = map {length($_)} @re_frags;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the cut positions are right now the lengths of the sequence, but  | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we need to add them all onto each other  | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1072
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for (my $i=1; $i<=$#cut_positions; $i++) {  | 
| 
1073
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
      $cut_positions[$i]+=$cut_positions[$i-1];  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # in one of those oddities in life, 2 fragments mean an enzyme cut once  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # so $#re_frags is the number of cuts  | 
| 
1078
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return \@cut_positions;  | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # new version/maj  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _ambig_cuts {  | 
| 
1084
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($self, $before, $after, $target, $enz, $comp) = @_;  | 
| 
1085
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);  | 
| 
1086
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     local $_ = uc $target;  | 
| 
1087
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @cuts;  | 
| 
1088
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $recog = $enz->recog;  | 
| 
1089
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $site_re = qr/($recog)/;  | 
| 
1090
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     push @cuts, pos while (/$site_re/g);  | 
| 
1091
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $_ = $_ - length($enz->recog) + $cut_site for @cuts;  | 
| 
1092
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return [@cuts];  | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _nonambig_cuts  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _nonambig_cuts  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Figures out which enzymes we know about and cuts the sequence.  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : Nothing.  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : The separated enzyme site, the target sequence, and the enzyme object  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An internal method. This will figure out where the sequence should be  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cut, and provide the appropriate results.  This is a much faster  | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 implementation because it doesn't use a regexp, but it can not deal  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with ambiguous sequences  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # now, DO want the enzyme object.../maj  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _nonambig_cuts {  | 
| 
1112
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($self, $beforeseq, $afterseq, $target_seq, $enz, $comp) = @_;  | 
| 
1113
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);  | 
| 
1114
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($beforeseq eq ".") {$beforeseq = ''}  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1115
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($afterseq  eq ".") {$afterseq  = ''}  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1116
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $target_seq = uc $target_seq;  | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    my $index_posn=index($target_seq, $beforeseq.$afterseq);  | 
| 
1118
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $index_posn=index($target_seq, $enz->recog);  | 
| 
1119
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return [] if ($index_posn == -1); # there is no match to the sequence  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # there is at least one cut site  | 
| 
1122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @cuts;  | 
| 
1123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     while ($index_posn > -1) {  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# extrasite cutting issue here...  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# think we want $index_posn+$enz->cut  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	  push (@cuts, $index_posn+length($beforeseq));  | 
| 
1127
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	  push (@cuts, $index_posn+$cut_site);  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	  $index_posn=index($target_seq, $beforeseq.$afterseq, $index_posn+1);  | 
| 
1129
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	  $index_posn=index($target_seq, $enz->recog, $index_posn+1);  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1132
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return \@cuts;  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _make_cuts  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : _make_cuts  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $an->_make_cuts( $target_sequence, $enzyme, $complement_q )  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Returns an array of cut sites on target seq, using enzyme  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            on the plus strand ($complement_q = 0) or minus strand  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            ($complement_q = 1); follows Enzyme objects in  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            $enzyme->others()  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : array of scalar integers  | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : sequence string, B:R:Enzyme object, boolean  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _make_cuts {  | 
| 
1149
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
16
 | 
     no warnings qw( uninitialized );  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2017
 | 
    | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1151
 | 
15249
 | 
 
 | 
 
 | 
  
15249
  
 | 
 
 | 
18169
 | 
     my ($self, $target, $enz, $comp) = @_;  | 
| 
1152
 | 
15249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21189
 | 
     local $_ = uc $target;  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1154
 | 
15249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10747
 | 
     my @cuts;  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1156
 | 
15249
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32501
 | 
     my @enzs = map { $_ || () } ($enz, $enz->can('others') ? $enz->others : ());  | 
| 
 
 | 
15481
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32923
 | 
    | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ENZ:  | 
| 
1158
 | 
15249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14179
 | 
     foreach $enz (@enzs) {  | 
| 
1159
 | 
15481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23409
 | 
 	my $recog = $enz->recog;  | 
| 
1160
 | 
15481
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26542
 | 
 	my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);  | 
| 
1161
 | 
15481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10644
 | 
 	my @these_cuts;  | 
| 
1162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1163
 | 
15481
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30646
 | 
 	if ( $recog =~ /[^\w]/ ) { # "ambig"  | 
| 
1164
 | 
5810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44908
 | 
 	    my $site_re = qr/($recog)/;  | 
| 
1165
 | 
5810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30176
 | 
 	    push @these_cuts, pos while (/$site_re/g);  | 
| 
1166
 | 
5810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8316
 | 
 	    $_ = $_ - length($enz->string) + $cut_site for @these_cuts;  | 
| 
1167
 | 
5810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10025
 | 
 	    if (!$enz->is_palindromic) {  | 
| 
1168
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1459
 | 
 		pos = 0;  | 
| 
1169
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
791
 | 
 		my @these_rev_cuts;  | 
| 
1170
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1238
 | 
 		$recog = $enz->revcom_recog;  | 
| 
1171
 | 
864
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1240
 | 
 		$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut);  | 
| 
1172
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3742
 | 
 		$site_re = qr/($recog)/;  | 
| 
1173
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3515
 | 
 		push @these_rev_cuts, pos while (/$site_re/g);  | 
| 
1174
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1277
 | 
 		$_ = $_ - length($enz->string) + $cut_site for @these_rev_cuts;  | 
| 
1175
 | 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1148
 | 
 		push @these_cuts, @these_rev_cuts;  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else { # "nonambig"  | 
| 
1179
 | 
9671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13714
 | 
 	    my $index_posn=index($_, $recog);  | 
| 
1180
 | 
9671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13915
 | 
 	    while ($index_posn > -1) {  | 
| 
1181
 | 
1547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1750
 | 
 		push (@these_cuts, $index_posn+$cut_site);  | 
| 
1182
 | 
1547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2916
 | 
 		$index_posn=index($_, $recog, $index_posn+1);  | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1184
 | 
9671
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13417
 | 
 	    if (!$enz->is_palindromic) {  | 
| 
1185
 | 
1943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3028
 | 
 		$recog = $enz->revcom_recog;  | 
| 
1186
 | 
1943
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3023
 | 
 		$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut);  | 
| 
1187
 | 
1943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2962
 | 
 		$index_posn=index($_, $recog);  | 
| 
1188
 | 
1943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3346
 | 
 		while ($index_posn > -1) {  | 
| 
1189
 | 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
262
 | 
 		    push @these_cuts, $index_posn+$cut_site;  | 
| 
1190
 | 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
476
 | 
 		    $index_posn=index($_, $recog, $index_posn+1);  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1194
 | 
15481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20834
 | 
 	push @cuts, @these_cuts;  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1196
 | 
15249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22747
 | 
     return [@cuts];  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _multiple_cuts  | 
| 
1200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _multiple_cuts  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Figures out multiple digests  | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : An array of the cut sites for multiply digested DNA  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : A Bio::Restriction::EnzymeCollection object  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Comments  : Double digests is one subset of this, but you can use  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              as many enzymes as you want.  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _multiple_cuts {  | 
| 
1211
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
5
 | 
     my ($self, $ec)=@_;  | 
| 
1212
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $self->cut unless $self->{'_cut'};  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now that we are using positions rather than fragments  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this is really easy  | 
| 
1216
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my @cuts;  | 
| 
1217
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     foreach my $enz ($ec->each_enzyme) {   | 
| 
1218
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
        push @cuts, @{$self->{'_cut_positions'}->{$enz->name}}  | 
| 
1219
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
            if defined $self->{'_cut_positions'}->{$enz->name};  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1221
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     @{$self->{'_cut_positions'}->{'multiple_digest'}}=sort {$a <=> $b} @cuts;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
    | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1223
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $number_of_cuts;  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1225
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $number_of_cuts=scalar @{$self->{'_cut_positions'}->{'multiple_digest'}};  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
1226
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->{_number_of_cuts_by_enzyme}->{'multiple_digest'}=$number_of_cuts;  | 
| 
1227
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, 'multiple_digest');  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
1228
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     if ($number_of_cuts > $self->{maximum_cuts}) {  | 
| 
1229
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $self->{maximum_cuts}=$number_of_cuts;  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _circular  | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _circular  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Identifies cuts at the join of the end of the target with  | 
| 
1238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              the beginning of the target  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : array of scalar integers ( cut sites near join, if any )  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : scalar string (target sequence), Bio::Restriction::Enzyme obj  | 
| 
1241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _circular {  | 
| 
1245
 | 
4718
 | 
 
 | 
 
 | 
  
4718
  
 | 
 
 | 
4972
 | 
     my ($self, $target, $enz, $comp) = @_;  | 
| 
1246
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6780
 | 
     $target=uc $target;  | 
| 
1247
 | 
4718
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7535
 | 
     my $patch_len = ( length $target > 20 ? 10 : int( length($target)/2 ) );  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1249
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7850
 | 
     my ($first, $last) =  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	(substr($target, 0, $patch_len),substr($target, -$patch_len));  | 
| 
1251
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5328
 | 
     my $patch=$last.$first;  | 
| 
1252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now find the cut sites  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1255
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6205
 | 
     my $cut_positions = $self->_make_cuts($patch, $enz, $comp);  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the enzyme doesn't cut in the new fragment  | 
| 
1258
 | 
4718
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7124
 | 
     return [] if (!$cut_positions);  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now we are going to add things to _cut_positions  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # in this shema it doesn't matter if the site is there twice -   | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we will take care of that later. Because we are using position  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # rather than frag or anything else, we can just  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove duplicates.  | 
| 
1265
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3476
 | 
     my @circ_cuts;  | 
| 
1266
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5665
 | 
     foreach my $cut (@$cut_positions) {  | 
| 
1267
 | 
275
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
867
 | 
 	if ($cut == length($last)) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # the cut is actually at position 0, but we're going to call this the  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # length of the sequence so we don't confuse no cuts with a 0 cut  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    push (@circ_cuts, $self->{'_seq'}->length);  | 
| 
1271
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	    push (@circ_cuts, 0);  | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($cut < length($last)) {  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # the cut is before the end of the sequence  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #check  | 
| 
1277
 | 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
512
 | 
 	    push (@circ_cuts, $self->{'_seq'}->length - (length($last) - $cut));  | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # the cut is at the start of the sequence (position >=1)  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # note, we put this at the beginning of the array rather than the end!  | 
| 
1283
 | 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
374
 | 
 	    unshift (@circ_cuts, $cut-length($last));  | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1286
 | 
4718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8302
 | 
     return \@circ_cuts;  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _expanded_string  | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title     : _expanded_string  | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function  : Expand nucleotide ambiguity codes to their representative letters  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns   : The full length string  | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments : The string to be expanded.  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Stolen from the original RestrictionEnzyme.pm  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _expanded_string {  | 
| 
1306
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($self, $str) = @_;  | 
| 
1307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1308
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/N|X/\./g;  | 
| 
1309
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/R/\[AG\]/g;  | 
| 
1310
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/Y/\[CT\]/g;  | 
| 
1311
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/S/\[GC\]/g;  | 
| 
1312
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/W/\[AT\]/g;  | 
| 
1313
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/M/\[AC\]/g;  | 
| 
1314
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/K/\[TG\]/g;  | 
| 
1315
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/B/\[CGT\]/g;  | 
| 
1316
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/D/\[AGT\]/g;  | 
| 
1317
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/H/\[ACT\]/g;  | 
| 
1318
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $str =~ s/V/\[ACG\]/g;  | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1320
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $str;  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |