line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module Bio::Restriction::Analysis |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Rob Edwards |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
## POD Documentation: |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Bio::Restriction::Analysis - cutting sequences with restriction |
15
|
|
|
|
|
|
|
enzymes |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# analyze a DNA sequence for restriction enzymes |
20
|
|
|
|
|
|
|
use Bio::Restriction::Analysis; |
21
|
|
|
|
|
|
|
use Bio::PrimarySeq; |
22
|
|
|
|
|
|
|
use Data::Dumper; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# get a DNA sequence from somewhere |
25
|
|
|
|
|
|
|
my $seq = Bio::PrimarySeq->new |
26
|
|
|
|
|
|
|
(-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTC', |
27
|
|
|
|
|
|
|
-primary_id => 'synopsis', |
28
|
|
|
|
|
|
|
-molecule => 'dna'); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# now start an analysis. |
31
|
|
|
|
|
|
|
# this is using the default set of enzymes |
32
|
|
|
|
|
|
|
my $ra = Bio::Restriction::Analysis->new(-seq=>$seq); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# find unique cutters. This returns a |
35
|
|
|
|
|
|
|
# Bio::Restriction::EnzymeCollection object |
36
|
|
|
|
|
|
|
my $enzymes = $ra->unique_cutters; |
37
|
|
|
|
|
|
|
print "Unique cutters: ", join (', ', |
38
|
|
|
|
|
|
|
map {$_->name} $enzymes->unique_cutters), "\n"; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# AluI is one them. Where does it cut? |
41
|
|
|
|
|
|
|
# This is will return an array of the sequence strings |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $enz = 'AluI'; |
44
|
|
|
|
|
|
|
my @frags = $ra->fragments($enz); |
45
|
|
|
|
|
|
|
# how big are the fragments? |
46
|
|
|
|
|
|
|
print "AluI fragment lengths: ", join(' & ', map {length $_} @frags), "\n"; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# You can also bypass fragments and call sizes directly: |
49
|
|
|
|
|
|
|
# to see all the fragment sizes |
50
|
|
|
|
|
|
|
print "All sizes: ", join " ", $ra->sizes($enz), "\n"; |
51
|
|
|
|
|
|
|
# to see all the fragment sizes sorted by size like on a gel |
52
|
|
|
|
|
|
|
print "All sizes, sorted ", join (" ", $ra->sizes($enz, 0, 1)), "\n"; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# how many times does each enzyme cut |
55
|
|
|
|
|
|
|
my $cuts = $ra->cuts_by_enzyme('BamHI'); |
56
|
|
|
|
|
|
|
print "BamHI cuts $cuts times\n"; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# How many enzymes do not cut at all? |
59
|
|
|
|
|
|
|
print "There are ", scalar $ra->zero_cutters->each_enzyme, |
60
|
|
|
|
|
|
|
" enzymes that do not cut\n"; |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# what about enzymes that cut twice? |
63
|
|
|
|
|
|
|
my $two_cutters = $ra->cutters(2); |
64
|
|
|
|
|
|
|
print join (" ", map {$_->name} $two_cutters->each_enzyme), |
65
|
|
|
|
|
|
|
" cut the sequence twice\n"; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# what are all the enzymes that cut, and how often do they cut |
68
|
|
|
|
|
|
|
printf "\n%-10s%s\n", 'Enzyme', 'Number of Cuts'; |
69
|
|
|
|
|
|
|
my $all_cutters = $ra->cutters; |
70
|
|
|
|
|
|
|
map { |
71
|
|
|
|
|
|
|
printf "%-10s%s\n", $_->name, $ra->cuts_by_enzyme($_->name) |
72
|
|
|
|
|
|
|
} $all_cutters->each_enzyme; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Finally, we can interact the restriction enzyme object by |
75
|
|
|
|
|
|
|
# retrieving it from the collection object see the docs for |
76
|
|
|
|
|
|
|
# Bio::Restriction::Enzyme.pm |
77
|
|
|
|
|
|
|
my $enzobj = $enzymes->get_enzyme($enz); |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 DESCRIPTION |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Bio::Restriction::Analysis describes the results of cutting a DNA |
83
|
|
|
|
|
|
|
sequence with restriction enzymes. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
To use this module you can pass a sequence object and optionally a |
86
|
|
|
|
|
|
|
Bio::Restriction::EnzymeCollection that contains the enzyme(s) to cut the |
87
|
|
|
|
|
|
|
sequences with. There is a default set of enzymes that will be loaded |
88
|
|
|
|
|
|
|
if you do not pass in a Bio::Restriction::EnzymeCollection. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
To cut a sequence, set up a Restriction::Analysis object with a sequence |
91
|
|
|
|
|
|
|
like this: |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
use Bio::Restriction::Analysis; |
94
|
|
|
|
|
|
|
my $ra = Bio::Restriction::Analysis->new(-seq=>$seqobj); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
or |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
my $ra = Bio::Restriction::Analysis->new |
99
|
|
|
|
|
|
|
(-seq=>$seqobj, -enzymes=>$enzs); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Then, to get the fragments for a particular enzyme use this: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
@fragments = $ra->fragments('EcoRI'); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Note that the naming of restriction enzymes is that the last numbers |
106
|
|
|
|
|
|
|
are usually Roman numbers (I, II, III, etc). You may want to use |
107
|
|
|
|
|
|
|
something like this: |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# get a reference to an array of unique (single) cutters |
110
|
|
|
|
|
|
|
$singles = $re->unique_cutters; |
111
|
|
|
|
|
|
|
foreach my $enz ($singles->each_enzyme) { |
112
|
|
|
|
|
|
|
@fragments = $re->fragments($enz); |
113
|
|
|
|
|
|
|
... do something here ... |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Note that if your sequence is circular, the first and last fragment |
117
|
|
|
|
|
|
|
will be joined so that they are the appropriate length and sequence |
118
|
|
|
|
|
|
|
for further analysis. This fragment will also be checked for cuts |
119
|
|
|
|
|
|
|
by the enzyme(s). However, this will change the start of the |
120
|
|
|
|
|
|
|
sequence! |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
There are two separate algorithms used depending on whether your |
123
|
|
|
|
|
|
|
enzyme has ambiguity. The non-ambiguous algorithm is a lot faster, |
124
|
|
|
|
|
|
|
and if you are using very large sequences you should try and use |
125
|
|
|
|
|
|
|
this algorithm. If you have a large sequence (e.g. genome) and |
126
|
|
|
|
|
|
|
want to use ambgiuous enzymes you may want to make separate |
127
|
|
|
|
|
|
|
Bio::Restriction::Enzyme objects for each of the possible |
128
|
|
|
|
|
|
|
alternatives and make sure that you do not set is_ambiguous! |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
This version should correctly deal with overlapping cut sites |
131
|
|
|
|
|
|
|
in both ambiguous and non-ambiguous enzymes. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
I have tried to write this module with speed and memory in mind |
134
|
|
|
|
|
|
|
so that it can be effectively used for large (e.g. genome sized) |
135
|
|
|
|
|
|
|
sequence. This module only stores the cut positions internally, |
136
|
|
|
|
|
|
|
and calculates everything else on an as-needed basis. Therefore |
137
|
|
|
|
|
|
|
when you call fragment_maps (for example), there may be another |
138
|
|
|
|
|
|
|
delay while these are generated. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 FEEDBACK |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 Mailing Lists |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
145
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
146
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
149
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 Support |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
I |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
158
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
159
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
160
|
|
|
|
|
|
|
with code and data examples if at all possible. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head2 Reporting Bugs |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
165
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
166
|
|
|
|
|
|
|
web: |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 AUTHOR |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Rob Edwards, redwards@utmem.edu, |
173
|
|
|
|
|
|
|
Steve Chervitz, sac@bioperl.org |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Heikki Lehvaslaiho, heikki-at-bioperl-dot-org |
178
|
|
|
|
|
|
|
Mark A. Jensen, maj-at-fortinbras-dot-us |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 COPYRIGHT |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Copyright (c) 2003 Rob Edwards. Some of this work is Copyright (c) |
183
|
|
|
|
|
|
|
1997-2002 Steve A. Chervitz. All Rights Reserved. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it |
186
|
|
|
|
|
|
|
under the same terms as Perl itself. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head1 SEE ALSO |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
L, |
191
|
|
|
|
|
|
|
L |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head1 APPENDIX |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Methods beginning with a leading underscore are considered private and |
196
|
|
|
|
|
|
|
are intended for internal use by this module. They are not considered |
197
|
|
|
|
|
|
|
part of the public interface and are described here for documentation |
198
|
|
|
|
|
|
|
purposes only. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
package Bio::Restriction::Analysis; |
203
|
3
|
|
|
3
|
|
3348
|
use Bio::Restriction::EnzymeCollection; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
89
|
|
204
|
3
|
|
|
3
|
|
14
|
use strict; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
62
|
|
205
|
3
|
|
|
3
|
|
12
|
use Data::Dumper; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
190
|
|
206
|
|
|
|
|
|
|
|
207
|
3
|
|
|
3
|
|
13
|
use base qw(Bio::Root::Root); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
228
|
|
208
|
3
|
|
|
3
|
|
14
|
use Scalar::Util qw(blessed); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
8269
|
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head1 new |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Title : new |
213
|
|
|
|
|
|
|
Function : Initializes the restriction enzyme object |
214
|
|
|
|
|
|
|
Returns : The Restriction::Analysis object |
215
|
|
|
|
|
|
|
Arguments : |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
$re_anal->new(-seq=$seqobj, |
218
|
|
|
|
|
|
|
-enzymes=>Restriction::EnzymeCollection object) |
219
|
|
|
|
|
|
|
-seq requires a Bio::PrimarySeq object |
220
|
|
|
|
|
|
|
-enzymes is optional. |
221
|
|
|
|
|
|
|
If omitted it will use the default set of enzymes |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
This is the place to start. Pass in a sequence, and you will be able |
224
|
|
|
|
|
|
|
to get the fragments back out. Several other things are available |
225
|
|
|
|
|
|
|
like the number of zero cutters or single cutters. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=cut |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub new { |
230
|
4
|
|
|
4
|
1
|
802
|
my($class, @args) = @_; |
231
|
4
|
|
|
|
|
21
|
my $self = $class->SUPER::new(@args); |
232
|
4
|
|
|
|
|
20
|
my ($seq,$enzymes) = |
233
|
|
|
|
|
|
|
$self->_rearrange([qw( |
234
|
|
|
|
|
|
|
SEQ |
235
|
|
|
|
|
|
|
ENZYMES |
236
|
|
|
|
|
|
|
)], @args); |
237
|
|
|
|
|
|
|
|
238
|
4
|
50
|
|
|
|
18
|
$seq && $self->seq($seq); |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
$enzymes ? $self->enzymes($enzymes) |
241
|
4
|
100
|
|
|
|
24
|
: ($self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new ); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# keep track of status |
244
|
4
|
|
|
|
|
9
|
$self->{'_cut'} = 0; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# left these here because we want to reforce a _cut if someone |
247
|
|
|
|
|
|
|
# just calls new |
248
|
4
|
|
|
|
|
10
|
$self->{maximum_cuts} = 0; |
249
|
|
|
|
|
|
|
|
250
|
4
|
|
|
|
|
9
|
$self->{'_number_of_cuts_by_enzyme'} = {}; |
251
|
4
|
|
|
|
|
8
|
$self->{'_number_of_cuts_by_cuts'} = {}; |
252
|
4
|
|
|
|
|
11
|
$self->{'_fragments'} = {}; |
253
|
4
|
|
|
|
|
9
|
$self->{'_cut_positions'} = {}; # cut position is the real position |
254
|
4
|
|
|
|
|
7
|
$self->{'_frag_map_list'} = {}; |
255
|
|
|
|
|
|
|
|
256
|
4
|
|
|
|
|
21
|
return $self; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head1 Methods to set parameters |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=cut |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head2 seq |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Title : seq |
267
|
|
|
|
|
|
|
Usage : $ranalysis->seq($newval); |
268
|
|
|
|
|
|
|
Function : get/set method for the sequence to be cut |
269
|
|
|
|
|
|
|
Example : $re->seq($seq); |
270
|
|
|
|
|
|
|
Returns : value of seq |
271
|
|
|
|
|
|
|
Args : A Bio::PrimarySeqI dna object (optional) |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=cut |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub seq { |
276
|
16
|
|
|
16
|
1
|
26
|
my $self = shift; |
277
|
16
|
100
|
|
|
|
41
|
if (@_) { |
278
|
6
|
|
|
|
|
8
|
my $seq = shift; |
279
|
6
|
50
|
|
|
|
27
|
$self->throw('Need a sequence object ['. ref $seq. ']') |
280
|
|
|
|
|
|
|
unless $seq->isa('Bio::PrimarySeqI'); |
281
|
6
|
50
|
|
|
|
16
|
$self->throw('Need a DNA sequence object ['. $seq->alphabet. ']') |
282
|
|
|
|
|
|
|
unless $seq->alphabet eq 'dna'; |
283
|
|
|
|
|
|
|
|
284
|
6
|
|
|
|
|
38
|
$self->{'_seq'} = $seq; |
285
|
6
|
|
|
|
|
10
|
$self->{'_cut'} = 0; |
286
|
|
|
|
|
|
|
} |
287
|
16
|
|
|
|
|
38
|
return $self->{'_seq'}; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 enzymes |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Title : enzymes |
293
|
|
|
|
|
|
|
Usage : $re->enzymes($newval) |
294
|
|
|
|
|
|
|
Function : gets/Set the restriction enzyme enzymes |
295
|
|
|
|
|
|
|
Example : $re->enzymes('EcoRI') |
296
|
|
|
|
|
|
|
Returns : reference to the collection |
297
|
|
|
|
|
|
|
Args : an array of Bio::Restriction::EnzymeCollection and/or |
298
|
|
|
|
|
|
|
Bio::Restriction::Enzyme objects |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
The default object for this method is |
302
|
|
|
|
|
|
|
Bio::Restriction::EnzymeCollection. However, you can also pass it a |
303
|
|
|
|
|
|
|
list of Bio::Restriction::Enzyme objects - even mixed with Collection |
304
|
|
|
|
|
|
|
objects. They will all be stored into one collection. |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub enzymes { |
309
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
310
|
2
|
50
|
|
|
|
5
|
if (@_) { |
311
|
|
|
|
|
|
|
$self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new (-empty => 1) |
312
|
2
|
50
|
|
|
|
19
|
unless $self->{'_enzymes'}; |
313
|
2
|
|
|
|
|
7
|
$self->{'_enzymes'}->enzymes(@_); |
314
|
2
|
|
|
|
|
3
|
$self->{'_cut'} = 0; |
315
|
|
|
|
|
|
|
} |
316
|
2
|
|
|
|
|
4
|
return $self->{'_enzymes'}; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head1 Perform the analysis |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=cut |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=head2 cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Title : cut |
327
|
|
|
|
|
|
|
Usage : $re->cut() |
328
|
|
|
|
|
|
|
Function : Cut the sequence with the enzymes |
329
|
|
|
|
|
|
|
Example : $re->cut(); $re->cut('single'); or $re->cut('multiple', $enzymecollection); |
330
|
|
|
|
|
|
|
Returns : $self |
331
|
|
|
|
|
|
|
Args : 'single' (optional), 'multiple' with enzyme collection. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
An explicit cut method is needed to pass arguments to it. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
There are two varieties of cut. Single is the default, and need |
336
|
|
|
|
|
|
|
not be explicitly called. This cuts the sequence with each |
337
|
|
|
|
|
|
|
enzyme separately. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Multiple cuts a sequence with more than one enzyme. You must pass |
340
|
|
|
|
|
|
|
it a Bio::Restriction::EnzymeCollection object of the set |
341
|
|
|
|
|
|
|
of enzymes that you want to use in the double digest. The results |
342
|
|
|
|
|
|
|
will be stored as an enzyme named "multiple_digest", so you can |
343
|
|
|
|
|
|
|
use all the retrieval methods to get the data. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
If you want to use the default setting there is no need to call cut |
346
|
|
|
|
|
|
|
directly. Every method in the class that needs output checks the |
347
|
|
|
|
|
|
|
object's internal status and recalculates the cuts if needed. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Note: cut doesn't now re-initialize everything before figuring |
350
|
|
|
|
|
|
|
out cuts. This is so that you can do multiple digests, or add more |
351
|
|
|
|
|
|
|
data or whatever. You'll have to use new to reset everything. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
See also the comments in above about ambiguous and non-ambiguous |
354
|
|
|
|
|
|
|
sequences. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=cut |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub cut { |
359
|
10
|
|
|
10
|
1
|
19
|
my ($self, $opt, $ec) = @_; |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# for the moment I have left this as a separate routine so |
362
|
|
|
|
|
|
|
# the user calls cut rather than _cuts. This also initializes |
363
|
|
|
|
|
|
|
# some stuff we need to use. |
364
|
|
|
|
|
|
|
|
365
|
10
|
50
|
|
|
|
26
|
$self->throw("A sequence must be supplied") |
366
|
|
|
|
|
|
|
unless $self->seq; |
367
|
|
|
|
|
|
|
|
368
|
10
|
100
|
66
|
|
|
41
|
if ($opt && uc($opt) eq "MULTIPLE") { |
369
|
3
|
50
|
|
|
|
10
|
$self->throw("You must supply a separate enzyme collection for multiple digests") unless $ec; |
370
|
3
|
|
|
|
|
12
|
$self->_multiple_cuts($ec); # multiple digests |
371
|
|
|
|
|
|
|
} else { |
372
|
|
|
|
|
|
|
# reset some of the things that we save |
373
|
7
|
|
|
|
|
13
|
$self->{maximum_cuts} = 0; |
374
|
7
|
|
|
|
|
14
|
$self->{'_number_of_cuts_by_enzyme'} = {}; |
375
|
7
|
|
|
|
|
596
|
$self->{'_number_of_cuts_by_cuts'} = {}; |
376
|
7
|
|
|
|
|
160
|
$self->{'_fragments'} = {}; |
377
|
7
|
|
|
|
|
14
|
$self->{'_cut_positions'} = {}; # cut position is the real position |
378
|
7
|
|
|
|
|
571
|
$self->{'_frag_map_list'} = {}; |
379
|
7
|
|
|
|
|
29
|
$self->_cuts; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
10
|
|
|
|
|
819
|
$self->{'_cut'} = 1; |
383
|
10
|
|
|
|
|
32
|
return $self; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 mulitple_digest |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Title : multiple_digest |
389
|
|
|
|
|
|
|
Function : perform a multiple digest on a sequence |
390
|
|
|
|
|
|
|
Returns : $self so you can go and get any of the other methods |
391
|
|
|
|
|
|
|
Arguments : An enzyme collection |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Multiple digests can use 1 or more enzymes, and the data is stored |
394
|
|
|
|
|
|
|
in as if it were an enzyme called multiple_digest. You can then |
395
|
|
|
|
|
|
|
retrieve information about multiple digests from any of the other |
396
|
|
|
|
|
|
|
methods. |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
You can use this method in place of $re->cut('multiple', $enz_coll); |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=cut |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub multiple_digest { |
403
|
0
|
|
|
0
|
0
|
0
|
my ($self, $ec)=@_; |
404
|
0
|
|
|
|
|
0
|
return $self->cut('multiple', $ec); |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=head1 Query the results of the analysis |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=head2 positions |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Title : positions |
414
|
|
|
|
|
|
|
Function : Retrieve the positions that an enzyme cuts at |
415
|
|
|
|
|
|
|
Returns : An array of the positions that an enzyme cuts at |
416
|
|
|
|
|
|
|
: or an empty array if the enzyme doesn't cut |
417
|
|
|
|
|
|
|
Arguments: An enzyme name to retrieve the positions for |
418
|
|
|
|
|
|
|
Comments : The cut occurs after the base specified. |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=cut |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub positions { |
423
|
8
|
|
|
8
|
1
|
293
|
my ($self, $enz) = @_; |
424
|
8
|
50
|
|
|
|
24
|
$self->cut unless $self->{'_cut'}; |
425
|
8
|
50
|
|
|
|
17
|
$self->throw('no enzyme selected to get positions for') |
426
|
|
|
|
|
|
|
unless $enz; |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
return defined $self->{'_cut_positions'}->{$enz} ? |
429
|
8
|
50
|
|
|
|
21
|
@{$self->{'_cut_positions'}->{$enz}} : |
|
8
|
|
|
|
|
40
|
|
430
|
|
|
|
|
|
|
(); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 fragments |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Title : fragments |
436
|
|
|
|
|
|
|
Function : Retrieve the fragments that we cut |
437
|
|
|
|
|
|
|
Returns : An array of the fragments retrieved. |
438
|
|
|
|
|
|
|
Arguments: An enzyme name to retrieve the fragments for |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
For example this code will retrieve the fragments for all enzymes that |
441
|
|
|
|
|
|
|
cut your sequence |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
my $all_cutters = $analysis->cutters; |
444
|
|
|
|
|
|
|
foreach my $enz ($$all_cutters->each_enzyme}) { |
445
|
|
|
|
|
|
|
@fragments=$analysis->fragments($enz); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=cut |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
sub fragments { |
451
|
18
|
|
|
18
|
1
|
4143
|
my ($self, $enz) = @_; |
452
|
18
|
100
|
|
|
|
58
|
$self->cut unless $self->{'_cut'}; |
453
|
18
|
50
|
|
|
|
35
|
$self->throw('no enzyme selected to get fragments for') |
454
|
|
|
|
|
|
|
unless $enz; |
455
|
18
|
|
|
|
|
19
|
my @fragments; |
456
|
18
|
|
|
|
|
40
|
for ($self->fragment_maps($enz)) {push @fragments, $_->{seq}} |
|
46
|
|
|
|
|
53
|
|
457
|
18
|
|
|
|
|
81
|
return @fragments; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head2 fragment_maps |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Title : fragment_maps |
463
|
|
|
|
|
|
|
Function : Retrieves fragment sequences with start and end |
464
|
|
|
|
|
|
|
points. Useful for feature construction. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Returns : An array containing a hash reference for each fragment, |
467
|
|
|
|
|
|
|
containing the start point, end point and DNA |
468
|
|
|
|
|
|
|
sequence. The hash keys are 'start', 'end' and |
469
|
|
|
|
|
|
|
'seq'. Returns an empty array if not defined. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Arguments : An enzyme name, enzyme object, |
472
|
|
|
|
|
|
|
or enzyme collection to retrieve the fragments for. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
If passes an enzyme collection it will return the result of a multiple |
475
|
|
|
|
|
|
|
digest. This : will also cause the special enzyme 'multiple_digest' to |
476
|
|
|
|
|
|
|
be created so you can get : other information about this multiple |
477
|
|
|
|
|
|
|
digest. (TMTOWTDI). |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
There is a minor problem with this and $self-Efragments that I |
480
|
|
|
|
|
|
|
haven't got a good answer for (at the moment). If the sequence is not |
481
|
|
|
|
|
|
|
cut, do we return undef, or the whole sequence? |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
For linear fragments it would be good to return the whole |
484
|
|
|
|
|
|
|
sequence. For circular fragments I am not sure. |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
At the moment it returns the whole sequence with start of 1 and end of |
487
|
|
|
|
|
|
|
length of the sequence. For example: |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
use Bio::Restriction::Analysis; |
490
|
|
|
|
|
|
|
use Bio::Restriction::EnzymeCollection; |
491
|
|
|
|
|
|
|
use Bio::PrimarySeq; |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
my $seq = Bio::PrimarySeq->new |
494
|
|
|
|
|
|
|
(-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATCCAAAAAAGAGTGAGCTTCTGAT', |
495
|
|
|
|
|
|
|
-primary_id => 'synopsis', |
496
|
|
|
|
|
|
|
-molecule => 'dna'); |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
my $ra = Bio::Restriction::Analysis->new(-seq=>$seq); |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
my @gel; |
501
|
|
|
|
|
|
|
my @bam_maps = $ra->fragment_maps('BamHI'); |
502
|
|
|
|
|
|
|
foreach my $i (@bam_maps) { |
503
|
|
|
|
|
|
|
my $start = $i->{start}; |
504
|
|
|
|
|
|
|
my $end = $i->{end}; |
505
|
|
|
|
|
|
|
my $sequence = $i->{seq}; |
506
|
|
|
|
|
|
|
push @gel, "$start--$sequence--$end"; |
507
|
|
|
|
|
|
|
@gel = sort {length $b <=> length $a} @gel; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
print join("\n", @gel) . "\n"; |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=cut |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
sub fragment_maps { |
514
|
21
|
|
|
21
|
1
|
24
|
my ($self, $enz) = @_; |
515
|
21
|
100
|
|
|
|
42
|
$self->cut unless $self->{'_cut'}; |
516
|
21
|
50
|
|
|
|
40
|
$self->throw('no enzyme selected to get fragment maps for') |
517
|
|
|
|
|
|
|
unless $enz; |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# we are going to generate this on an as-needed basis rather than |
520
|
|
|
|
|
|
|
# for every enzyme this should cut down on the amount of |
521
|
|
|
|
|
|
|
# duplicated data we are trying to save in memory and make this |
522
|
|
|
|
|
|
|
# faster and easier for large sequences, e.g. genome analysis |
523
|
|
|
|
|
|
|
|
524
|
21
|
|
|
|
|
19
|
my @cut_positions; |
525
|
21
|
100
|
66
|
|
|
138
|
if (ref $enz eq '' && exists $self->{'_cut_positions'}->{$enz}) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
526
|
13
|
|
|
|
|
8
|
@cut_positions=@{$self->{'_cut_positions'}->{$enz}}; |
|
13
|
|
|
|
|
32
|
|
527
|
|
|
|
|
|
|
} elsif ($enz->isa("Bio::Restriction::EnzymeI")) { |
528
|
5
|
|
|
|
|
4
|
@cut_positions=@{$self->{'_cut_positions'}->{$enz->name}}; |
|
5
|
|
|
|
|
13
|
|
529
|
|
|
|
|
|
|
} elsif ($enz->isa("Bio::Restriction::EnzymeCollection")) { |
530
|
2
|
|
|
|
|
7
|
$self->cut('multiple', $enz); |
531
|
2
|
|
|
|
|
2
|
@cut_positions=@{$self->{'_cut_positions'}->{'multiple_digest'}}; |
|
2
|
|
|
|
|
8
|
|
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
21
|
100
|
|
|
|
43
|
unless (defined($cut_positions[0])) { |
535
|
|
|
|
|
|
|
# it doesn't cut |
536
|
|
|
|
|
|
|
# return the whole sequence |
537
|
|
|
|
|
|
|
# this should probably have the is_circular command |
538
|
|
|
|
|
|
|
my %map=( |
539
|
|
|
|
|
|
|
'start' => 1, |
540
|
|
|
|
|
|
|
'end' => $self->{'_seq'}->length, |
541
|
4
|
|
|
|
|
16
|
'seq' => $self->{'_seq'}->seq |
542
|
|
|
|
|
|
|
); |
543
|
4
|
|
|
|
|
6
|
push (@{$self->{'_frag_map_list'}->{$enz}}, \%map); |
|
4
|
|
|
|
|
13
|
|
544
|
|
|
|
|
|
|
return defined $self->{'_frag_map_list'}->{$enz} ? |
545
|
4
|
50
|
|
|
|
10
|
@{$self->{'_frag_map_list'}->{$enz}} : (); |
|
4
|
|
|
|
|
12
|
|
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
17
|
|
|
|
|
38
|
@cut_positions=sort {$a <=> $b} @cut_positions; |
|
109
|
|
|
|
|
75
|
|
549
|
17
|
|
|
|
|
24
|
push my @cuts, $cut_positions[0]; |
550
|
17
|
|
|
|
|
23
|
foreach my $i (@cut_positions) { |
551
|
72
|
100
|
|
|
|
111
|
push @cuts, $i if $i != $cuts[$#cuts]; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
17
|
|
|
|
|
18
|
my $start=1; my $stop; my %seq; my %stop; |
|
17
|
|
|
|
|
16
|
|
|
0
|
|
|
|
|
0
|
|
555
|
17
|
|
|
|
|
22
|
foreach $stop (@cuts) { |
556
|
67
|
100
|
|
|
|
83
|
next if !$stop; # cuts at beginning of sequence |
557
|
66
|
|
|
|
|
124
|
$seq{$start}=$self->{'_seq'}->subseq($start, $stop); |
558
|
66
|
|
|
|
|
65
|
$stop{$start}=$stop; |
559
|
66
|
|
|
|
|
63
|
$start=$stop+1; |
560
|
|
|
|
|
|
|
} |
561
|
17
|
|
|
|
|
34
|
$stop=$self->{'_seq'}->length; |
562
|
17
|
50
|
|
|
|
31
|
if ($start > $stop) { |
563
|
|
|
|
|
|
|
# borderline case. The enzyme cleaved at the end of the sequence |
564
|
|
|
|
|
|
|
# what do I do now? |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
else { |
567
|
17
|
|
|
|
|
31
|
$seq{$start}=$self->{'_seq'}->subseq($start, $stop); |
568
|
17
|
|
|
|
|
25
|
$stop{$start}=$stop; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
17
|
100
|
|
|
|
39
|
if ($self->{'_seq'}->is_circular) { |
572
|
|
|
|
|
|
|
# join the first and last fragments |
573
|
6
|
|
|
|
|
11
|
$seq{$start}.=$seq{'1'}; |
574
|
6
|
|
|
|
|
11
|
delete $seq{'1'}; |
575
|
6
|
|
|
|
|
7
|
$stop{$start}=$stop{'1'}; |
576
|
6
|
|
|
|
|
6
|
delete $stop{'1'}; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
|
579
|
17
|
|
|
|
|
56
|
foreach my $start (sort {$a <=> $b} keys %seq) { |
|
133
|
|
|
|
|
117
|
|
580
|
|
|
|
|
|
|
my %map=( |
581
|
|
|
|
|
|
|
'start' => $start, |
582
|
|
|
|
|
|
|
'end' => $stop{$start}, |
583
|
77
|
|
|
|
|
162
|
'seq' => $seq{$start} |
584
|
|
|
|
|
|
|
); |
585
|
77
|
|
|
|
|
49
|
push (@{$self->{'_frag_map_list'}->{$enz}}, \%map); |
|
77
|
|
|
|
|
145
|
|
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
return defined $self->{'_frag_map_list'}->{$enz} ? |
589
|
17
|
50
|
|
|
|
65
|
@{$self->{'_frag_map_list'}->{$enz}} : (); |
|
17
|
|
|
|
|
86
|
|
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head2 sizes |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Title : sizes |
596
|
|
|
|
|
|
|
Function : Retrieves an array with the sizes of the fragments |
597
|
|
|
|
|
|
|
Returns : Array that has the sizes of the fragments ordered from |
598
|
|
|
|
|
|
|
largest to smallest like they would appear in a gel. |
599
|
|
|
|
|
|
|
Arguments: An enzyme name to retrieve the sizes for is required and |
600
|
|
|
|
|
|
|
kilobases to the nearest 0.1 kb, else it will be in |
601
|
|
|
|
|
|
|
bp. If the optional third entry is set the results will |
602
|
|
|
|
|
|
|
be sorted. |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
This is designed to make it easy to see what fragments you should get |
605
|
|
|
|
|
|
|
on a gel! |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
You should be able to do these: |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# to see all the fragment sizes, |
610
|
|
|
|
|
|
|
print join "\n", $re->sizes($enz), "\n"; |
611
|
|
|
|
|
|
|
# to see all the fragment sizes sorted |
612
|
|
|
|
|
|
|
print join "\n", $re->sizes($enz, 0, 1), "\n"; |
613
|
|
|
|
|
|
|
# to see all the fragment sizes in kb sorted |
614
|
|
|
|
|
|
|
print join "\n", $re->sizes($enz, 1, 1), "\n"; |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=cut |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
sub sizes { |
619
|
3
|
|
|
3
|
1
|
9
|
my ($self, $enz, $kb, $sort) = @_; |
620
|
3
|
50
|
|
|
|
6
|
$self->throw('no enzyme selected to get fragments for') |
621
|
|
|
|
|
|
|
unless $enz; |
622
|
|
|
|
|
|
|
|
623
|
3
|
100
|
|
|
|
12
|
if (blessed($enz)) { |
624
|
1
|
50
|
|
|
|
4
|
$self->throw("Enzyme must be enzyme name or a Bio::Restriction::EnzymeI, not ".ref($enz)) |
625
|
|
|
|
|
|
|
if !$enz->isa('Bio::Restriction::EnzymeI'); |
626
|
1
|
|
|
|
|
4
|
$enz = $enz->name; |
627
|
|
|
|
|
|
|
} |
628
|
3
|
50
|
|
|
|
5
|
$self->cut unless $self->{'_cut'}; |
629
|
3
|
|
|
|
|
4
|
my @frag; my $lastsite=0; |
|
3
|
|
|
|
|
4
|
|
630
|
|
|
|
|
|
|
|
631
|
3
|
|
|
|
|
3
|
foreach my $site (@{$self->{'_cut_positions'}->{$enz}}) { |
|
3
|
|
|
|
|
7
|
|
632
|
9
|
50
|
|
|
|
14
|
$kb ? push (@frag, (int($site-($lastsite))/100)/10) |
633
|
|
|
|
|
|
|
: push (@frag, $site-($lastsite)); |
634
|
9
|
|
|
|
|
9
|
$lastsite=$site; |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
$kb ? push (@frag, (int($self->{'_seq'}->length-($lastsite))/100)/10) |
637
|
3
|
50
|
|
|
|
11
|
: push (@frag, $self->{'_seq'}->length-($lastsite)); |
638
|
3
|
100
|
|
|
|
8
|
if ($self->{'_seq'}->is_circular) { |
639
|
1
|
|
|
|
|
2
|
my $first=shift @frag; |
640
|
1
|
|
|
|
|
2
|
my $last=pop @frag; |
641
|
1
|
|
|
|
|
4
|
push @frag, ($first+$last); |
642
|
|
|
|
|
|
|
} |
643
|
3
|
50
|
|
|
|
5
|
$sort ? @frag = sort {$b <=> $a} @frag : 1; |
|
0
|
|
|
|
|
0
|
|
644
|
|
|
|
|
|
|
|
645
|
3
|
|
|
|
|
14
|
return @frag; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=head1 How many times does enzymes X cut? |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head2 cuts_by_enzyme |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
Title : cuts_by_enzyme |
655
|
|
|
|
|
|
|
Function : Return the number of cuts for an enzyme |
656
|
|
|
|
|
|
|
Returns : An integer with the number of times each enzyme cuts. |
657
|
|
|
|
|
|
|
Returns 0 if doesn't cut or undef if not defined |
658
|
|
|
|
|
|
|
Arguments : An enzyme name string |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=cut |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
sub cuts_by_enzyme { |
664
|
4
|
|
|
4
|
1
|
8
|
my ($self, $enz)=@_; |
665
|
|
|
|
|
|
|
|
666
|
4
|
50
|
|
|
|
10
|
$self->throw("Need an enzyme name") |
667
|
|
|
|
|
|
|
unless defined $enz; |
668
|
4
|
50
|
|
|
|
7
|
$self->cut unless $self->{'_cut'}; |
669
|
4
|
|
|
|
|
14
|
return $self->{'_number_of_cuts_by_enzyme'}->{$enz}; |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=head1 Which enzymes cut the sequence N times? |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head2 cutters |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Title : cutters |
679
|
|
|
|
|
|
|
Function : Find enzymes that cut a given number of times |
680
|
|
|
|
|
|
|
Returns : a Bio::Restriction::EnzymeCollection |
681
|
|
|
|
|
|
|
Arguments : 1. exact time or lower limit, |
682
|
|
|
|
|
|
|
non-negative integer, optional |
683
|
|
|
|
|
|
|
2. upper limit, non-negative integer, |
684
|
|
|
|
|
|
|
larger or equalthan first, optional |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
If no arguments are given, the method returns all enzymes that do cut |
688
|
|
|
|
|
|
|
the sequence. The argument zero, '0', is same as method |
689
|
|
|
|
|
|
|
zero_cutters(). The argument one, '1', corresponds to unique_cutters. |
690
|
|
|
|
|
|
|
If either of the limits is larger than number of cuts any enzyme cuts the |
691
|
|
|
|
|
|
|
sequence, the that limit is automagically lowered. The method max_cuts() |
692
|
|
|
|
|
|
|
gives the largest number of cuts. |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
See Also : L, |
695
|
|
|
|
|
|
|
L, L |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=cut |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
sub cutters { |
700
|
5
|
|
|
5
|
1
|
6
|
my ($self, $a, $z) = @_; |
701
|
|
|
|
|
|
|
|
702
|
5
|
100
|
|
|
|
15
|
$self->cut unless $self->{'_cut'}; |
703
|
|
|
|
|
|
|
|
704
|
5
|
|
|
|
|
3
|
my ($start, $end); |
705
|
5
|
100
|
|
|
|
11
|
if (defined $a) { |
706
|
4
|
50
|
|
|
|
21
|
$self->throw("Need a non-zero integer [$a]") |
707
|
|
|
|
|
|
|
unless $a =~ /^[+]?\d+$/; |
708
|
4
|
|
|
|
|
4
|
$start = $a; |
709
|
|
|
|
|
|
|
} else { |
710
|
1
|
|
|
|
|
1
|
$start = 1; |
711
|
|
|
|
|
|
|
} |
712
|
5
|
50
|
|
|
|
10
|
$start = $self->{'maximum_cuts'} if $start > $self->{'maximum_cuts'}; |
713
|
|
|
|
|
|
|
|
714
|
5
|
50
|
|
|
|
10
|
if (defined $z) { |
|
|
100
|
|
|
|
|
|
715
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Need a non-zero integer no smaller than start [0]") |
716
|
|
|
|
|
|
|
unless $z =~ /^[+]?\d+$/ and $z >= $a; |
717
|
0
|
|
|
|
|
0
|
$end = $z; |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
elsif (defined $a) { |
720
|
4
|
|
|
|
|
5
|
$end = $start; |
721
|
|
|
|
|
|
|
} else { |
722
|
1
|
|
|
|
|
1
|
$end = $self->{'maximum_cuts'}; |
723
|
|
|
|
|
|
|
} |
724
|
5
|
50
|
|
|
|
10
|
$end = $self->{'maximum_cuts'} if $end > $self->{'maximum_cuts'}; |
725
|
5
|
|
|
|
|
21
|
my $set = Bio::Restriction::EnzymeCollection->new(-empty => 1); |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
#return an empty set if nothing cuts |
728
|
5
|
50
|
|
|
|
11
|
return $set unless $self->{'maximum_cuts'}; |
729
|
|
|
|
|
|
|
|
730
|
5
|
|
|
|
|
9
|
for (my $i=$start; $i<=$end; $i++) { |
731
|
8
|
|
|
|
|
27
|
$set->enzymes( @{$self->{_number_of_cuts_by_cuts}->{$i}} ) |
732
|
13
|
100
|
|
|
|
25
|
if defined $self->{_number_of_cuts_by_cuts}->{$i}; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
5
|
|
|
|
|
17
|
return $set; |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head2 unique_cutters |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Title : unique_cutters |
742
|
|
|
|
|
|
|
Function : A special case if cutters() where enzymes only cut once |
743
|
|
|
|
|
|
|
Returns : a Bio::Restriction::EnzymeCollection |
744
|
|
|
|
|
|
|
Arguments : - |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
See also: L, L |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=cut |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
sub unique_cutters { |
752
|
2
|
|
|
2
|
1
|
8
|
shift->cutters(1); |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=head2 zero_cutters |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
Title : zero_cutters |
758
|
|
|
|
|
|
|
Function : A special case if cutters() where enzymes don't cut the sequence |
759
|
|
|
|
|
|
|
Returns : a Bio::Restriction::EnzymeCollection |
760
|
|
|
|
|
|
|
Arguments : - |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
See also: L, L |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=cut |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
sub zero_cutters { |
767
|
1
|
|
|
1
|
1
|
3
|
shift->cutters(0); |
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=head2 max_cuts |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
Title : max_cuts |
773
|
|
|
|
|
|
|
Function : Find the most number of cuts |
774
|
|
|
|
|
|
|
Returns : The number of times the enzyme that cuts most cuts. |
775
|
|
|
|
|
|
|
Arguments : None |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
This is not a very practical method, but if you are curious... |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
=cut |
780
|
|
|
|
|
|
|
|
781
|
1
|
|
|
1
|
1
|
5
|
sub max_cuts { return shift->{maximum_cuts} } |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=head1 Internal methods |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=cut |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=head2 _cuts |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
Title : _cuts |
790
|
|
|
|
|
|
|
Function : Figures out which enzymes we know about and cuts the sequence. |
791
|
|
|
|
|
|
|
Returns : Nothing. |
792
|
|
|
|
|
|
|
Arguments : None. |
793
|
|
|
|
|
|
|
Comments : An internal method. This will figure out where the sequence |
794
|
|
|
|
|
|
|
should be cut, and provide the appropriate results. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=cut |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
sub _cuts { |
799
|
7
|
|
|
7
|
|
15
|
my $self = shift; |
800
|
|
|
|
|
|
|
|
801
|
7
|
|
|
|
|
26
|
my $target_seq=uc $self->{'_seq'}->seq; # I have been burned on this before :) |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# first, find out all the enzymes that we have |
805
|
7
|
|
|
|
|
26
|
foreach my $enz ($self->{'_enzymes'}->each_enzyme) { |
806
|
9575
|
|
|
|
|
5955
|
my @all_cuts; |
807
|
9575
|
100
|
|
|
|
19912
|
my @others = $enz->others if $enz->can("others"); |
808
|
9575
|
|
|
|
|
7513
|
foreach my $enzyme ($enz, @others) { |
809
|
|
|
|
|
|
|
# cut the sequence |
810
|
|
|
|
|
|
|
# _make_cuts handles all cases (amibiguous, non-ambiguous) X |
811
|
|
|
|
|
|
|
# (palindromic X non-palindromic) |
812
|
|
|
|
|
|
|
# |
813
|
9641
|
|
|
|
|
9245
|
my $cut_positions = $self->_make_cuts($target_seq, $enzyme); |
814
|
|
|
|
|
|
|
|
815
|
9641
|
|
|
|
|
7212
|
push @all_cuts, @$cut_positions; |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
#### need to refactor circular handling.... |
818
|
|
|
|
|
|
|
#### |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
# deal with is_circular sequences |
821
|
9641
|
100
|
|
|
|
16413
|
if ($self->{'_seq'}->is_circular) { |
822
|
4286
|
|
|
|
|
4903
|
$cut_positions=$self->_circular($target_seq, $enzyme); |
823
|
4286
|
|
|
|
|
3869
|
push @all_cuts, @$cut_positions; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
# non-symmetric cutters (most external cutters, e.g.) need |
826
|
|
|
|
|
|
|
# special handling |
827
|
9641
|
100
|
|
|
|
12834
|
unless ($enzyme->is_symmetric) { |
828
|
|
|
|
|
|
|
# do all of above with explicit use of the |
829
|
|
|
|
|
|
|
# enzyme's 'complementary_cut'... |
830
|
|
|
|
|
|
|
|
831
|
868
|
|
|
|
|
1117
|
$cut_positions = $self->_make_cuts($target_seq, $enzyme, 'COMP'); |
832
|
868
|
|
|
|
|
734
|
push @all_cuts, @$cut_positions; |
833
|
|
|
|
|
|
|
# deal with is_circular sequences |
834
|
868
|
100
|
|
|
|
1516
|
if ($self->{'_seq'}->is_circular) { |
835
|
432
|
|
|
|
|
539
|
$cut_positions=$self->_circular($target_seq, $enzyme, 'COMP'); |
836
|
432
|
|
|
|
|
666
|
push @all_cuts, @$cut_positions; |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
9575
|
100
|
|
|
|
9971
|
if (defined $all_cuts[0]) { |
842
|
|
|
|
|
|
|
# now just remove any duplicate cut sites |
843
|
1717
|
|
|
|
|
2090
|
@all_cuts = sort {$a <=> $b} @all_cuts; |
|
1350
|
|
|
|
|
1165
|
|
844
|
1717
|
|
|
|
|
1022
|
push @{$self->{'_cut_positions'}->{$enz->name}}, $all_cuts[0]; |
|
1717
|
|
|
|
|
2357
|
|
845
|
1717
|
|
|
|
|
1926
|
foreach my $i (@all_cuts) { |
846
|
565
|
|
|
|
|
683
|
push @{$self->{'_cut_positions'}->{$enz->name}}, $i |
847
|
2636
|
100
|
|
|
|
1675
|
if $i != ${$self->{'_cut_positions'}->{$enz->name}}[$#{$self->{'_cut_positions'}->{$enz->name}}]; |
|
2636
|
|
|
|
|
3059
|
|
|
2636
|
|
|
|
|
3125
|
|
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
} else { |
850
|
|
|
|
|
|
|
# this just fixes an eror when @all_cuts is not defined! |
851
|
7858
|
|
|
|
|
5059
|
@{$self->{'_cut_positions'}->{$enz->name}}=(); |
|
7858
|
|
|
|
|
9887
|
|
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# note I have removed saving any other information except the |
855
|
|
|
|
|
|
|
# cut_positions this should significantly decrease the amount |
856
|
|
|
|
|
|
|
# of memory that is required for large sequences. It should |
857
|
|
|
|
|
|
|
# also speed things up dramatically, because fragments and |
858
|
|
|
|
|
|
|
# fragment maps are only calculated for those enzymes they are |
859
|
|
|
|
|
|
|
# needed for. |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
# finally, save minimal information about each enzyme |
862
|
9575
|
|
|
|
|
6916
|
my $number_of_cuts=scalar @{$self->{'_cut_positions'}->{$enz->name}}; |
|
9575
|
|
|
|
|
10716
|
|
863
|
|
|
|
|
|
|
# now just store the number of cuts |
864
|
9575
|
|
|
|
|
12267
|
$self->{_number_of_cuts_by_enzyme}->{$enz->name}=$number_of_cuts; |
865
|
9575
|
|
|
|
|
6706
|
push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, $enz); |
|
9575
|
|
|
|
|
11543
|
|
866
|
9575
|
100
|
|
|
|
15339
|
if ($number_of_cuts > $self->{maximum_cuts}) { |
867
|
24
|
|
|
|
|
36
|
$self->{maximum_cuts}=$number_of_cuts; |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=head2 _enzyme_sites |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
Title : _enzyme_sites |
876
|
|
|
|
|
|
|
Function : An internal method to figure out the two sides of an enzyme |
877
|
|
|
|
|
|
|
Returns : The sequence before the cut and the sequence after the cut |
878
|
|
|
|
|
|
|
Arguments : A Bio::Restriction::Enzyme object, |
879
|
|
|
|
|
|
|
$comp : boolean, calculate based on $enz->complementary_cut() |
880
|
|
|
|
|
|
|
if true, $enz->cut() if false |
881
|
|
|
|
|
|
|
Status : NOW DEPRECATED - maj |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
=cut |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
sub _enzyme_sites { |
886
|
0
|
|
|
0
|
|
0
|
my ($self, $enz, $comp )=@_; |
887
|
|
|
|
|
|
|
# get the cut site |
888
|
|
|
|
|
|
|
# I have reworked this so that it uses $enz->cut to get the site |
889
|
|
|
|
|
|
|
|
890
|
0
|
0
|
|
|
|
0
|
my $site= ( $comp ? $enz->complementary_cut : $enz->cut ); |
891
|
|
|
|
|
|
|
# split it into the two fragments for the sequence before and after. |
892
|
0
|
0
|
|
|
|
0
|
$site=0 unless defined $site; |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
# the default values just stop an error from an undefined |
895
|
|
|
|
|
|
|
# string. But they don't affect the split. |
896
|
0
|
|
|
|
|
0
|
my ($beforeseq, $afterseq)= ('.', '.'); |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
# extra-site cutting |
899
|
|
|
|
|
|
|
# the before seq is going to be the entire site |
900
|
|
|
|
|
|
|
# the after seq is empty |
901
|
|
|
|
|
|
|
# BUT, need to communicate how to cut within the sample sequence |
902
|
|
|
|
|
|
|
# relative to the end of the site (do through $enz->cut), and |
903
|
|
|
|
|
|
|
# ALSO, need to check length of sample seq so that if cut falls |
904
|
|
|
|
|
|
|
# outside the input sequence, we have a warning/throw. /maj |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
# pre-site cutting |
907
|
|
|
|
|
|
|
# need to handle negative site numbers |
908
|
|
|
|
|
|
|
|
909
|
0
|
0
|
|
|
|
0
|
if ($site <= 0) { # <= to handle pre-site cutting |
|
|
0
|
|
|
|
|
|
910
|
0
|
|
|
|
|
0
|
$afterseq=$enz->string; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
elsif ($site >= $enz->seq->length) { # >= to handle extrasite cutters/maj |
913
|
0
|
|
|
|
|
0
|
$beforeseq=$enz->string; |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
else { # $site < $enz->seq->length |
916
|
0
|
|
|
|
|
0
|
$beforeseq=$enz->seq->subseq(1, $site); |
917
|
0
|
|
|
|
|
0
|
$afterseq=$enz->seq->subseq($site+1, $enz->seq->length); |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
# if the enzyme is ambiguous we need to convert this into a perl string |
920
|
0
|
0
|
|
|
|
0
|
if ($enz->is_ambiguous) { |
921
|
0
|
|
|
|
|
0
|
$beforeseq=$self->_expanded_string($beforeseq); |
922
|
0
|
|
|
|
|
0
|
$afterseq =$self->_expanded_string($afterseq); |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
0
|
|
|
|
|
0
|
return ($beforeseq, $afterseq); |
926
|
|
|
|
|
|
|
} |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=head2 _non_pal_enz |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
Title : _non_pal_enz |
932
|
|
|
|
|
|
|
Function : Analyses non_palindromic enzymes for cuts in both ways |
933
|
|
|
|
|
|
|
(in fact, delivers only minus strand cut positions in the |
934
|
|
|
|
|
|
|
plus strand coordinates/maj) |
935
|
|
|
|
|
|
|
Returns : A reference to an array of cut positions |
936
|
|
|
|
|
|
|
Arguments: The sequence to check and the enzyme object |
937
|
|
|
|
|
|
|
NOW DEPRECATED/maj |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=cut |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
sub _non_pal_enz { |
942
|
0
|
|
|
0
|
|
0
|
my ($self, $target_seq, $enz) =@_; |
943
|
|
|
|
|
|
|
# add support for non-palindromic sequences |
944
|
|
|
|
|
|
|
# the enzyme is not the same forwards and backwards |
945
|
|
|
|
|
|
|
|
946
|
0
|
|
|
|
|
0
|
my $site=$enz->complementary_cut; |
947
|
|
|
|
|
|
|
# complementary_cut is in plus strand coordinates |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
# we are going to rc the sequence, so complementary_cut becomes length-complementary_cut |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
# I think this is wrong; cut sites are a matter of position with respect |
952
|
|
|
|
|
|
|
# to the plus strand: the recognition site is double stranded and |
953
|
|
|
|
|
|
|
# directly identifiable on the plus strand sequence. /maj |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
# what really needs doing is to keep track of plus strand and minus strand |
956
|
|
|
|
|
|
|
# nicks separately./maj |
957
|
|
|
|
|
|
|
|
958
|
0
|
|
|
|
|
0
|
my ($beforeseq, $afterseq)=('.', '.'); |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
# now, for extra-site cuts, $site > length...so...?/maj |
961
|
0
|
|
|
|
|
0
|
my $new_left_cut=$enz->seq->length-$site; |
962
|
|
|
|
|
|
|
# there is a problem when this is actually zero |
963
|
|
|
|
|
|
|
|
964
|
0
|
0
|
|
|
|
0
|
if ($new_left_cut == 0) {$afterseq=$enz->seq->revcom->seq} |
|
0
|
0
|
|
|
|
0
|
|
965
|
0
|
|
|
|
|
0
|
elsif ($new_left_cut == $enz->seq->length) {$beforeseq=$enz->seq->revcom->seq} |
966
|
|
|
|
|
|
|
else { |
967
|
|
|
|
|
|
|
# this can't be right./maj |
968
|
0
|
|
|
|
|
0
|
$beforeseq=$enz->seq->revcom->subseq(1, ($enz->seq->length-$site)); |
969
|
0
|
|
|
|
|
0
|
$afterseq=$enz->seq->revcom->subseq(($enz->seq->length-$site), $enz->seq->length); |
970
|
|
|
|
|
|
|
} |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
# do this correctly, in the context of the current code design, |
973
|
|
|
|
|
|
|
# by providing a "complement" argument to _ambig_cuts and _nonambig_cuts, |
974
|
|
|
|
|
|
|
# use these explicitly rather than this wrapper./maj |
975
|
|
|
|
|
|
|
|
976
|
0
|
|
|
|
|
0
|
my $results=[]; |
977
|
0
|
0
|
|
|
|
0
|
if ($enz->is_ambiguous) { |
978
|
0
|
|
|
|
|
0
|
$results= $self->_ambig_cuts($beforeseq, $afterseq, $target_seq, $enz); |
979
|
|
|
|
|
|
|
} else { |
980
|
0
|
|
|
|
|
0
|
$results= $self->_nonambig_cuts($beforeseq, $afterseq, $target_seq, $enz); |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
# deal with is_circular |
984
|
0
|
|
|
|
|
0
|
my $more_results=[]; |
985
|
|
|
|
|
|
|
$more_results=$self->_circular($beforeseq, $afterseq, $enz) |
986
|
0
|
0
|
|
|
|
0
|
if ($self->{'_seq'}->is_circular); |
987
|
|
|
|
|
|
|
|
988
|
0
|
|
|
|
|
0
|
return [@$more_results, @$results]; |
989
|
|
|
|
|
|
|
} |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
=head2 _ambig_cuts |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
Title : _ambig_cuts |
994
|
|
|
|
|
|
|
Function : An internal method to localize the cuts in the sequence |
995
|
|
|
|
|
|
|
Returns : A reference to an array of cut positions |
996
|
|
|
|
|
|
|
Arguments : The separated enzyme site, the target sequence, and the enzyme object |
997
|
|
|
|
|
|
|
Comments : This is a slow implementation but works for ambiguous sequences. |
998
|
|
|
|
|
|
|
Whenever possible, _nonambig_cuts should be used as it is a lot faster. |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
=cut |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
# we have problems here when the cut is extrasite: $beforeseq/$afterseq do |
1003
|
|
|
|
|
|
|
# not define the cut site then! I am renaming this to _ambig_cuts_depr, |
1004
|
|
|
|
|
|
|
# providing a more compact method that correctly handles extrasite cuts |
1005
|
|
|
|
|
|
|
# below /maj |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
sub _ambig_cuts_depr { |
1008
|
0
|
|
|
0
|
|
0
|
my ($self, $beforeseq, $afterseq, $target_seq, $enz) = @_; |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
# cut the sequence. This is done with split so we can use |
1011
|
|
|
|
|
|
|
# regexp. |
1012
|
0
|
|
|
|
|
0
|
$target_seq = uc $target_seq; |
1013
|
0
|
|
|
|
|
0
|
my @cuts = split /($beforeseq)($afterseq)/i, $target_seq; |
1014
|
|
|
|
|
|
|
# now the array has extra elements --- the before and after! |
1015
|
|
|
|
|
|
|
# we have: |
1016
|
|
|
|
|
|
|
# element 0 sequence |
1017
|
|
|
|
|
|
|
# element 1 3' end |
1018
|
|
|
|
|
|
|
# element 2 5' end of next sequence |
1019
|
|
|
|
|
|
|
# element 3 sequence |
1020
|
|
|
|
|
|
|
# .... |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# we need to loop through the array and add the ends to the |
1023
|
|
|
|
|
|
|
# appropriate parts of the sequence |
1024
|
|
|
|
|
|
|
|
1025
|
0
|
|
|
|
|
0
|
my $i=0; |
1026
|
0
|
|
|
|
|
0
|
my @re_frags; |
1027
|
0
|
0
|
|
|
|
0
|
if ($#cuts) { # there is >1 element |
1028
|
0
|
|
|
|
|
0
|
while ($i<$#cuts) { |
1029
|
0
|
|
|
|
|
0
|
my $joinedseq; |
1030
|
|
|
|
|
|
|
# the first sequence is a special case |
1031
|
0
|
0
|
|
|
|
0
|
if ($i == 0) { |
1032
|
0
|
|
|
|
|
0
|
$joinedseq=$cuts[$i].$cuts[$i+1]; |
1033
|
|
|
|
|
|
|
} else { |
1034
|
0
|
|
|
|
|
0
|
$joinedseq=$cuts[$i-1].$cuts[$i].$cuts[$i+1]; |
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
# now deal with overlapping sequences |
1037
|
|
|
|
|
|
|
# we can do this through a regular regexp as we only |
1038
|
|
|
|
|
|
|
# have a short fragment to look through |
1039
|
|
|
|
|
|
|
|
1040
|
0
|
|
|
|
|
0
|
while ($joinedseq =~ /$beforeseq$afterseq/) { |
1041
|
0
|
|
|
|
|
0
|
$joinedseq =~ s/^(.*?$beforeseq)($afterseq)/$2/; |
1042
|
0
|
|
|
|
|
0
|
push @re_frags, $1; |
1043
|
|
|
|
|
|
|
} |
1044
|
0
|
|
|
|
|
0
|
push @re_frags, $joinedseq; |
1045
|
0
|
|
|
|
|
0
|
$i+=3; |
1046
|
|
|
|
|
|
|
} |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# I don't think we want the last fragment in. It is messing up the _circular |
1049
|
|
|
|
|
|
|
# part of things. So I deleted this part of the code :) |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
} else { |
1052
|
|
|
|
|
|
|
# if we don't cut, leave the array empty |
1053
|
0
|
|
|
|
|
0
|
return []; |
1054
|
|
|
|
|
|
|
} # the sequence was not cut. |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
# now @re_frags has the fragments of all the sequences |
1057
|
|
|
|
|
|
|
# but some people want to have this return the lengths |
1058
|
|
|
|
|
|
|
# of the fragments. |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
# in theory the actual cut sites should be the length |
1061
|
|
|
|
|
|
|
# of the fragments in @re_frags |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
# note, that now this is the only data that we are saving. We |
1064
|
|
|
|
|
|
|
# will have to go back add regenerate re_frags. The reason is |
1065
|
|
|
|
|
|
|
# that we can use this in _circular easier |
1066
|
|
|
|
|
|
|
|
1067
|
0
|
|
|
|
|
0
|
my @cut_positions = map {length($_)} @re_frags; |
|
0
|
|
|
|
|
0
|
|
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
# the cut positions are right now the lengths of the sequence, but |
1070
|
|
|
|
|
|
|
# we need to add them all onto each other |
1071
|
|
|
|
|
|
|
|
1072
|
0
|
|
|
|
|
0
|
for (my $i=1; $i<=$#cut_positions; $i++) { |
1073
|
0
|
|
|
|
|
0
|
$cut_positions[$i]+=$cut_positions[$i-1]; |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
# in one of those oddities in life, 2 fragments mean an enzyme cut once |
1077
|
|
|
|
|
|
|
# so $#re_frags is the number of cuts |
1078
|
0
|
|
|
|
|
0
|
return \@cut_positions; |
1079
|
|
|
|
|
|
|
} |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
# new version/maj |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
sub _ambig_cuts { |
1084
|
0
|
|
|
0
|
|
0
|
my ($self, $before, $after, $target, $enz, $comp) = @_; |
1085
|
0
|
0
|
|
|
|
0
|
my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut); |
1086
|
0
|
|
|
|
|
0
|
local $_ = uc $target; |
1087
|
0
|
|
|
|
|
0
|
my @cuts; |
1088
|
0
|
|
|
|
|
0
|
my $recog = $enz->recog; |
1089
|
0
|
|
|
|
|
0
|
my $site_re = qr/($recog)/; |
1090
|
0
|
|
|
|
|
0
|
push @cuts, pos while (/$site_re/g); |
1091
|
0
|
|
|
|
|
0
|
$_ = $_ - length($enz->recog) + $cut_site for @cuts; |
1092
|
0
|
|
|
|
|
0
|
return [@cuts]; |
1093
|
|
|
|
|
|
|
} |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=head2 _nonambig_cuts |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
Title : _nonambig_cuts |
1098
|
|
|
|
|
|
|
Function : Figures out which enzymes we know about and cuts the sequence. |
1099
|
|
|
|
|
|
|
Returns : Nothing. |
1100
|
|
|
|
|
|
|
Arguments : The separated enzyme site, the target sequence, and the enzyme object |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
An internal method. This will figure out where the sequence should be |
1103
|
|
|
|
|
|
|
cut, and provide the appropriate results. This is a much faster |
1104
|
|
|
|
|
|
|
implementation because it doesn't use a regexp, but it can not deal |
1105
|
|
|
|
|
|
|
with ambiguous sequences |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
=cut |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# now, DO want the enzyme object.../maj |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
sub _nonambig_cuts { |
1112
|
0
|
|
|
0
|
|
0
|
my ($self, $beforeseq, $afterseq, $target_seq, $enz, $comp) = @_; |
1113
|
0
|
0
|
|
|
|
0
|
my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut); |
1114
|
0
|
0
|
|
|
|
0
|
if ($beforeseq eq ".") {$beforeseq = ''} |
|
0
|
|
|
|
|
0
|
|
1115
|
0
|
0
|
|
|
|
0
|
if ($afterseq eq ".") {$afterseq = ''} |
|
0
|
|
|
|
|
0
|
|
1116
|
0
|
|
|
|
|
0
|
$target_seq = uc $target_seq; |
1117
|
|
|
|
|
|
|
# my $index_posn=index($target_seq, $beforeseq.$afterseq); |
1118
|
0
|
|
|
|
|
0
|
my $index_posn=index($target_seq, $enz->recog); |
1119
|
0
|
0
|
|
|
|
0
|
return [] if ($index_posn == -1); # there is no match to the sequence |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
# there is at least one cut site |
1122
|
0
|
|
|
|
|
0
|
my @cuts; |
1123
|
0
|
|
|
|
|
0
|
while ($index_posn > -1) { |
1124
|
|
|
|
|
|
|
# extrasite cutting issue here... |
1125
|
|
|
|
|
|
|
# think we want $index_posn+$enz->cut |
1126
|
|
|
|
|
|
|
# push (@cuts, $index_posn+length($beforeseq)); |
1127
|
0
|
|
|
|
|
0
|
push (@cuts, $index_posn+$cut_site); |
1128
|
|
|
|
|
|
|
# $index_posn=index($target_seq, $beforeseq.$afterseq, $index_posn+1); |
1129
|
0
|
|
|
|
|
0
|
$index_posn=index($target_seq, $enz->recog, $index_posn+1); |
1130
|
|
|
|
|
|
|
} |
1131
|
|
|
|
|
|
|
|
1132
|
0
|
|
|
|
|
0
|
return \@cuts; |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
=head2 _make_cuts |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
Title : _make_cuts |
1138
|
|
|
|
|
|
|
Usage : $an->_make_cuts( $target_sequence, $enzyme, $complement_q ) |
1139
|
|
|
|
|
|
|
Function: Returns an array of cut sites on target seq, using enzyme |
1140
|
|
|
|
|
|
|
on the plus strand ($complement_q = 0) or minus strand |
1141
|
|
|
|
|
|
|
($complement_q = 1); follows Enzyme objects in |
1142
|
|
|
|
|
|
|
$enzyme->others() |
1143
|
|
|
|
|
|
|
Returns : array of scalar integers |
1144
|
|
|
|
|
|
|
Args : sequence string, B:R:Enzyme object, boolean |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
=cut |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
sub _make_cuts { |
1149
|
3
|
|
|
3
|
|
17
|
no warnings qw( uninitialized ); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
2012
|
|
1150
|
|
|
|
|
|
|
|
1151
|
15249
|
|
|
15249
|
|
15003
|
my ($self, $target, $enz, $comp) = @_; |
1152
|
15249
|
|
|
|
|
18374
|
local $_ = uc $target; |
1153
|
|
|
|
|
|
|
|
1154
|
15249
|
|
|
|
|
8740
|
my @cuts; |
1155
|
|
|
|
|
|
|
|
1156
|
15249
|
50
|
|
|
|
26704
|
my @enzs = map { $_ || () } ($enz, $enz->can('others') ? $enz->others : ()); |
|
15481
|
100
|
|
|
|
28145
|
|
1157
|
|
|
|
|
|
|
ENZ: |
1158
|
15249
|
|
|
|
|
12965
|
foreach $enz (@enzs) { |
1159
|
15481
|
|
|
|
|
17442
|
my $recog = $enz->recog; |
1160
|
15481
|
100
|
|
|
|
23699
|
my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut); |
1161
|
15481
|
|
|
|
|
10023
|
my @these_cuts; |
1162
|
|
|
|
|
|
|
|
1163
|
15481
|
100
|
|
|
|
24278
|
if ( $recog =~ /[^\w]/ ) { # "ambig" |
1164
|
5810
|
|
|
|
|
35383
|
my $site_re = qr/($recog)/; |
1165
|
5810
|
|
|
|
|
25144
|
push @these_cuts, pos while (/$site_re/g); |
1166
|
5810
|
|
|
|
|
7399
|
$_ = $_ - length($enz->string) + $cut_site for @these_cuts; |
1167
|
5810
|
100
|
|
|
|
7874
|
if (!$enz->is_palindromic) { |
1168
|
864
|
|
|
|
|
1179
|
pos = 0; |
1169
|
864
|
|
|
|
|
727
|
my @these_rev_cuts; |
1170
|
864
|
|
|
|
|
1064
|
$recog = $enz->revcom_recog; |
1171
|
864
|
100
|
|
|
|
1071
|
$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut); |
1172
|
864
|
|
|
|
|
2928
|
$site_re = qr/($recog)/; |
1173
|
864
|
|
|
|
|
2871
|
push @these_rev_cuts, pos while (/$site_re/g); |
1174
|
864
|
|
|
|
|
984
|
$_ = $_ - length($enz->string) + $cut_site for @these_rev_cuts; |
1175
|
864
|
|
|
|
|
1063
|
push @these_cuts, @these_rev_cuts; |
1176
|
|
|
|
|
|
|
} |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
else { # "nonambig" |
1179
|
9671
|
|
|
|
|
12183
|
my $index_posn=index($_, $recog); |
1180
|
9671
|
|
|
|
|
12274
|
while ($index_posn > -1) { |
1181
|
1547
|
|
|
|
|
1414
|
push (@these_cuts, $index_posn+$cut_site); |
1182
|
1547
|
|
|
|
|
2478
|
$index_posn=index($_, $recog, $index_posn+1); |
1183
|
|
|
|
|
|
|
} |
1184
|
9671
|
100
|
|
|
|
11365
|
if (!$enz->is_palindromic) { |
1185
|
1943
|
|
|
|
|
2378
|
$recog = $enz->revcom_recog; |
1186
|
1943
|
100
|
|
|
|
2627
|
$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut); |
1187
|
1943
|
|
|
|
|
2416
|
$index_posn=index($_, $recog); |
1188
|
1943
|
|
|
|
|
2818
|
while ($index_posn > -1) { |
1189
|
246
|
|
|
|
|
222
|
push @these_cuts, $index_posn+$cut_site; |
1190
|
246
|
|
|
|
|
425
|
$index_posn=index($_, $recog, $index_posn+1); |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
} |
1194
|
15481
|
|
|
|
|
17372
|
push @cuts, @these_cuts; |
1195
|
|
|
|
|
|
|
} |
1196
|
15249
|
|
|
|
|
18480
|
return [@cuts]; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
=head2 _multiple_cuts |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
Title : _multiple_cuts |
1202
|
|
|
|
|
|
|
Function : Figures out multiple digests |
1203
|
|
|
|
|
|
|
Returns : An array of the cut sites for multiply digested DNA |
1204
|
|
|
|
|
|
|
Arguments : A Bio::Restriction::EnzymeCollection object |
1205
|
|
|
|
|
|
|
Comments : Double digests is one subset of this, but you can use |
1206
|
|
|
|
|
|
|
as many enzymes as you want. |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
=cut |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
sub _multiple_cuts { |
1211
|
3
|
|
|
3
|
|
5
|
my ($self, $ec)=@_; |
1212
|
3
|
50
|
|
|
|
9
|
$self->cut unless $self->{'_cut'}; |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# now that we are using positions rather than fragments |
1215
|
|
|
|
|
|
|
# this is really easy |
1216
|
3
|
|
|
|
|
5
|
my @cuts; |
1217
|
3
|
|
|
|
|
14
|
foreach my $enz ($ec->each_enzyme) { |
1218
|
15
|
|
|
|
|
21
|
push @cuts, @{$self->{'_cut_positions'}->{$enz->name}} |
1219
|
15
|
50
|
|
|
|
26
|
if defined $self->{'_cut_positions'}->{$enz->name}; |
1220
|
|
|
|
|
|
|
} |
1221
|
3
|
|
|
|
|
11
|
@{$self->{'_cut_positions'}->{'multiple_digest'}}=sort {$a <=> $b} @cuts; |
|
3
|
|
|
|
|
13
|
|
|
87
|
|
|
|
|
56
|
|
1222
|
|
|
|
|
|
|
|
1223
|
3
|
|
|
|
|
4
|
my $number_of_cuts; |
1224
|
|
|
|
|
|
|
|
1225
|
3
|
|
|
|
|
4
|
$number_of_cuts=scalar @{$self->{'_cut_positions'}->{'multiple_digest'}}; |
|
3
|
|
|
|
|
8
|
|
1226
|
3
|
|
|
|
|
5
|
$self->{_number_of_cuts_by_enzyme}->{'multiple_digest'}=$number_of_cuts; |
1227
|
3
|
|
|
|
|
4
|
push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, 'multiple_digest'); |
|
3
|
|
|
|
|
10
|
|
1228
|
3
|
50
|
|
|
|
12
|
if ($number_of_cuts > $self->{maximum_cuts}) { |
1229
|
0
|
|
|
|
|
0
|
$self->{maximum_cuts}=$number_of_cuts; |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
} |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head2 _circular |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
Title : _circular |
1237
|
|
|
|
|
|
|
Function : Identifies cuts at the join of the end of the target with |
1238
|
|
|
|
|
|
|
the beginning of the target |
1239
|
|
|
|
|
|
|
Returns : array of scalar integers ( cut sites near join, if any ) |
1240
|
|
|
|
|
|
|
Arguments : scalar string (target sequence), Bio::Restriction::Enzyme obj |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
=cut |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
sub _circular { |
1245
|
4718
|
|
|
4718
|
|
4075
|
my ($self, $target, $enz, $comp) = @_; |
1246
|
4718
|
|
|
|
|
5580
|
$target=uc $target; |
1247
|
4718
|
50
|
|
|
|
5315
|
my $patch_len = ( length $target > 20 ? 10 : int( length($target)/2 ) ); |
1248
|
|
|
|
|
|
|
|
1249
|
4718
|
|
|
|
|
6203
|
my ($first, $last) = |
1250
|
|
|
|
|
|
|
(substr($target, 0, $patch_len),substr($target, -$patch_len)); |
1251
|
4718
|
|
|
|
|
4276
|
my $patch=$last.$first; |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
# now find the cut sites |
1254
|
|
|
|
|
|
|
|
1255
|
4718
|
|
|
|
|
5091
|
my $cut_positions = $self->_make_cuts($patch, $enz, $comp); |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
# the enzyme doesn't cut in the new fragment |
1258
|
4718
|
50
|
|
|
|
6182
|
return [] if (!$cut_positions); |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
# now we are going to add things to _cut_positions |
1261
|
|
|
|
|
|
|
# in this shema it doesn't matter if the site is there twice - |
1262
|
|
|
|
|
|
|
# we will take care of that later. Because we are using position |
1263
|
|
|
|
|
|
|
# rather than frag or anything else, we can just |
1264
|
|
|
|
|
|
|
# remove duplicates. |
1265
|
4718
|
|
|
|
|
3319
|
my @circ_cuts; |
1266
|
4718
|
|
|
|
|
4555
|
foreach my $cut (@$cut_positions) { |
1267
|
275
|
100
|
|
|
|
516
|
if ($cut == length($last)) { |
|
|
100
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
# the cut is actually at position 0, but we're going to call this the |
1269
|
|
|
|
|
|
|
# length of the sequence so we don't confuse no cuts with a 0 cut |
1270
|
|
|
|
|
|
|
# push (@circ_cuts, $self->{'_seq'}->length); |
1271
|
2
|
|
|
|
|
4
|
push (@circ_cuts, 0); |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
elsif ($cut < length($last)) { |
1275
|
|
|
|
|
|
|
# the cut is before the end of the sequence |
1276
|
|
|
|
|
|
|
#check |
1277
|
150
|
|
|
|
|
292
|
push (@circ_cuts, $self->{'_seq'}->length - (length($last) - $cut)); |
1278
|
|
|
|
|
|
|
} |
1279
|
|
|
|
|
|
|
else { |
1280
|
|
|
|
|
|
|
# the cut is at the start of the sequence (position >=1) |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
# note, we put this at the beginning of the array rather than the end! |
1283
|
123
|
|
|
|
|
187
|
unshift (@circ_cuts, $cut-length($last)); |
1284
|
|
|
|
|
|
|
} |
1285
|
|
|
|
|
|
|
} |
1286
|
4718
|
|
|
|
|
6275
|
return \@circ_cuts; |
1287
|
|
|
|
|
|
|
} |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=head2 _expanded_string |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
Title : _expanded_string |
1296
|
|
|
|
|
|
|
Function : Expand nucleotide ambiguity codes to their representative letters |
1297
|
|
|
|
|
|
|
Returns : The full length string |
1298
|
|
|
|
|
|
|
Arguments : The string to be expanded. |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
Stolen from the original RestrictionEnzyme.pm |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
=cut |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
sub _expanded_string { |
1306
|
0
|
|
|
0
|
|
|
my ($self, $str) = @_; |
1307
|
|
|
|
|
|
|
|
1308
|
0
|
|
|
|
|
|
$str =~ s/N|X/\./g; |
1309
|
0
|
|
|
|
|
|
$str =~ s/R/\[AG\]/g; |
1310
|
0
|
|
|
|
|
|
$str =~ s/Y/\[CT\]/g; |
1311
|
0
|
|
|
|
|
|
$str =~ s/S/\[GC\]/g; |
1312
|
0
|
|
|
|
|
|
$str =~ s/W/\[AT\]/g; |
1313
|
0
|
|
|
|
|
|
$str =~ s/M/\[AC\]/g; |
1314
|
0
|
|
|
|
|
|
$str =~ s/K/\[TG\]/g; |
1315
|
0
|
|
|
|
|
|
$str =~ s/B/\[CGT\]/g; |
1316
|
0
|
|
|
|
|
|
$str =~ s/D/\[AGT\]/g; |
1317
|
0
|
|
|
|
|
|
$str =~ s/H/\[ACT\]/g; |
1318
|
0
|
|
|
|
|
|
$str =~ s/V/\[ACG\]/g; |
1319
|
|
|
|
|
|
|
|
1320
|
0
|
|
|
|
|
|
return $str; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
1; |