File Coverage

Bio/Restriction/EnzymeCollection.pm
Criterion Covered Total %
statement 74 86 86.0
branch 29 38 76.3
condition 7 9 77.7
subroutine 12 13 92.3
pod 8 8 100.0
total 130 154 84.4


line stmt bran cond sub pod time code
1             #-------------------------------------------------------------------------------
2             #
3             # BioPerl module Bio::Restriction::EnzymeCollection
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Rob Edwards
8             #
9             # You may distribute this module under the same terms as perl itself
10             #-------------------------------------------------------------------------------
11              
12             ## POD Documentation:
13              
14             =head1 NAME
15              
16             Bio::Restriction::EnzymeCollection - Set of restriction endonucleases
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Restriction::EnzymeCollection;
21              
22             # Create a collection with the default enzymes.
23             my $default_collection = Bio::Restriction::EnzymeCollection->new();
24              
25             # Or create a collection from a REBASE 'withrefm' file obtained from
26             # ftp://ftp.neb.com/pub/rebase/. (See Bio::Restriction::IO for more
27             # information.)
28             my $rebase = Bio::Restriction::IO->new(
29             -file => 'withrefm.610',
30             -format => 'withrefm' );
31             my $rebase_collection = $rebase->read();
32              
33             # Or create an empty collection and set the enzymes later. See
34             # 'CUSTOM COLLECTIONS' below for more information.
35             my $empty_collection =
36             Bio::Restriction::EnzymeCollection->new( -empty => 1 );
37              
38             # Get an array of Bio::Restriction::Enzyme objects from the collection.
39             my @enzymes = $default_collection->each_enzyme();
40              
41             # Get a Bio::Restriction::Enzyme object for a particular enzyme by name.
42             my $enz = $default_collection->get_enzyme( 'EcoRI' );
43              
44             # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
45             # that have the equivalent of 6-bp recognition sequences.
46             my $six_cutters = $default_collection->cutters( 6 );
47              
48             # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
49             # that are rare cutters.
50             my $rare_cutters = $default_collection->cutters( -start => 6, -end => 8 );
51              
52             # Get a Bio::Restriction::EnzymeCollection object that contains enzymes
53             # that generate blunt ends:
54             my $blunt_cutters = $default_collection->blunt_enzymes();
55              
56             # See 'CUSTOM COLLECTIONS' below for an example of creating a
57             # Bio::Restriction::EnzymeCollection object with a specified subset of
58             # enzymes using methods provided by the Bio::RestrictionEnzyme class.
59              
60             =head1 DESCRIPTION
61              
62             Bio::Restriction::EnzymeCollection represents a collection of
63             restriction enzymes.
64              
65             If you create a new collection directly rather than from a REBASE
66             file using L, it will be populated by a
67             default set of enzymes with site and cut information
68             only.
69              
70             Use L to figure out which enzymes are
71             available and where they cut your sequence.
72              
73             =head1 CUSTOM COLLECTIONS
74              
75             Note that the underlying L objects have a rich
76             variety of methods that allow more complicated selections than the methods
77             that are defined by Bio::Restriction::EnzymeCollection.
78              
79             For example, the way to create a custom collection of Type II enzymes
80             is as follows:
81              
82             my $complete_collection =
83             Bio::Restriction::EnzymeCollection->new();
84             my $type_ii_collection =
85             Bio::Restriction::EnzymeCollection->new( -empty => 1 );
86             $type_ii_collection->enzymes(
87             grep { $_->type() eq 'II' } $complete_collection->each_enzyme() );
88              
89             =head1 SEE ALSO
90              
91             L - read in enzymes from REBASE files
92              
93             L - figure out what enzymes cut a sequence
94              
95             L - define a single restriction enzyme
96              
97             =head1 FEEDBACK
98              
99             =head2 Mailing Lists
100              
101             User feedback is an integral part of the evolution of this and other
102             Bioperl modules. Send your comments and suggestions preferably to one
103             of the Bioperl mailing lists. Your participation is much appreciated.
104              
105             bioperl-l@bioperl.org - General discussion
106             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
107              
108             =head2 Support
109              
110             Please direct usage questions or support issues to the mailing list:
111              
112             I
113              
114             rather than to the module maintainer directly. Many experienced and
115             reponsive experts will be able look at the problem and quickly
116             address it. Please include a thorough description of the problem
117             with code and data examples if at all possible.
118              
119             =head2 Reporting Bugs
120              
121             Report bugs to the Bioperl bug tracking system to help us keep track
122             the bugs and their resolution. Bug reports can be submitted via the
123             web:
124              
125             https://github.com/bioperl/bioperl-live/issues
126              
127             =head1 AUTHOR
128              
129             Rob Edwards, redwards@utmem.edu
130              
131             =head1 CONTRIBUTORS
132              
133             Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
134              
135             =head1 COPYRIGHT
136              
137             Copyright (c) 2003 Rob Edwards.
138              
139             Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
140             Rights Reserved.
141              
142             This module is free software; you can redistribute it and/or modify it
143             under the same terms as Perl itself.
144              
145             =head1 APPENDIX
146              
147             Methods beginning with a leading underscore are considered private and
148             are intended for internal use by this module. They are not considered
149             part of the public interface and are described here for documentation
150             purposes only.
151              
152             =cut
153              
154              
155             package Bio::Restriction::EnzymeCollection;
156 4     4   711 use strict;
  4         10  
  4         112  
157              
158 4     4   737 use Bio::Restriction::Enzyme;
  4         9  
  4         106  
159 4     4   557 use Bio::Restriction::IO;
  4         15  
  4         144  
160              
161 4     4   28 use Data::Dumper;
  4         8  
  4         289  
162              
163 4     4   28 use base qw(Bio::Root::Root);
  4         9  
  4         3836  
164              
165             =head2 new
166              
167             Title : new
168             Function : Initializes the Restriction::EnzymeCollection object
169             Returns : The Restriction::EnzymeCollection object
170             Arguments : optional named parameter -empty
171              
172             Set parameter -empty to true if you do NOT want the collection be
173             populated by the default set of prototype type II enzymes.
174              
175             Alternatively, pass an array of enzymes to -enzymes parameter.
176              
177             =cut
178              
179             sub new {
180 30     30 1 78 my($class, @args) = @_;
181 30         113 my $self = $class->SUPER::new(@args);
182              
183 30         122 my ($empty, $enzymes) =
184             $self->_rearrange([qw(
185             EMPTY
186             ENZYMES
187             )], @args);
188              
189 30         82 $self->{'_all_enzymes'} = [];
190 30         52 $self->{'_enzymes'} = {};
191              
192 30 100       105 return $self if $empty;
193              
194              
195 5 100       19 if ($enzymes) {
196             # as advertised in pod/maj
197 1 50       4 $self->throw( "Arg to -enzymes must be an arrayref to Bio::Restriction::Enzyme objects") unless ref($enzymes) eq 'ARRAY';
198 1         4 $self->enzymes(@$enzymes);
199 1         5 return $self;
200             }
201             else {
202             # the default set of enzymes
203 4         13 my $in = Bio::Restriction::IO->new(-verbose => $self->verbose);
204 4         18 return $in->read;
205             }
206             }
207              
208             =head2 Manipulate the enzymes within the collection
209              
210             =cut
211              
212             =head2 enzymes
213              
214             Title : enzyme
215             Function : add/get method for enzymes and enzyme collections
216             Returns : object itself
217             Arguments : array of Bio::Restriction::Enzyme and
218             Bio::Restriction::EnzymeCollection objects
219              
220             =cut
221              
222             sub enzymes {
223 10615     10615 1 17264 my ($self, @enzs)=@_;
224 10615         15494 foreach my $e (@enzs) {
225 15075 50       42200 if ( ref $e eq '') {
    100          
    50          
226 0         0 print "|$e|\n";
227             }
228             elsif ($e->isa('Bio::Restriction::EnzymeI')) {
229 15073         15154 push(@{$self->{'_all_enzymes'}},$e);
  15073         21755  
230 15073         28157 $self->{'_enzymes'}->{$e->name} = $e;
231             }
232             elsif ($e->isa('Bio::Restriction::EnzymeCollection')) {
233 2         6 $self->enzymes($e->each_enzyme);
234             } else {
235 0         0 my $r = 1;
236 0         0 $self->warn("EnzymeCollection can not deal with ".
237             ref($e)." objects");
238             }
239             }
240 10615         58770 return $self;
241             }
242              
243             #
244             # method to remove duplicates?
245             #
246              
247             =head2 each_enzyme
248              
249             Title : each_enzyme
250             Function : get an array of enzymes
251             Returns : array of Bio::Restriction::Enzyme objects
252             Arguments : -
253              
254             =cut
255              
256             sub each_enzyme {
257 35     35 1 76 my $self = shift;
258 35         49 return @{$self->{'_all_enzymes'}};
  35         1592  
259             }
260              
261             =head2 get_enzyme
262              
263             Title : get_enzyme
264             Function : Gets a Bio::Restriction::Enzyme object for the enzyme name
265             Returns : A Bio::Restriction::Enzyme object or undef
266             Arguments : An enzyme name that is in the collection
267              
268             =cut
269              
270             sub get_enzyme {
271 21     21 1 1842 my ($self, $name)=@_;
272 21         87 return $self->{'_enzymes'}->{$name};
273             }
274              
275              
276             =head2 available_list
277              
278             Title : available_list
279             Function : Gets a list of all the enzymes that we know about
280             Returns : A reference to an array with all the enzyme names
281             that we have defined or 0 if none are defined
282             Arguments : Nothing
283             Comments : Note, I maintain this for backwards compatibility,
284             but I don't like the name as it is very ambiguous
285              
286             =cut
287              
288             sub available_list {
289 1     1 1 3 my ($self, $size)=@_;
290 1         2 my @keys = sort keys %{$self->{'_enzymes'}};
  1         339  
291 1         68 return @keys;
292             }
293              
294             =head2 longest_cutter
295              
296             Title : longest_cutter
297             Function : Gets the enzyme with the longest recognition site
298             Returns : A Bio::Restriction::Enzyme object
299             Arguments : Nothing
300             Comments : Note, this is used by Bio::Restriction::Analysis
301             to figure out what to do with circular sequences
302              
303             =cut
304              
305             sub longest_cutter {
306 0     0 1 0 my ($self)=@_;
307 0         0 my $longest=0; my $longest_enz='.';
  0         0  
308 0         0 foreach my $enz ($self->each_enzyme) {
309 0         0 my $len=$enz->recognition_length;
310 0 0       0 if ($len > $longest) {$longest=$len; $longest_enz=$enz}
  0         0  
  0         0  
311             }
312 0         0 return $longest_enz;
313             }
314              
315             =head2 Filter enzymes
316              
317             =cut
318              
319             =head2 blunt_enzymes
320              
321             Title : blunt_enzymes
322             Function : Gets a list of all the enzymes that are blunt cutters
323             Returns : A reference to an array with all the enzyme names that
324             are blunt cutters or 0 if none are defined
325             Arguments : Nothing
326             Comments :
327              
328             This is an example of the kind of filtering better done by the scripts
329             using the rich collection of methods in Bio::Restriction::Enzyme.
330              
331             =cut
332              
333             sub blunt_enzymes {
334 1     1 1 3 my $self=shift;
335 1         4 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
336 1         3 return $bs->enzymes( grep { $_->overhang eq 'blunt' } $self->each_enzyme );
  532         814  
337             }
338              
339              
340             =head2 cutters
341              
342             Title : cutters
343             Function : Gets a list of all the enzymes that recognize a
344             certain size, e.g. 6-cutters
345             Usage : $cutters = $collection->cutters(6);
346             Returns : A reference to an array with all the enzyme names
347             that are x cutters or 0 if none are defined
348             Arguments : A positive number for the size of cutters to return
349             OR
350             A range: (-start => 6, -end => 8,
351             -inclusive => 1, -exclusive = 0 )
352              
353             The default for a range is 'inclusive'
354              
355              
356             =cut
357              
358             sub cutters {
359 5     5 1 11 my ($self) = shift;
360              
361 5 50       16 return unless @_; # no argument
362              
363 5 100       16 if (scalar @_ == 1 ) {
364 2         3 my $size = shift;
365 2         3 my @sizes;
366 2 100       8 (ref $size eq 'ARRAY') ? push @sizes, @{$size} : push @sizes, $size;
  1         3  
367 2         9 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
368 2         6 for my $size (@sizes) {
369 3 50       19 $self->throw("Need a positive number [$size]")
370             unless $size =~ /[+]?[\d\.]+/;
371 3         8 foreach my $e ($self->each_enzyme) {
372             ##print $e->name, ": ", $e->cutter, "\n" if $e->cutter == $size;
373 1596 100       2108 $bs->enzymes($e) if $e->cutter == $size;
374             }
375             }
376 2         21 return $bs;
377              
378             } else { # named arguments
379              
380 3         21 my ($start, $end, $inclusive, $exclusive ) =
381             $self->_rearrange([qw(
382             START
383             END
384             INCLUSIVE
385             EXCLUSIVE
386             )], @_);
387              
388 3 50       23 $self->throw("Start needs a positive number [$start]")
389             unless $start =~ /[+]?[\d\.]+/;
390 3 50       11 $self->throw("End needs a positive number [$end]")
391             unless $end =~ /[+]?[\d\.]+/;
392              
393 3         4 my $limits;
394 3 100 66     14 $inclusive = 1 if $inclusive or not $exclusive;
395 3 100       8 $inclusive = 0 if $exclusive;
396              
397 3         9 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
398 3 100       8 if ($inclusive) {
399 2         5 foreach my $e ($self->each_enzyme) {
400 1064 100 66     1475 $bs->enzymes($e) if $e->cutter >= $start and $e->cutter <= $end;
401             }
402             } else {
403 1         4 foreach my $e ($self->each_enzyme) {
404 532 100 100     733 $bs->enzymes($e) if $e->cutter > $start and $e->cutter < $end;
405             }
406             }
407 3         94 return $bs;
408             }
409             }
410              
411              
412             1;