File Coverage

Bio/Restriction/IO/withrefm.pm
Criterion Covered Total %
statement 63 66 95.4
branch 18 22 81.8
condition 3 6 50.0
subroutine 10 11 90.9
pod 2 2 100.0
total 96 107 89.7


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Restriction::IO::withrefm
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Rob Edwards
6             #
7             # Copyright Rob Edwards
8             #
9             # You may distribute this module under the same terms as perl itself
10             #
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Restriction::IO::withrefm - withrefm enzyme set
17              
18             =head1 SYNOPSIS
19              
20             Do not use this module directly. Use it via the Bio::Restriction::IO class.
21              
22             =head1 DESCRIPTION
23              
24             This is the most complete format of the REBASE files, and basically
25             includes all the data on each of the restriction enzymes.
26              
27              
28             =head1 FEEDBACK
29              
30             =head2 Mailing Lists
31              
32             User feedback is an integral part of the evolution of this and other
33             Bioperl modules. Send your comments and suggestions preferably to the
34             Bioperl mailing lists Your participation is much appreciated.
35              
36             bioperl-l@bioperl.org - General discussion
37             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38              
39             =head2 Support
40              
41             Please direct usage questions or support issues to the mailing list:
42              
43             I
44              
45             rather than to the module maintainer directly. Many experienced and
46             reponsive experts will be able look at the problem and quickly
47             address it. Please include a thorough description of the problem
48             with code and data examples if at all possible.
49              
50             =head2 Reporting Bugs
51              
52             Report bugs to the Bioperl bug tracking system to help us keep track
53             the bugs and their resolution. Bug reports can be submitted via the
54             web:
55              
56             https://github.com/bioperl/bioperl-live/issues
57              
58             =head1 AUTHOR
59              
60             Rob Edwards, redwards@utmem.edu
61              
62             =head1 CONTRIBUTORS
63              
64             Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
65             Mark A. Jensen, maj-at-fortinbras-dot-us
66              
67             =head1 APPENDIX
68              
69             The rest of the documentation details each of the object
70             methods. Internal methods are usually preceded with a _
71              
72             =cut
73              
74             # Let the code begin...
75              
76             package Bio::Restriction::IO::withrefm;
77              
78 2     2   14 use vars qw(%WITH_REFM_FIELD);
  2         3  
  2         103  
79 2     2   10 use strict;
  2         3  
  2         38  
80              
81             #use Bio::Restriction::IO;
82 2     2   7 use Bio::Restriction::Enzyme;
  2         3  
  2         39  
83 2     2   7 use Bio::Restriction::EnzymeCollection;
  2         3  
  2         29  
84              
85 2     2   7 use Data::Dumper;
  2         4  
  2         102  
86              
87 2     2   9 use base qw(Bio::Restriction::IO::base);
  2         2  
  2         636  
88              
89             =head2 read
90              
91             Title : read
92             Usage : $renzs = $stream->read
93             Function: reads all the restrction enzymes from the stream
94             Returns : a Bio::Restriction::Restriction object
95             Args : none
96              
97             =cut
98              
99             sub read {
100 3     3 1 5 my $self = shift;
101              
102 3         21 my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
103              
104 3         13 local $/ = '<1>';
105 3         17 while (defined(my $entry=$self->_readline()) ) {
106              
107             # not an entry.
108 8049 100       17563 next unless $entry =~ /<2>/;
109              
110             #$self->debug("|$entry|\n");
111              
112             #
113             # Minimal information
114             #
115 8046         21857 my ($name) = $entry =~ /^(\S+)/;
116 8046         20024 my ($site) = $entry =~ /\<3\>([^\n]+)/;
117              
118 8046 100 33     30577 if ( ! defined $site || $site eq '' or $site eq '?') {
      66        
119 593 50       1348 $self->warn("$name: no site. Skipping") if $self->verbose > 1;
120 593         1336 next;
121             }
122              
123             # there are a couple of sequences that have multiple
124             # recognition sites eg M.PhiBssHII: ACGCGT,CCGCGG,RGCGCY,RCCGGY,GCGCGC
125             # TaqII : GACCGA(11/9),CACCCA(11/9)
126              
127 7453         8391 my @sequences;
128 7453 100       11171 if ($site =~ /\,/) {
129 14         40 @sequences = split (/\,/, $site);
130 14         26 $site=shift @sequences;
131             }
132              
133             # this regexp now parses all possible components
134             # $1 : (s/t) or undef
135             # $2 : [site]
136             # $3 : (m/n) or undef /maj
137              
138 2     2   12 no warnings; # avoid faulty 'uninitialized value' warnings
  2         4  
  2         488  
139             # occurring against the variables set by
140             # regexp matching (unless anyone has other ideas...)
141              
142 7453         25338 my ($precut, $recog, $postcut) = ( $site =~ m/^(?:\((-?\w+\/-?\w+)\))?([\w^]+)(?:\((-?\w+\/-?\w+)\))?/ );
143              
144              
145             #
146             # prototype / isoschizomers
147             #
148              
149 7453         25699 my ($isoschizomers) = $entry =~ /<2>([^\n]+)/;
150 7453         68660 my @isos = split(/\,/,$isoschizomers);
151 7453 100       12198 my $is_prototype = (@isos ? 1 : 0);
152              
153             #
154             # microbe
155             #
156 7453         18676 my ($microbe) = $entry =~ /<5>([^\n]+)/;
157              
158             #
159             # source
160             #
161 7453         16261 my ($source) = $entry =~ /<6>([^\n]+)/;
162              
163             #
164             # vendors
165             #
166 7453         15305 my ($vendors) = $entry =~ /<7>([^\n]+)/;
167 7453         12974 my @vendors = split(/ */, $vendors);
168              
169              
170             #
171             # references
172             #
173 7453         22195 my ($refs) = $entry =~ /<8>(.+)<1>/s;
174 7453         11680 my @refs = map {split /\n+/} $refs;
  7453         25682  
175              
176 2     2   11 use warnings;
  2         4  
  2         631  
177            
178             # when enz is constructed, site() will contain original characters,
179             # but recog() will contain a regexp if required.../maj
180 7453         89385 my $re = Bio::Restriction::Enzyme->new(
181             -name => $name,
182             -site => $recog,
183             -recog => $recog,
184             -precut => $precut,
185             -postcut => $postcut,
186             -is_prototype => $is_prototype,
187             -isoschizomers => [@isos],
188             -source => $source,
189             -vendors => [@vendors],
190             -references => [@refs],
191             -xln_sub => \&_xln_sub
192             );
193              
194             #
195             # methylation: easier to set here during parsing/maj
196             #
197              
198 7453         28627 my ($meth) = $entry =~ /<4>([^\n]+)/;
199 7453         8936 my @meths;
200 7453 100       10085 if ($meth) {
201             # this can be either X(Y) or X(Y),X2(Y2)
202             # where X is the base and y is the type of methylation
203 754 100       2670 if ( $meth =~ /(\S+)\((\d+)\),(\S+)\((\d+)\)/ ) { # two msites per site
    50          
204             #my ($p1, $m1, $p2, $m2) = ($1, $2, $3, $4);
205 47         147 $re->methylation_sites($self->_meth($re,$1, $2),
206             $self->_meth($re,$3,$4));
207             }
208             elsif ($meth =~ /(\S+)\((\d+)\)/ ) { # one msite per site or more sites
209 707         2043 $re->methylation_sites( $self->_meth($re,$1,$2) );
210 707         2211 @meths = split (/\, /, $meth);
211 707         1048 $meth=shift @meths;
212             } else {
213 0 0       0 $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
214             }
215             }
216              
217             # the _make_multicuts function now takes place in the
218             # Enzyme constructor / maj
219              
220             #
221             # create special types of Enzymes
222             # (because of object cloning in _make_multisites, this happens
223             # after everything else is set /maj)
224             # (with the removal of the collection from the arglist, this
225             # call (or its code) could now be placed in the constructor,
226             # which is safer (since this has to happen last),
227             # but it requires the methylation info, which
228             # is more natural to get out here in the parsing./maj
229              
230 7453 100       11368 $self->_make_multisites($re, \@sequences, \@meths, \&_xln_sub) if @sequences;
231              
232 7453         15641 $renzs->enzymes($re);
233              
234              
235             }
236              
237 3         27 return $renzs;
238             }
239              
240             =head2 _xln_sub
241              
242             Title : _xln_sub
243             Function: Translates withrefm coords to Bio::Restriction coords
244             Args : Bio::Restriction::Enzyme object, scalar integer (cut posn)
245             Note : Used internally; pass as a coderef to the B:R::Enzyme
246             constructor
247              
248             =cut
249              
250             sub _xln_sub {
251 998     998   1399 my ($z,$c) = @_;
252 998 100       2358 return ($c < 0 ? $c : length($z->string)+$c);
253             }
254              
255             =head2 write
256              
257             Title : write
258             Usage : $stream->write($renzs)
259             Function: writes restriction enzymes into the stream
260             Returns : 1 for success and 0 for error
261             Args : a Bio::Restriction::Enzyme
262             or a Bio::Restriction::EnzymeCollection object
263              
264             =cut
265              
266             sub write {
267 0     0 1   my ($self,@h) = @_;
268 0           $self->throw_not_implemented;
269             }
270              
271             1;