File Coverage

Bio/SeqIO/gcg.pm
Criterion Covered Total %
statement 102 107 95.3
branch 38 58 65.5
condition 9 16 56.2
subroutine 8 8 100.0
pod 3 3 100.0
total 160 192 83.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::gcg
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             # and Lincoln Stein
8             #
9             # Copyright Ewan Birney & Lincoln Stein
10             #
11             # You may distribute this module under the same terms as perl itself
12             #
13             # _history
14             # October 18, 1999 Largely rewritten by Lincoln Stein
15              
16             # POD documentation - main docs before the code
17              
18             =head1 NAME
19              
20             Bio::SeqIO::gcg - GCG sequence input/output stream
21              
22             =head1 SYNOPSIS
23              
24             Do not use this module directly. Use it via the Bio::SeqIO class.
25              
26             =head1 DESCRIPTION
27              
28             This object can transform Bio::Seq objects to and from GCG flat
29             file databases.
30              
31             =head1 FEEDBACK
32              
33             =head2 Mailing Lists
34              
35             User feedback is an integral part of the evolution of this and other
36             Bioperl modules. Send your comments and suggestions preferably to one
37             of the Bioperl mailing lists. Your participation is much appreciated.
38              
39             bioperl-l@bioperl.org - General discussion
40             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41              
42             =head2 Support
43              
44             Please direct usage questions or support issues to the mailing list:
45              
46             I
47              
48             rather than to the module maintainer directly. Many experienced and
49             reponsive experts will be able look at the problem and quickly
50             address it. Please include a thorough description of the problem
51             with code and data examples if at all possible.
52              
53             =head2 Reporting Bugs
54              
55             Report bugs to the Bioperl bug tracking system to help us keep track
56             the bugs and their resolution. Bug reports can be submitted via the web:
57              
58             https://github.com/bioperl/bioperl-live/issues
59              
60             =head1 AUTHORS - Ewan Birney & Lincoln Stein
61              
62             Email: Ebirney@ebi.ac.ukE
63             Elstein@cshl.orgE
64              
65             =head1 CONTRIBUTORS
66              
67             Jason Stajich, jason@bioperl.org
68              
69             =head1 APPENDIX
70              
71             The rest of the documentation details each of the object
72             methods. Internal methods are usually preceded with a _
73              
74             =cut
75              
76             # Let the code begin...
77              
78             package Bio::SeqIO::gcg;
79 5     5   401 use strict;
  5         7  
  5         129  
80              
81 5     5   793 use Bio::Seq::SeqFactory;
  5         7  
  5         159  
82              
83 5     5   19 use base qw(Bio::SeqIO);
  5         6  
  5         2714  
84              
85             sub _initialize {
86 9     9   16 my($self,@args) = @_;
87 9         24 $self->SUPER::_initialize(@args);
88 9 50       44 if( ! defined $self->sequence_factory ) {
89 9         26 $self->sequence_factory(Bio::Seq::SeqFactory->new
90             (-verbose => $self->verbose(),
91             -type => 'Bio::Seq::RichSeq'));
92             }
93             }
94              
95             =head2 next_seq
96              
97             Title : next_seq
98             Usage : $seq = $stream->next_seq()
99             Function: returns the next sequence in the stream
100             Returns : Bio::Seq object
101             Args :
102              
103             =cut
104              
105             sub next_seq {
106 8     8 1 1555 my ($self,@args) = @_;
107 8         9 my($id,$type,$desc,$line,$chksum,$sequence,$date,$len);
108              
109 8         32 while( defined($_ = $self->_readline()) ) {
110              
111             ## Get the descriptive info (anything before the line with '..')
112 15 100       48 unless( /\.\.$/ ) { $desc.= $_; }
  9         12  
113             ## Pull ID, Checksum & Type from the line containing '..'
114 15 100       37 /\.\.$/ && do { $line = $_; chomp;
  6         7  
  6         12  
115 6 50       21 if(/Check\:\s(\d+)\s/) { $chksum = $1; }
  6         12  
116 6 50       16 if(/Type:\s(\w)\s/) { $type = $1; }
  6         8  
117 6 50       24 if(/(\S+)\s+Length/)
118 6         8 { $id = $1; }
119 6 50       34 if(/Length:\s+(\d+)\s+(\S.+\S)\s+Type/ )
120 6         8 { $len = $1; $date = $2;}
  6         7  
121 6         12 last;
122             }
123             }
124 8 100       16 return if ( !defined $_);
125 6         7 chomp($desc); # remove last "\n"
126              
127 6         10 while( defined($_ = $self->_readline()) ) {
128              
129             ## This is where we grab the sequence info.
130              
131 86 50       107 if( /\.\.$/ ) {
132 0         0 $self->throw("Looks like start of another sequence. See documentation. ");
133             }
134              
135 86 100       140 next if($_ eq "\n"); ## skip whitespace lines in formatted seq
136 43         277 s/[\d\s\t]//g; ## remove anything that is not alphabet char: preserve anything that is not explicitly specified for removal (Stefan Kirov)
137             # $_ = uc($_); ## uppercase sequence: NO. Keep the case. HL
138 43         83 $sequence .= $_;
139             }
140             ##If we parsed out a checksum, we might as well test it
141              
142 6 50       15 if(defined $chksum) {
143 6 50       18 unless(_validate_checksum(uc($sequence),$chksum)) {
144 0         0 $self->throw("Checksum failure on parsed sequence.");
145             }
146             }
147              
148             ## Remove whitespace from identifier because the constructor
149             ## will throw a warning otherwise...
150 6 50       15 if(defined $id) { $id =~ s/\s+//g;}
  6         12  
151              
152             ## Turn our parsed "Type: N" or "Type: P" (if found) into the appropriate
153             ## keyword that the constructor expects...
154 6 50       11 if(defined $type) {
155 6 50       13 if($type eq "N") { $type = "dna"; }
  0         0  
156 6 50       10 if($type eq "P") { $type = "prot"; }
  6         8  
157             }
158              
159 6         15 return $self->sequence_factory->create(-seq => $sequence,
160             -id => $id,
161             -desc => $desc,
162             -type => $type,
163             -dates => [ $date ]
164             );
165             }
166              
167             =head2 write_seq
168              
169             Title : write_seq
170             Usage : $stream->write_seq(@seq)
171             Function: writes the formatted $seq object into the stream
172             Returns : 1 for success and 0 for error
173             Args : array of Bio::PrimarySeqI object
174              
175              
176             =cut
177              
178             sub write_seq {
179 3     3 1 11 my ($self,@seq) = @_;
180 3         6 for my $seq (@seq) {
181 3 50 33     27 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
      33        
182             unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
183              
184 3 50       11 $self->warn("No whitespace allowed in GCG ID [". $seq->display_id. "]")
185             if $seq->display_id =~ /\s/;
186              
187 3         9 my $str = $seq->seq;
188 3   50     8 my $comment = $seq->desc || '';
189 3         7 my $id = $seq->id;
190 3 50       11 my $type = ( $seq->alphabet() =~ /[dr]na/i ) ? 'N' : 'P';
191 3         4 my $timestamp;
192              
193 3 50       30 if( $seq->can('get_dates') ) {
194 3         7 ($timestamp) = $seq->get_dates;
195             } else {
196 0         0 $timestamp = localtime(time);
197             }
198 3         4 my($sum,$offset,$len,$i,$j,$cnt,@out);
199              
200 3         3 $len = length($str);
201             ## Set the offset if we have any non-standard numbering going on
202 3         3 $offset=1;
203             # checksum
204 3         5 $sum = $self->GCG_checksum($seq);
205              
206             #Output the sequence header info
207 3         8 push(@out,"$comment\n");
208 3         13 push(@out,"$id Length: $len $timestamp Type: $type Check: $sum ..\n\n");
209              
210             #Format the sequence
211 3         4 $i = $#out + 1;
212 3         9 for($j = 0 ; $j < $len ; ) {
213 108 100       117 if( $j % 50 == 0) {
214 24         43 $out[$i] = sprintf("%8d ",($j+$offset)); #numbering
215             }
216 108         153 $out[$i] .= sprintf("%s",substr($str,$j,10));
217 108         98 $j += 10;
218 108 100 100     274 if( $j < $len && $j % 50 != 0 ) {
    100          
219 84         105 $out[$i] .= " ";
220             }elsif($j % 50 == 0 ) {
221 21         32 $out[$i++] .= "\n\n";
222             }
223             }
224 3         12 local($^W) = 0;
225 3 50       8 if($j % 50 != 0 ) {
226 3         3 $out[$i] .= "\n";
227             }
228 3         3 $out[$i] .= "\n";
229 3 50       21 return unless $self->_print(@out);
230             }
231              
232 3 50 33     6 $self->flush if $self->_flush_on_write && defined $self->_fh;
233 3         8 return 1;
234             }
235              
236             =head2 GCG_checksum
237              
238             Title : GCG_checksum
239             Usage : $cksum = $gcgio->GCG_checksum($seq);
240             Function : returns a gcg checksum for the sequence specified
241              
242             This method can also be called as a class method.
243             Example :
244             Returns : a GCG checksum string
245             Argument : a Bio::PrimarySeqI implementing object
246              
247             =cut
248              
249             sub GCG_checksum {
250 25     25 1 24 my ($self,$seqobj) = @_;
251 25         19 my $index = 0;
252 25         20 my $checksum = 0;
253 25         19 my $char;
254              
255 25         31 my $seq = $seqobj->seq();
256 25         41 $seq =~ tr/a-z/A-Z/;
257              
258 25         1840 foreach $char ( split(/[\.\-]*/, $seq)) {
259 6280         3267 $index++;
260 6280   100     7438 $checksum += ($index * (unpack("c",$char) || 0) );
261 6280 100       6946 if( $index == 57 ) {
262 105         86 $index = 0;
263             }
264             }
265              
266 25         338 return ($checksum % 10000);
267             }
268              
269             =head2 _validate_checksum
270              
271             Title : _validate_checksum
272             Usage : n/a - internal method
273             Function: if parsed gcg sequence contains a checksum field
274             : we compare it to a value computed here on the parsed
275             : sequence. A checksum mismatch would indicate some
276             : type of parsing failure occured.
277             :
278             Returns : 1 for success, 0 for failure
279             Args : string containing parsed seq, value of parsed cheksum
280              
281              
282             =cut
283              
284             sub _validate_checksum {
285 6     6   8 my($seq,$parsed_sum) = @_;
286 6         4 my($i,$len,$computed_sum,$cnt);
287              
288 6         7 $len = length($seq);
289              
290             #Generate the GCG Checksum value
291              
292 6         12 for($i=0; $i<$len ;$i++) {
293 1936         1060 $cnt++;
294 1936         1312 $computed_sum += $cnt * ord(substr($seq,$i,1));
295 1936 100       2962 ($cnt == 57) && ($cnt=0);
296             }
297 6         11 $computed_sum %= 10000;
298              
299             ## Compare and decide if success or failure
300              
301 6 50       22 if($parsed_sum == $computed_sum) {
302 6         19 return 1;
303 0           } else { return 0; }
304              
305              
306             }
307              
308             1;