File Coverage

Bio/SeqIO/kegg.pm
Criterion Covered Total %
statement 95 109 87.1
branch 14 28 50.0
condition 3 9 33.3
subroutine 10 11 90.9
pod 2 2 100.0
total 124 159 77.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::kegg
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Allen Day
7             #
8             # Copyright Allen Day
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqIO::kegg - KEGG sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             # It is probably best not to use this object directly, but
21             # rather go through the SeqIO handler system. Go:
22              
23             use Bio::SeqIO;
24              
25             $stream = Bio::SeqIO->new(-file => $filename, -format => 'KEGG');
26              
27             while ( my $seq = $stream->next_seq() ) {
28             # do something with $seq
29             }
30              
31             =head1 DESCRIPTION
32              
33             This class transforms KEGG gene records into Bio::Seq objects.
34              
35             =head2 Mapping of record properties to object properties
36              
37             This section is supposed to document which sections and properties of
38             a KEGG databank record end up where in the Bioperl object model. It
39             is far from complete and presently focuses only on those mappings
40             which may be non-obvious. $seq in the text refers to the
41             Bio::Seq::RichSeqI implementing object returned by the parser for each
42             record.
43              
44             =over 4
45              
46             =item 'ENTRY'
47              
48             $seq->primary_id
49              
50             =item 'NAME'
51              
52             $seq->display_id
53              
54             =item 'DEFINITION'
55              
56             $seq->annotation->get_Annotations('description');
57              
58             =item 'ORTHOLOG'
59              
60             grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
61              
62             =item 'CLASS'
63              
64             grep {$_->database eq 'PATH'}
65             $seq->annotation->get_Annotations('dblink')
66              
67             =item 'POSITION'
68              
69             FIXME, NOT IMPLEMENTED
70              
71             =item 'PATHWAY'
72              
73             for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
74             #
75             }
76              
77             =item 'DBLINKS'
78              
79             $seq->annotation->get_Annotations('dblink')
80              
81             =item 'CODON_USAGE'
82              
83             FIXME, NOT IMPLEMENTED
84              
85             =item 'AASEQ'
86              
87             $seq->translate->seq
88              
89             =item 'NTSEQ'
90              
91             $seq-Eseq
92              
93             =back
94              
95             =head1 FEEDBACK
96              
97             =head2 Mailing Lists
98              
99             User feedback is an integral part of the evolution of this and other
100             Bioperl modules. Send your comments and suggestions preferably to one
101             of the Bioperl mailing lists. Your participation is much appreciated.
102              
103             bioperl-l@bioperl.org - General discussion
104             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
105              
106             =head2 Support
107              
108             Please direct usage questions or support issues to the mailing list:
109              
110             I
111              
112             rather than to the module maintainer directly. Many experienced and
113             reponsive experts will be able look at the problem and quickly
114             address it. Please include a thorough description of the problem
115             with code and data examples if at all possible.
116              
117             =head2 Reporting Bugs
118              
119             Report bugs to the Bioperl bug tracking system to help us keep track
120             the bugs and their resolution. Bug reports can be submitted via the web:
121              
122             https://github.com/bioperl/bioperl-live/issues
123              
124             =head1 AUTHOR - Allen Day
125              
126             Email allenday@ucla.edu
127              
128             =head1 APPENDIX
129              
130             The rest of the documentation details each of the object
131             methods. Internal methods are usually preceded with a _
132              
133             =cut
134              
135             # Let the code begin...
136              
137             package Bio::SeqIO::kegg;
138 1     1   490 use strict;
  1         1  
  1         24  
139              
140 1     1   324 use Bio::SeqFeature::Generic;
  1         2  
  1         22  
141 1     1   286 use Bio::Species;
  1         1  
  1         21  
142 1     1   4 use Bio::Seq::SeqFactory;
  1         1  
  1         15  
143 1     1   3 use Bio::Annotation::Collection;
  1         1  
  1         13  
144 1     1   294 use Bio::Annotation::Comment;
  1         1  
  1         29  
145 1     1   267 use Bio::Annotation::DBLink;
  1         2  
  1         35  
146              
147 1     1   6 use base qw(Bio::SeqIO);
  1         1  
  1         340  
148              
149             sub _initialize {
150 1     1   2 my($self,@args) = @_;
151              
152 1         5 $self->SUPER::_initialize(@args);
153             # hash for functions for decoding keys.
154 1         2 $self->{'_func_ftunit_hash'} = {};
155 1 50       6 if( ! defined $self->sequence_factory ) {
156 1         5 $self->sequence_factory(Bio::Seq::SeqFactory->new
157             (-verbose => $self->verbose(),
158             -type => 'Bio::Seq::RichSeq'));
159             }
160             }
161              
162             =head2 next_seq
163              
164             Title : next_seq
165             Usage : $seq = $stream->next_seq()
166             Function: returns the next sequence in the stream
167             Returns : Bio::Seq::RichSeq object
168             Args :
169              
170             =cut
171              
172             sub next_seq {
173 1     1 1 675 my ($self,@args) = @_;
174 1         3 my $builder = $self->sequence_builder();
175 1         2 my $seq;
176             my %params;
177              
178 0         0 my $buffer;
179 0         0 my (@acc, @features);
180 0         0 my ($display_id, $annotation);
181 0         0 my $species;
182              
183             # initialize; we may come here because of starting over
184 1         2 @features = ();
185 1         1 $annotation = undef;
186 1         1 @acc = ();
187 1         1 $species = undef;
188 1         4 %params = (-verbose => $self->verbose); # reset hash
189 1         3 local($/) = "///\n";
190              
191 1         7 $buffer = $self->_readline();
192              
193 1 50       3 return if( !defined $buffer ); # end of file
194 1 50       10 $buffer =~ /^ENTRY/ ||
195             $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
196              
197 1         1 my %FIELDS;
198 1         19 my @chunks = split /\n(?=\S)/, $buffer;
199              
200 1         3 foreach my $chunk (@chunks){
201 11         15 my($key) = $chunk =~ /^(\S+)/;
202 11         15 $FIELDS{$key} = $chunk;
203             }
204              
205             # changing to split method to get entry_ids that include
206             # sequence version like Whatever.1
207             my(undef,$entry_id,$entry_seqtype,$entry_species) =
208 1         3 split(' ',$FIELDS{ENTRY});
209              
210 1         2 my($name);
211 1 50       3 if ($FIELDS{NAME}) {
212 1         3 ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
213             }
214              
215 1         1 my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
216              
217 1 50 33     7 if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
218 1         3 ($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
219 1         6 $definition =~ s/\s+/ /gs;
220             }
221 1 50 33     7 if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
222 1         4 ($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
223 1         9 $aa_seq =~ s/\s+//g;
224             }
225 1 50 33     7 if(( exists $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
226 1         4 ($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
227 1         17 $nt_seq =~ s/\s+//g;
228             }
229              
230 1         8 $annotation = Bio::Annotation::Collection->new();
231              
232 1         7 $annotation->add_Annotation('description',
233             Bio::Annotation::Comment->new(-text => $definition));
234              
235 1         4 $annotation->add_Annotation('aa_seq',
236             Bio::Annotation::Comment->new(-text => $aa_seq));
237              
238 1         1 my($ortholog_db,$ortholog_id,$ortholog_desc);
239 1 50       3 if ($FIELDS{ORTHOLOG}) {
240             ($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
241 1         7 =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
242              
243 1         9 $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
244             -database => $ortholog_db,
245             -primary_id => $ortholog_id,
246             -comment => $ortholog_desc) );
247             }
248              
249 1 50       3 if($FIELDS{MOTIF}){
250 0         0 $FIELDS{MOTIF} =~ s/^MOTIF\s+//;
251 0         0 while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
252 0         0 my $db = $1;
253 0         0 my $ids = $2;
254 0         0 foreach my $id (split(/\s+/, $ids)){
255              
256 0         0 $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
257             -database =>$db,
258             -primary_id => $id,
259             -comment => "") );
260             }
261             }
262             }
263              
264 1 50       3 if($FIELDS{PATHWAY}) {
265 0         0 $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
266 0         0 while($FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg){
267 0         0 $annotation->add_Annotation('pathway',
268             Bio::Annotation::Comment->new(-text => "$1"));
269             }
270             }
271              
272 1 50       2 if($FIELDS{POSITION}) {
273 1         4 $FIELDS{POSITION} =~ s/^POSITION\s+//;
274             $annotation->add_Annotation('position',
275 1         4 Bio::Annotation::Comment->new(-text => $FIELDS{POSITION}));
276             }
277            
278 1 50       2 if ($FIELDS{CLASS}) {
279 1         4 $FIELDS{CLASS} =~ s/^CLASS\s+//;
280 1         4 $FIELDS{'CLASS'} =~ s/\n//g;
281 1         7 while($FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g){
282 2         7 my ($pathway,$db,$id) = ($1,$2,$3);
283 2         10 $pathway =~ s/\s+/ /g;
284 2         5 $pathway =~ s/\s$//g;
285 2         4 $pathway =~ s/^\s+//;
286 2         6 $annotation->add_Annotation('pathway',
287             Bio::Annotation::Comment->new(-text => $pathway));
288              
289 2         7 $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
290             -database => $db, -primary_id => $id));
291             }
292             }
293              
294 1 50       3 if($FIELDS{DBLINKS}) {
295 1         4 $FIELDS{DBLINKS} =~ s/^DBLINKS/ /;
296 1         5 while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs){ ### modified
297 4 50       16 $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
298             -database => $1, -primary_id => $2)) if $1;
299             }
300             }
301              
302 1         2 $params{'-alphabet'} = 'dna';
303 1         1 $params{'-seq'} = $nt_seq;
304 1         2 $params{'-display_id'} = $name;
305 1         2 $params{'-accession_number'} = $entry_id;
306 1         7 $params{'-species'} = Bio::Species->new(
307             -common_name => $entry_species);
308 1         2 $params{'-annotation'} = $annotation;
309              
310 1         4 $builder->add_slot_value(%params);
311 1         3 $seq = $builder->make_object();
312              
313 1         8 return $seq;
314             }
315              
316             =head2 write_seq
317              
318             Title : write_seq
319             Note : write_seq() is not implemented for KEGG format output.
320              
321             =cut
322              
323             sub write_seq {
324 0     0 1   shift->throw("write_seq() not implemented for KEGG format output.");
325             }
326              
327             1;