File Coverage

Bio/SeqIO/embldriver.pm
Criterion Covered Total %
statement 81 91 89.0
branch 49 58 84.4
condition 11 17 64.7
subroutine 8 10 80.0
pod 3 4 75.0
total 152 180 84.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::embldriver
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqIO::embldriver - EMBL sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             It is probably best not to use this object directly, but
21             rather go through the SeqIO handler system. Go:
22              
23             $stream = Bio::SeqIO->new(-file => $filename, -format => 'embldriver');
24              
25             while ( (my $seq = $stream->next_seq()) ) {
26             # do something with $seq
27             }
28              
29             =head1 DESCRIPTION
30              
31             This object can transform Bio::Seq objects to and from EMBL flat
32             file databases.
33              
34             There is a lot of flexibility here about how to dump things which
35             should be documented more fully.
36              
37             There should be a common object that this and Genbank share (probably
38             with Swissprot). Too much of the magic is identical.
39              
40             =head2 Optional functions
41              
42             =over 3
43              
44             =item _show_dna()
45              
46             (output only) shows the dna or not
47              
48             =item _post_sort()
49              
50             (output only) provides a sorting func which is applied to the FTHelpers
51             before printing
52              
53             =item _id_generation_func()
54              
55             This is function which is called as
56              
57             print "ID ", $func($annseq), "\n";
58              
59             To generate the ID line. If it is not there, it generates a sensible ID
60             line using a number of tools.
61              
62             If you want to output annotations in EMBL format they need to be
63             stored in a Bio::Annotation::Collection object which is accessible
64             through the Bio::SeqI interface method L.
65              
66             The following are the names of the keys which are polled from a
67             L object.
68              
69             reference - Should contain Bio::Annotation::Reference objects
70             comment - Should contain Bio::Annotation::Comment objects
71             dblink - Should contain Bio::Annotation::DBLink objects
72              
73             =back
74              
75             =head1 FEEDBACK
76              
77             =head2 Mailing Lists
78              
79             User feedback is an integral part of the evolution of this and other
80             Bioperl modules. Send your comments and suggestions preferably to one
81             of the Bioperl mailing lists. Your participation is much appreciated.
82              
83             bioperl-l@bioperl.org - General discussion
84             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85              
86             =head2 Support
87              
88             Please direct usage questions or support issues to the mailing list:
89              
90             I
91              
92             rather than to the module maintainer directly. Many experienced and
93             reponsive experts will be able look at the problem and quickly
94             address it. Please include a thorough description of the problem
95             with code and data examples if at all possible.
96              
97             =head2 Reporting Bugs
98              
99             Report bugs to the Bioperl bug tracking system to help us keep track
100             the bugs and their resolution. Bug reports can be submitted via
101             the web:
102              
103             https://github.com/bioperl/bioperl-live/issues
104              
105             =head1 AUTHOR - Ewan Birney
106              
107             Email birney@ebi.ac.uk
108              
109             =head1 APPENDIX
110              
111             The rest of the documentation details each of the object
112             methods. Internal methods are usually preceded with a _
113              
114             =cut
115              
116             # Let the code begin...
117              
118             package Bio::SeqIO::embldriver;
119 1     1   10 use vars qw(%FTQUAL_NO_QUOTE);
  1         2  
  1         58  
120 1     1   5 use strict;
  1         2  
  1         30  
121 1     1   5 use Bio::SeqIO::Handler::GenericRichSeqHandler;
  1         2  
  1         23  
122 1     1   4 use Data::Dumper;
  1         2  
  1         54  
123              
124 1     1   4 use base qw(Bio::SeqIO);
  1         2  
  1         1296  
125              
126             my %FTQUAL_NO_QUOTE = map {$_ => 1} qw(
127             anticodon citation
128             codon codon_start
129             cons_splice direction
130             evidence label
131             mod_base number
132             rpt_type rpt_unit
133             transl_except transl_table
134             usedin
135             LOCATION
136             );
137              
138             my %DATA_KEY = (
139             ID => 'ID',
140             AC => 'ACCESSION',
141             DT => 'DATE',
142             DE => 'DESCRIPTION',
143             KW => 'KEYWORDS',
144             OS => 'SOURCE',
145             OC => 'CLASSIFICATION',
146             OG => 'ORGANELLE',
147             RN => 'REFERENCE',
148             RA => 'AUTHORS',
149             RC => 'COMMENT',
150             RG => 'CONSRTM',
151             RP => 'POSITION',
152             RX => 'CROSSREF',
153             RT => 'TITLE',
154             RL => 'LOCATION',
155             XX => 'SPACER',
156             FH => 'FEATHEADER',
157             FT => 'FEATURES',
158             AH => 'TPA_HEADER', # Third party annotation
159             AS => 'TPA_DATA', # Third party annotation
160             DR => 'DBLINK',
161             CC => 'COMMENT',
162             CO => 'CO',
163             CON => 'CON',
164             WGS => 'WGS',
165             ANN => 'ANN',
166             TPA => 'TPA',
167             SQ => 'SEQUENCE',
168             );
169              
170             my %SEC = (
171             OC => 'CLASSIFICATION',
172             OH => 'HOST', # not currently handled, bundled with organism data for now
173             OG => 'ORGANELLE',
174             OX => 'CROSSREF',
175             RA => 'AUTHORS',
176             RC => 'COMMENT',
177             RG => 'CONSRTM',
178             RP => 'POSITION',
179             RX => 'CROSSREF',
180             RT => 'TITLE',
181             RL => 'JOURNAL',
182             AS => 'ASSEMBLYINFO', # Third party annotation
183             );
184              
185             my %DELIM = (
186             #CC => "\n",
187             #DR => "\n",
188             #DT => "\n",
189             );
190              
191             # signals to process what's in the hash prior to next round
192             # these should be changed to map secondary data
193             my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
194              
195             sub _initialize {
196 10     10   51 my($self,@args) = @_;
197              
198 10         69 $self->SUPER::_initialize(@args);
199 10         74 my $handler = $self->_rearrange([qw(HANDLER)],@args);
200             # hash for functions for decoding keys.
201 10 50       82 $handler ? $self->seqhandler($handler) :
202             $self->seqhandler(Bio::SeqIO::Handler::GenericRichSeqHandler->new(
203             -format => 'embl',
204             -verbose => $self->verbose,
205             -builder => $self->sequence_builder
206             ));
207             #
208 10 50       68 if( ! defined $self->sequence_factory ) {
209 10         52 $self->sequence_factory(Bio::Seq::SeqFactory->new
210             (-verbose => $self->verbose(),
211             -type => 'Bio::Seq::RichSeq'));
212             }
213             }
214              
215             =head2 next_seq
216              
217             Title : next_seq
218             Usage : $seq = $stream->next_seq()
219             Function: returns the next sequence in the stream
220             Returns : Bio::Seq object
221             Args :
222              
223             =cut
224              
225             sub next_seq {
226 10     10 1 76 my $self = shift;
227 10         40 my $hobj = $self->seqhandler;
228 10         83 local($/) = "\n";
229 10         35 my ($featkey, $qual, $annkey, $delim, $seqdata);
230 10         35 my $lastann = '';
231 10         37 my $ct = 0;
232             PARSER:
233 10         62 while(defined(my $line = $self->_readline)) {
234 2767 100       12115 next PARSER if $line =~ m{^\s*$};
235 2766         5273 chomp $line;
236 2766         13237 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
237 2766 100 100     14207 next PARSER if ($ann eq 'XX' || $ann eq 'FH');
238 2602 100       5812 if ($ann) {
239 2592   100     5915 $data ||='';
240 2592 100       5504 if ($ann eq 'FT') {
241             # seqfeatures
242 1752 100       10919 if ($data =~ m{^(\S+)\s+([^\n]+)}) {
    100          
243 193 50       1466 $hobj->data_handler($seqdata) if $seqdata;
244 193         958 $seqdata = ();
245 193         1497 ($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
246 193         655 $seqdata->{NAME} = $ann;
247 193         563 $qual = 'LOCATION';
248             } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
249 935   50     4550 ($qual, $data) = ($1, $2 ||'');
250             $ct = (exists $seqdata->{$qual}) ?
251 935 100       3024 ((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
  32 100       79  
252             : 0 ;
253             }
254 1752         6235 $data =~ s{^\s+}{};
255 1752         4151 $data =~ tr{"}{}d; # we don't care about quotes yet...
256 1752 100       5671 my $delim = ($FTQUAL_NO_QUOTE{$qual}) ? '' : ' ';
257 1752 100       4047 if ($ct == 0) {
258 1601 100       10999 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
259             $delim.$data :
260             $data;
261             } else {
262 151 100       469 if (!ref($seqdata->{$qual})) {
263 117         441 $seqdata->{$qual} = [$seqdata->{$qual}];
264             }
265             (exists $seqdata->{$qual}->[$ct]) ?
266             (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
267 151 100       1137 (($seqdata->{$qual}->[$ct]) .= $data);
268             }
269             } else {
270             # simple annotations
271 840         2483 $data =~ s{;$}{};
272 840 50       1997 last PARSER if $ann eq '//';
273 840 100       1791 if ($ann ne $lastann) {
274 454 100 100     1630 if (!$SEC{$ann} && $seqdata) {
275 141         688 $hobj->data_handler($seqdata);
276             # can't use undef here; it can lead to subtle mem leaks
277 141         700 $seqdata = ();
278             }
279             $annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
280 454 100       1443 $SEC{$ann};
281 454 100       1408 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
282             }
283            
284             # toss the data for SQ lines; this needs to be done after the
285             # call to the data handler
286            
287 840 100       1807 next PARSER if $ann eq 'SQ';
288 829   50     2768 my $delim = $DELIM{$ann} || ' ';
289 829 100       3396 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
290             $delim.$data : $data;
291 829         3765 $lastann = $ann;
292             }
293             } else {
294             # this should only be sequence (fingers crossed!)
295             SEQUENCE:
296 10         45 while (defined ($line = $self->_readline)) {
297 2477 100       5346 if (index($line, '//') == 0) {
298 9         599 $data =~ tr{0-9 \n}{}d;
299 9         232 $seqdata->{DATA} = $data;
300             #$self->debug(Dumper($seqdata));
301 9         79 $hobj->data_handler($seqdata);
302 9         39 $seqdata = ();
303 9         48 last PARSER;
304             } else {
305 2468         4884 $data .= $line;
306 2468         5381 $line = undef;
307             }
308             }
309             }
310             }
311 10 100       51 $hobj->data_handler($seqdata) if $seqdata;
312 10         25 $seqdata = ();
313 10         62 return $hobj->build_sequence;
314             }
315              
316             sub next_chunk {
317 0     0 0 0 my $self = shift;
318 0         0 my $ct = 0;
319             PARSER:
320 0         0 while(defined(my $line = $self->_readline)) {
321 0 0       0 next if $line =~ m{^\s*$};
322 0         0 chomp $line;
323 0         0 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
324 0   0     0 $data ||= '';
325 0         0 $self->debug("Ann: [$ann]\n\tData: [$data]\n");
326 0 0       0 last PARSER if $ann =~ m{//};
327             }
328             }
329              
330             =head2 write_seq
331              
332             Title : write_seq
333             Usage : $stream->write_seq($seq)
334             Function: writes the $seq object (must be seq) to the stream
335             Returns : 1 for success and 0 for error
336             Args : array of 1 to n Bio::SeqI objects
337              
338             =cut
339              
340             sub write_seq {
341 0     0 1 0 shift->throw("Use Bio::SeqIO::embl for output");
342             # maybe make a Writer class as well????
343             }
344              
345             =head2 seqhandler
346              
347             Title : seqhandler
348             Usage : $stream->seqhandler($handler)
349             Function: Get/Set the Bio::Seq::HandlerBaseI object
350             Returns : Bio::Seq::HandlerBaseI
351             Args : Bio::Seq::HandlerBaseI
352              
353             =cut
354              
355             sub seqhandler {
356 20     20 1 61 my ($self, $handler) = @_;
357 20 100       71 if ($handler) {
358 10 50 33     108 $self->throw("Not a Bio::HandlerBaseI") unless
359             ref($handler) && $handler->isa("Bio::HandlerBaseI");
360 10         63 $self->{'_seqhandler'} = $handler;
361             }
362 20         57 return $self->{'_seqhandler'};
363             }
364              
365             1;
366              
367             __END__