File Coverage

Bio/Structure/IO.pm
Criterion Covered Total %
statement 42 79 53.1
branch 10 40 25.0
condition 6 16 37.5
subroutine 9 18 50.0
pod 4 5 80.0
total 71 158 44.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Structure::IO
3             #
4             # Copyright 2001, 2002 Kris Boulez
5             #
6             # You may distribute this module under the same terms as perl itself
7             #
8             # _history
9             # October 18, 1999 Largely rewritten by Lincoln Stein
10             # November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified
11             # where needed. Factoring out common methods
12             # (to Bio::Root::IO) might be a good idea.
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Structure::IO - Handler for Structure Formats
19              
20             =head1 SYNOPSIS
21              
22             use Bio::Structure::IO;
23              
24             $in = Bio::Structure::IO->new(-file => "inputfilename",
25             -format => 'pdb');
26              
27             while ( my $struc = $in->next_structure() ) {
28             print "Structure ", $struc->id, " number of models: ",
29             scalar $struc->model,"\n";
30             }
31              
32             =head1 DESCRIPTION
33              
34             Bio::Structure::IO is a handler module for the formats in the
35             Structure::IO set (e.g. L<Bio::Structure::IO::pdb>). It is the officially
36             sanctioned way of getting at the format objects, which most people
37             should use.
38              
39             The Bio::Structure::IO system can be thought of like biological file
40             handles. They are attached to filehandles with smart formatting rules
41             (e.g. PDB format) and can either read or write structure objects
42             (Bio::Structure objects, or more correctly, Bio::Structure::StructureI
43             implementing objects, of which Bio::Structure is one such object). If
44             you want to know what to do with a Bio::Structure object, read
45             L<Bio::Structure>.
46              
47             The idea is that you request a stream object for a particular format.
48             All the stream objects have a notion of an internal file that is read
49             from or written to. A particular Structure::IO object instance is
50             configured for either input or output. A specific example of a stream
51             object is the Bio::Structure::IO::pdb object.
52              
53             Each stream object has functions
54              
55             $stream->next_structure();
56              
57             and
58              
59             $stream->write_structure($struc);
60              
61             also
62              
63             $stream->type() # returns 'INPUT' or 'OUTPUT'
64              
65             As an added bonus, you can recover a filehandle that is tied to the
66             Structure::IO object, allowing you to use the standard E<lt>E<gt>
67             and print operations to read and write structure objects:
68              
69             use Bio::Structure::IO;
70              
71             $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input
72              
73             while ( $structure = <$stream> ) {
74             # do something with $structure
75             }
76              
77             and
78              
79             print $stream $structure; # when stream is in output mode
80              
81              
82             =head1 CONSTRUCTORS
83              
84             =head2 Bio::Structure::IO-E<gt>new()
85              
86             $stream = Bio::Structure::IO->new(-file => 'filename', -format=>$format);
87             $stream = Bio::Structure::IO->new(-fh => \*FILEHANDLE, -format=>$format);
88             $stream = Bio::Structure::IO->new(-format => $format);
89              
90             The new() class method constructs a new Bio::Structure::IO object. The
91             returned object can be used to retrieve or print Bio::Structure
92             objects. new() accepts the following parameters:
93              
94             =over 4
95              
96             =item -file
97              
98             A file path to be opened for reading or writing. The usual Perl
99             conventions apply:
100              
101             'file' # open file for reading
102             '>file' # open file for writing
103             '>>file' # open file for appending
104             '+<file' # open file read/write
105             'command |' # open a pipe from the command
106             '| command' # open a pipe to the command
107              
108             =item -fh
109              
110             You may provide new() with a previously-opened filehandle. For
111             example, to read from STDIN:
112              
113             $strucIO = Bio::Structure::IO->new(-fh => \*STDIN);
114              
115             Note that you must pass filehandles as references to globs.
116              
117             If neither a filehandle nor a filename is specified, then the module
118             will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
119             semantics.
120              
121             =item -format
122              
123             Specify the format of the file. Supported formats include:
124              
125             pdb Protein Data Bank format
126              
127             If no format is specified and a filename is given, then the module
128             will attempt to deduce it from the filename. If this is unsuccessful,
129             PDB format is assumed.
130              
131             The format name is case insensitive. 'PDB', 'Pdb' and 'pdb' are
132             all supported.
133              
134             =back
135              
136             =head2 Bio::Structure::IO-E<gt>newFh()
137              
138             $fh = Bio::Structure::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
139             $fh = Bio::Structure::IO->newFh(-format => $format);
140             # etc.
141              
142             This constructor behaves like new(), but returns a tied filehandle
143             rather than a Bio::Structure::IO object. You can read structures from this
144             object using the familiar E<lt>E<gt> operator, and write to it using
145             print(). The usual array and $_ semantics work. For example, you can
146             read all structure objects into an array like this:
147              
148             @structures = <$fh>;
149              
150             Other operations, such as read(), sysread(), write(), close(), and printf()
151             are not supported.
152              
153             =head1 OBJECT METHODS
154              
155             See below for more detailed summaries. The main methods are:
156              
157             =head2 $structure = $structIO-E<gt>next_structure()
158              
159             Fetch the next structure from the stream.
160              
161             =head2 $structIO-E<gt>write_structure($struc [,$another_struc,...])
162              
163             Write the specified structure(s) to the stream.
164              
165             =head2 TIEHANDLE(), READLINE(), PRINT()
166              
167             These provide the tie interface. See L<perltie> for more details.
168              
169             =head1 FEEDBACK
170              
171             =head2 Mailing Lists
172              
173             User feedback is an integral part of the evolution of this and other
174             Bioperl modules. Send your comments and suggestions preferably to one
175             of the Bioperl mailing lists. Your participation is much appreciated.
176              
177             bioperl-l@bioperl.org - General discussion
178             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
179              
180             =head2 Support
181              
182             Please direct usage questions or support issues to the mailing list:
183              
184             I<bioperl-l@bioperl.org>
185              
186             rather than to the module maintainer directly. Many experienced and
187             responsive experts will be able to look at the problem and quickly
188             address it. Please include a thorough description of the problem
189             with code and data examples if at all possible.
190              
191             =head2 Reporting Bugs
192              
193             Report bugs to the Bioperl bug tracking system to help us keep track
194             of the bugs and their resolution.
195             Bug reports can be submitted via the web:
196              
197             https://github.com/bioperl/bioperl-live/issues
198              
199             =head1 AUTHORS - Ewan Birney, Lincoln Stein, Kris Boulez
200              
201             Email birney@ebi.ac.uk, lstein@cshl.org, kris.boulez@algonomics.com
202              
203              
204             =head1 APPENDIX
205              
206             The rest of the documentation details each of the object
207             methods. Internal methods are usually preceded with a _
208              
209             =cut
210              
211             # Let the code begin...
212              
213             package Bio::Structure::IO;
214              
215 1     1   503 use strict;
  1         1  
  1         25  
216              
217 1     1   385 use Bio::PrimarySeq;
  1         4  
  1         43  
218 1     1   7 use Symbol;
  1         2  
  1         96  
219              
220 1     1   7 use base qw(Bio::Root::Root Bio::Root::IO);
  1         2  
  1         1165  
221              
222             =head2 new
223              
224             Title : new
225             Usage : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format')
226             Function: Returns a new structIOstream
227             Returns : A Bio::Structure::IO handler initialised with the appropriate format
228             Args : -file => $filename
229             -format => format
230             -fh => filehandle to attach to
231              
232             =cut
233              
234             my $entry = 0;
235              
236             sub new {
237 6     6 1 59 my ($caller,@args) = @_;
238 6   33     39 my $class = ref($caller) || $caller;
239              
240             # or do we want to call SUPER on an object if $caller is an
241             # object?
242 6 100       36 if( $class =~ /Bio::Structure::IO::(\S+)/ ) {
243 3         24 my ($self) = $class->SUPER::new(@args);
244 3         15 $self->_initialize(@args);
245 3         21 return $self;
246             } else {
247              
248 3         12 my %param = @args;
249 3         13 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  6         17  
250             my $format = $param{'-format'} ||
251 3   50     15 $class->_guess_format( $param{-file} || $ARGV[0] ) ||
252             'pdb';
253 3         7 $format = "\L$format"; # normalize capitalization to lower case
254              
255             # normalize capitalization
256 3 50       11 return unless( &_load_format_module($format) );
257 3         37 return "Bio::Structure::IO::$format"->new(@args);
258             }
259             }
260              
261             =head2 newFh
262              
263             Title : newFh
264             Usage : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
265             Function: does a new() followed by an fh()
266             Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
267             $structure = <$fh>; # read a structure object
268             print $fh $structure; # write a structure object
269             Returns : filehandle tied to the Bio::Structure::IO::Fh class
270             Args :
271              
272             =cut
273              
274             sub newFh {
275 0     0 1 0 my $class = shift;
276 0 0       0 return unless my $self = $class->new(@_);
277 0         0 return $self->fh;
278             }
279              
280             =head2 fh
281              
282             Title : fh
283             Usage : $obj->fh
284             Function:
285             Example : $fh = $obj->fh; # make a tied filehandle
286             $structure = <$fh>; # read a structure object
287             print $fh $structure; # write a structure object
288             Returns : filehandle tied to the Bio::Structure::IO::Fh class
289             Args :
290              
291             =cut
292              
293              
294             sub fh {
295 0     0 1 0 my $self = shift;
296 0   0     0 my $class = ref($self) || $self;
297 0         0 my $s = Symbol::gensym;
298 0         0 tie $$s,$class,$self;
299 0         0 return $s;
300             }
301              
302              
303             =head2 format
304              
305             Title : format
306             Usage : $format = $obj->format()
307             Function: Get the structure format
308             Returns : structure format
309             Args : none
310              
311             =cut
312              
313             # format() method inherited from Bio::Root::IO
314              
315              
316             # _initialize is chained for all Structure::IO classes
317              
318             sub _initialize {
319 3     3   11 my($self, @args) = @_;
320              
321             # not really necessary unless we put more in RootI
322 3         21 $self->SUPER::_initialize(@args);
323              
324             # initialize the IO part
325 3         33 $self->_initialize_io(@args);
326             }
327              
328             =head2 next_structure
329              
330             Title : next_structure
331             Usage : $structure = $stream->next_structure
332             Function: Reads the next structure object from the stream and returns a
333             Bio::Structure::Entry object.
334              
335             Certain driver modules may encounter entries in the stream that
336             are either misformatted or that use syntax not yet understood
337             by the driver. If such an incident is recoverable, e.g., by
338             dismissing a feature of a feature table or some other non-mandatory
339             part of an entry, the driver will issue a warning. In the case
340             of a non-recoverable situation an exception will be thrown.
341             Do not assume that you can resume parsing the same stream after
342             catching the exception. Note that you can always turn recoverable
343             errors into exceptions by calling $stream->verbose(2) (see
344             Bio::RootI POD page).
345             Returns : a Bio::Structure::Entry object
346             Args : none
347              
348             =cut
349              
350             sub next_structure {
351 0     0 1 0 my ($self, $struc) = @_;
352 0         0 $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
353             }
354              
355             # Do we want people to read out the sequence directly from a $structIO stream
356             #
357             ##=head2 next_primary_seq
358             ##
359             ## Title : next_primary_seq
360             ## Usage : $seq = $stream->next_primary_seq
361             ## Function: Provides a primaryseq type of sequence object
362             ## Returns : A Bio::PrimarySeqI object
363             ## Args : none
364             ##
365             ##
366             ##=cut
367             ##
368             ##sub next_primary_seq {
369             ## my ($self) = @_;
370             ##
371             ## # in this case, we default to next_seq. This is because
372             ## # Bio::Seq's are Bio::PrimarySeqI objects. However we
373             ## # expect certain sub classes to override this method to provide
374             ## # less parsing heavy methods to retrieving the objects
375             ##
376             ## return $self->next_seq();
377             ##}
378              
379             =head2 write_structure
380              
381             Title : write_structure
382             Usage : $stream->write_structure($structure)
383             Function: writes the $structure object into the stream
384             Returns : 1 for success and 0 for error
385             Args : Bio::Structure object
386              
387             =cut
388              
389             sub write_seq {
390 0     0 0 0 my ($self, $struc) = @_;
391 0         0 $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
392             }
393              
394              
395             # Do we need this here?
396             #
397             ##=head2 alphabet
398             ##
399             ## Title : alphabet
400             ## Usage : $self->alphabet($newval)
401             ## Function: Set/get the molecule type for the Seq objects to be created.
402             ## Example : $seqio->alphabet('protein')
403             ## Returns : value of alphabet: 'dna', 'rna', or 'protein'
404             ## Args : newvalue (optional)
405             ## Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein'
406             ##
407             ##=cut
408             ##
409             ##sub alphabet {
410             ## my ($self, $value) = @_;
411             ##
412             ## if ( defined $value) {
413             ## # instead of hard-coding the allowed values once more, we check by
414             ## # creating a dummy sequence object
415             ## eval {
416             ## my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
417             ## };
418             ## if($@) {
419             ## $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
420             ## }
421             ## $self->{'alphabet'} = "\L$value";
422             ## }
423             ## return $self->{'alphabet'};
424             ##}
425              
426             =head2 _load_format_module
427              
428             Title : _load_format_module
429             Usage : *INTERNAL Structure::IO stuff*
430             Function: Loads up (like use) a module at run time on demand
431             Example :
432             Returns :
433             Args :
434              
435             =cut
436              
437             sub _load_format_module {
438 3     3   7 my ($format) = @_;
439 3         3 my ($module, $load, $m);
440              
441 3         11 $module = "_<Bio/Structure/IO/$format.pm";
442 3         8 $load = "Bio/Structure/IO/$format.pm";
443              
444 3 100       18 return 1 if $main::{$module};
445 1         4 eval {
446 1         711 require $load;
447             };
448 1 50       8 if ( $@ ) {
449 0         0 print STDERR <<END;
450             $load: $format cannot be found
451             Exception $@
452             For more information about the Structure::IO system please see the
453             Bio::Structure::IO docs. This includes ways of checking for formats at
454             compile time, not run time
455             END
456             ;
457 0         0 return;
458             }
459 1         9 return 1;
460             }
461              
462             =head2 _concatenate_lines
463              
464             Title : _concatenate_lines
465             Usage : $s = _concatenate_lines($line, $continuation_line)
466             Function: Private. Concatenates two strings assuming that the second stems
467             from a continuation line of the first. Adds a space between both
468             unless the first ends with a dash.
469              
470             Takes care of either arg being empty.
471             Example :
472             Returns : A string.
473             Args :
474              
475             =cut
476              
477             sub _concatenate_lines {
478 60     60   68 my ($self, $s1, $s2) = @_;
479 60 50 66     184 $s1 .= " " if($s1 && ($s1 !~ /-$/) && $s2);
      66        
480 60 100       213 return ($s1 ? $s1 : "") . ($s2 ? $s2 : "");
    50          
481             }
482              
483             =head2 _filehandle
484              
485             Title : _filehandle
486             Usage : $obj->_filehandle($newval)
487             Function: This method is deprecated. Call _fh() instead.
488             Example :
489             Returns : value of _filehandle
490             Args : newvalue (optional)
491              
492              
493             =cut
494              
495             sub _filehandle {
496 0     0   0 my ($self,@args) = @_;
497 0         0 return $self->_fh(@args);
498             }
499              
500             =head2 _guess_format
501              
502             Title : _guess_format
503             Usage : $obj->_guess_format($filename)
504             Function:
505             Example :
506             Returns : guessed format of filename (lower case)
507             Args :
508              
509             =cut
510              
511             sub _guess_format {
512 0     0   0 my $class = shift;
513 0 0       0 return unless $_ = shift;
514 0 0       0 return 'fasta' if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
515 0 0       0 return 'genbank' if /\.(gb|gbank|genbank)$/i;
516 0 0       0 return 'scf' if /\.scf$/i;
517 0 0       0 return 'pir' if /\.pir$/i;
518 0 0       0 return 'embl' if /\.(embl|ebl|emb|dat)$/i;
519 0 0       0 return 'raw' if /\.(txt)$/i;
520 0 0       0 return 'gcg' if /\.gcg$/i;
521 0 0       0 return 'ace' if /\.ace$/i;
522 0 0       0 return 'bsml' if /\.(bsm|bsml)$/i;
523 0 0       0 return 'pdb' if /\.(ent|pdb)$/i;
524             }
525              
526             sub DESTROY {
527 3     3   930 my $self = shift;
528              
529 3         23 $self->close();
530             }
531              
532             sub TIEHANDLE {
533 0     0     my ($class,$val) = @_;
534 0           return bless {'structio' => $val}, $class;
535             }
536              
537             sub READLINE {
538 0     0     my $self = shift;
539 0 0 0       return $self->{'structio'}->next_seq() || undef unless wantarray;
540 0           my (@list, $obj);
541 0           push @list, $obj while $obj = $self->{'structio'}->next_seq();
542 0           return @list;
543             }
544              
545             sub PRINT {
546 0     0     my $self = shift;
547 0           $self->{'structio'}->write_seq(@_);
548             }
549              
550             1;
551