File Coverage

blib/lib/Bio/FeatureIO.pm
Criterion Covered Total %
statement 48 75 64.0
branch 13 28 46.4
condition 4 12 33.3
subroutine 10 18 55.5
pod 6 6 100.0
total 81 139 58.2


line stmt bran cond sub pod time code
1             # $Id: FeatureIO.pm 16108 2009-09-16 17:07:49Z cjfields $
2             #
3             # BioPerl module for Bio::FeatureIO
4             #
5             # Please direct questions and support issues to <bioperl-l@bioperl.org>
6             #
7             # Reimplementation by Chris Fields
8             #
9             # Original implementation by Allen Day
10             #
11             # Copyright Chris Fields
12             #
13             # You may distribute this module under the same terms as perl itself
14             #
15             # POD documentation - main docs before the code
16              
17             =head1 NAME
18              
19             Bio::FeatureIO - Handler for FeatureIO
20              
21             =head1 SYNOPSIS
22              
23             use Bio::FeatureIO;
24              
25             #read from a file
26             $in = Bio::FeatureIO->new(-file => "my.gff" , -format => 'GFF');
27              
28             #read from a filehandle
29             $in = Bio::FeatureIO->new(-fh => \*GFF , -format => 'GFF');
30              
31             #read features already attached to a sequence
32             my $feat = Bio::FeatureIO->new(-seq => $seq , -format => 'features');
33              
34             #read new features for existing sequence
35             my $seq = Bio::FeatureIO->new(-seq => $seq , -format => 'Das');
36              
37             #write out features
38             $out = Bio::FeatureIO->new(-file => ">outputfilename" ,
39             -format => 'GFF' ,
40             -version => 3);
41              
42             while ( my $feature = $in->next_feature() ) {
43             $out->write_feature($feature);
44             }
45              
46             =head1 DESCRIPTION
47              
48             An I/O iterator subsystem for genomic sequence features.
49              
50             Bio::FeatureIO is a handler module for the formats in the FeatureIO set (eg,
51             Bio::FeatureIO::GFF). It is the officially sanctioned way of getting at the
52             format objects, which most people should use.
53              
54             The Bio::FeatureIO system can be thought of like biological file handles. They
55             are attached to filehandles with smart formatting rules (eg, GFF format, or BED
56             format) and can either read or write feature objects (Bio::SeqFeature objects,
57             or more correctly, Bio::FeatureHolderI implementing objects, of which
58             Bio::SeqFeature is one such object). If you want to know what to do with a
58             Bio::SeqFeatureI object, read L<Bio::SeqFeatureI>.
60              
61             The idea is that you request a stream object for a particular format. All the
62             stream objects have a notion of an internal file that is read from or written
63             to. A particular FeatureIO object instance is configured for either input or
64             output. A specific example of a stream object is the Bio::FeatureIO::gff object.
65              
66             Each stream object has functions:
67              
68             $stream->next_feature();
69             $stream->write_feature($feature);
70              
71             =head1 SUPPORTED FORMATS
72              
73             name module
74             -----------------------------------
75             BED bed.pm
76             GFF gff.pm
77             GTF gtf.pm
78             InterPro (IPRScan 4.0) interpro.pm
79             PTT (NCBI protein table) ptt.pm
80              
81             =head1 CONSTRUCTORS
82              
83             =head2 Bio::FeatureIO-E<gt>new()
84              
85             $featureIO = Bio::FeatureIO->new(-file => 'filename', -format=>$format);
86             $featureIO = Bio::FeatureIO->new(-fh => \*FILEHANDLE, -format=>$format);
87             $featureIO = Bio::FeatureIO->new(-seq => $seq, -format=>$format);
88              
89             The new() class method constructs a new Bio::FeatureIO object. The
90             returned object can be used to retrieve or print feature objects. new()
91             accepts the following parameters:
92              
93             =over 4
94              
95             =item -file
96              
97             A file path to be opened for reading or writing. The usual Perl
98             conventions apply:
99              
100             'file' # open file for reading
101             '>file' # open file for writing
102             '>>file' # open file for appending
103             '+<file' # open file for reading and writing
104             'command |' # open a pipe from the command
105             '| command' # open a pipe to the command
106              
107             =item -fh
108              
109             You may provide new() with a previously-opened filehandle. For
110             example, to read from STDIN:
111              
112             $featio = Bio::FeatureIO->new(-fh => \*STDIN);
113              
114             Note that you must pass filehandles as references to globs.
115              
116             If neither a filehandle nor a filename is specified, then the module will read
117             from the @ARGV array or STDIN, using the familiar E<lt>E<gt> semantics.
118              
119             A string filehandle is handy if you want to modify the output in the memory,
120             before printing it out. The following program reads in GFF formatted features
121             from a file and prints them out in GTF format with some HTML tags:
122              
123             use Bio::FeatureIO;
124             use IO::String;
125             my $in = Bio::FeatureIO->new('-file' => "my.gff" ,
126             '-format' => 'GFF');
127             while ( my $f = $in->next_feature() ) {
128             # the output handle is reset for every file
129             my $stringio = IO::String->new($string);
130             my $out = Bio::FeatureIO->new('-fh' => $stringio,
131             '-format' => 'gtf');
132             # output goes into $string
133             $out->write_feature($f);
134             # modify $string
135             $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
136             # print into STDOUT
137             print $string;
138             }
139              
140             =item -format
141              
142             Specify the format of the file. See above for list of supported formats
143              
144             =item -flush
145              
146             By default, all files (or filehandles) opened for writing features will be
147             flushed after each write_feature() (making the file immediately usable). If you
148             don't need this facility and would like to marginally improve the efficiency of
149             writing multiple features to the same file (or filehandle), pass the -flush
150             option '0' or any other value that evaluates as defined but false:
151              
152             my $f1 = Bio::FeatureIO->new -file => "<a.f1",
153             -format => "f1";
154             my $f2 = Bio::FeatureIO->new -file => ">a.f2",
155             -format => "f2",
156             -flush => 0; # go as fast as we can!
157              
158             while($feature = $f1->next_feature) { $f2->write_feature($feature) }
159              
160             =back
161              
162             =head2 Bio::FeatureIO-E<gt>newFh()
163              
164             $fh = Bio::FeatureIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
165             $fh = Bio::FeatureIO->newFh(-format => $format);
166             # etc.
167              
168             This constructor behaves like new(), but returns a tied filehandle rather than a
169             Bio::FeatureIO object. You can read features from this object using the
170             familiar E<lt>E<gt> operator, and write to it using print(). The usual array and
171             $_ semantics work. For example, you can read all feature objects into an array
172             like this:
173              
174             @features = <$fh>;
175              
176             Other operations, such as read(), sysread(), write(), close(), and printf()
177             are not supported.
178              
179             =head1 OBJECT METHODS
180              
181             See below for more detailed summaries. The main methods are:
182              
183             =over 3
184              
185             =item next_feature
186              
187             Fetch the next feature from the stream.
188              
189             =item write_feature
190              
191             Write the specified feature(s) to the stream.
192              
193             =item feature_factory
194              
195             This gets/sets the specific Bio::Factory::FeatureFactoryI
196              
197             =back
198              
199             The following methods delegate to the internal feature factory:
200              
201             =over 3
202              
203             =item feature_class
204              
205             Set the specific Bio::SeqFeatureI class to return
206              
207             =item type_features
208              
209             Boolean flag, ensures the returned features are typed
210              
211             =item unflatten_features
212              
213             Ensure the returned features are unflattened
214              
215             =back
216              
217             =head2 TIEHANDLE(), READLINE(), PRINT()
218              
219             These provide the tie interface. See L<perltie> for more details.
220              
221             =head1 FEEDBACK
222              
223             =head2 Mailing Lists
224              
225             User feedback is an integral part of the evolution of this
226             and other Bioperl modules. Send your comments and suggestions preferably
227             to one of the Bioperl mailing lists.
228              
229             Your participation is much appreciated.
230              
231             bioperl-l@bioperl.org - General discussion
232             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
233              
234             =head2 Support
235              
236             Please direct usage questions or support issues to the mailing list:
237              
238             I<bioperl-l@bioperl.org>
239              
240             rather than to the module maintainer directly. Many experienced and
241             responsive experts will be able to look at the problem and quickly
242             address it. Please include a thorough description of the problem
243             with code and data examples if at all possible.
244              
245             =head2 Reporting Bugs
246              
247             Report bugs to the Bioperl bug tracking system to help us keep track
248             of the bugs and their resolution. Bug reports can be submitted via the
249             web:
250              
251             http://bugzilla.open-bio.org/
252              
253             =head1 AUTHOR - Allen Day
254              
255             Email allenday@ucla.edu
256              
257             =head1 APPENDIX
258              
259             The rest of the documentation details each of the object
260             methods. Internal methods are usually preceded with a _
261              
262             =cut
263              
264             #' Let the code begin...
265              
266             package Bio::FeatureIO;
267             BEGIN {
268 5     5   549872 $Bio::FeatureIO::AUTHORITY = 'cpan:BIOPERLML';
269             }
270             $Bio::FeatureIO::VERSION = '1.6.905';
271 5     5   43 use strict;
  5         14  
  5         119  
272              
273 5     5   28 use Symbol;
  5         9  
  5         309  
274              
275 5     5   29 use base qw(Bio::Root::Root Bio::Root::IO);
  5         10  
  5         1854  
276              
277             =head2 new
278              
279             Title : new
280             Usage : $stream = Bio::FeatureIO->new(-file => $filename, -format => 'Format')
281             Function: Returns a new feature stream
282             Returns : A Bio::FeatureIO stream initialised with the appropriate format
283             Args : Named parameters:
284             -file => $filename
285             -fh => filehandle to attach to
286             -format => format
287              
288             =cut
289              
290             my $entry = 0;
291              
292             sub new {
293 20     20 1 4819 my ($caller,@args) = @_;
294 20   33     124 my $class = ref($caller) || $caller;
295              
296             # or do we want to call SUPER on an object if $caller is an
297             # object?
298 20 100       138 if( $class =~ /Bio::FeatureIO::(\S+)/ ) {
299              
300 10         66 my ($self) = $class->SUPER::new(@args);
301 10         352 $self->_initialize(@args);
302 10         136 return $self;
303              
304             } else {
305              
306 10         48 my %param = @args;
307              
308 10         49 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  13         77  
309             my $format = $param{'-format'} ||
310 10   66     89 $class->_guess_format( $param{-file} || $ARGV[0] );
311            
312 10 50       39 if( ! $format ) {
313 0 0       0 if ($param{-file}) {
    0          
314 0         0 $format = $class->_guess_format($param{-file});
315             } elsif ($param{-fh}) {
316 0         0 $format = $class->_guess_format(undef);
317             }
318             }
319 10         28 $format = "\L$format"; # normalize capitalization to lower case
320 10 50       50 return unless( $class->_load_format_module($format) );
321 10         114 return "Bio::FeatureIO::$format"->new(@args);
322              
323             }
324             }
325              
326             =head2 newFh
327              
328             Title : newFh
329             Usage : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
330             Function: does a new() followed by an fh()
331             Example : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
332             $feature = <$fh>; # read a feature object
333             print $fh $feature; # write a feature object
334             Returns : filehandle tied to the Bio::FeatureIO::Fh class
335             Args :
336              
337             See L<Bio::FeatureIO::Fh>
338              
339             =cut
340              
341             sub newFh {
342 0     0 1 0 my $class = shift;
343 0 0       0 return unless my $self = $class->new(@_);
344 0         0 return $self->fh;
345             }
346              
347             =head2 fh
348              
349             Title : fh
350             Usage : $obj->fh
351             Function:
352             Example : $fh = $obj->fh; # make a tied filehandle
353             $feature = <$fh>; # read a feature object
354             print $fh $feature; # write a feature object
355             Returns : filehandle tied to Bio::FeatureIO class
356             Args : none
357              
358             =cut
359              
360              
361             sub fh {
362 0     0 1 0 my $self = shift;
363 0   0     0 my $class = ref($self) || $self;
364 0         0 my $s = Symbol::gensym;
365 0         0 tie $$s,$class,$self;
366 0         0 return $s;
367             }
368              
369             # _initialize is chained for all FeatureIO classes
370              
371             sub _initialize {
372 10     10   37 my($self, %arg) = @_;
373              
374             # flush is initialized by the Root::IO init
375              
376             # initialize the IO part
377 10         76 $self->seq($arg{-seq});
378 10         70 $self->_initialize_io(%arg);
379             }
380              
381             =head2 next_feature
382              
383             Title : next_feature
384             Usage : $feature = $stream->next_feature
385             Function: Reads the next feature object from the stream and returns it.
386              
387             Certain driver modules may encounter entries in the stream
388             that are either misformatted or that use syntax not yet
389             understood by the driver. If such an incident is
390             recoverable, e.g., by dismissing a feature of a feature
391             table or some other non-mandatory part of an entry, the
392             driver will issue a warning. In the case of a
393             non-recoverable situation an exception will be thrown. Do
394             not assume that you can resume parsing the same stream
395             after catching the exception. Note that you can always turn
396             recoverable errors into exceptions by calling
397             $stream->verbose(2).
398              
399             Returns : a Bio::SeqFeatureI feature object
400             Args : none
401              
402             See L<Bio::Root::RootI>, L<Bio::SeqFeatureI>
403              
404             =cut
405              
406             sub next_feature {
407 0     0 1 0 my ($self, $seq) = @_;
408 0         0 $self->throw_not_implemented;
409             }
410              
411             =head2 write_feature
412              
413             Title : write_feature
414             Usage : $stream->write_feature($feature)
415             Function: writes the $feature object into the stream
416             Returns : 1 for success and 0 for error
417             Args : Bio::SeqFeature object
418              
419             =cut
420              
421             sub write_feature {
422 0     0 1 0 my ($self, $seq) = @_;
423 0         0 $self->throw_not_implemented();
424             }
425              
426             =head2 _load_format_module
427              
428             Title : _load_format_module
429             Usage : *INTERNAL FeatureIO stuff*
430             Function: Loads up (like use) a module at run time on demand
431             Example :
432             Returns :
433             Args :
434              
435             =cut
436              
437             sub _load_format_module {
438 10     10   35 my ($self, $format) = @_;
439 10   33     47 my $class = ref($self) || $self;
440 10         40 my $module = $class."::$format";#"Bio::Feature::" . $format;
441 10         18 my $ok;
442              
443 10         22 eval {
444 10         78 $ok = $self->_load_module($module);
445             };
446 10 50       1422 if ( $@ ) {
447 0         0 print STDERR <<END;
448             $self: $format cannot be found
449             Exception $@
450             For more information about the FeatureIO system please see the FeatureIO docs.
451             This includes ways of checking for formats at compile time, not run time
452             END
453             ;
454             }
455 10         51 return $ok;
456             }
457              
458             =head2 seq
459              
460             Title : seq
461             Usage : $obj->seq() OR $obj->seq($newSeq)
462             Example :
463             Returns : Bio::SeqI object
464             Args : newSeq (optional)
465              
466             =cut
467              
468             sub seq {
469 10     10 1 26 my $self = shift;
470 10         32 my $val = shift;
471              
472 10 50       34 $self->{'seq'} = $val if defined($val);
473 10         26 return $self->{'seq'};
474             }
475              
476             =head2 _filehandle
477              
478             Title : _filehandle
479             Usage : $obj->_filehandle($newval)
480             Function: This method is deprecated. Call _fh() instead.
481             Example :
482             Returns : value of _filehandle
483             Args : newvalue (optional)
484              
485              
486             =cut
487              
488             sub _filehandle {
489 0     0   0 my ($self,@args) = @_;
490 0         0 return $self->_fh(@args);
491             }
492              
493             =head2 _guess_format
494              
495             Title : _guess_format
496             Usage : $obj->_guess_format($filename)
497             Function: guess format based on file suffix
498             Example :
499             Returns : guessed format of filename (lower case)
500             Args :
501             Notes : See "SUPPORTED FORMATS"
502              
503             =cut
504              
505             sub _guess_format {
506 8     8   18 my $class = shift;
507 8 50       29 return unless $_ = shift;
508 8 100       72 return 'gff' if /\.gff3?$/i;
509 2 50       11 return 'gff' if /\.gtf$/i;
510 2 100       9 return 'bed' if /\.bed$/i;
511 1 50       3 return 'ptt' if /\.ptt$/i;
512              
513 1         4 return 'gff'; #the default
514             }
515              
516             sub DESTROY {
517 10     10   19218 my $self = shift;
518 10         79 $self->close();
519             }
520              
521             sub TIEHANDLE {
522 0     0     my ($class,$val) = @_;
523 0           return bless {'featio' => $val}, $class;
524             }
525              
526             sub READLINE {
527 0     0     my $self = shift;
528 0 0         return $self->{'featio'}->next_feature() unless wantarray;
529 0           my (@list, $obj);
530 0           push @list, $obj while $obj = $self->{'featio'}->next_feature();
531 0           return @list;
532             }
533              
534             sub PRINT {
535 0     0     my $self = shift;
536 0           $self->{'featio'}->write_feature(@_);
537             }
538              
539             1;
540