File Coverage

Bio/SeqIO/tab.pm
Criterion Covered Total %
statement 23 25 92.0
branch 8 12 66.6
condition 1 3 33.3
subroutine 5 5 100.0
pod 2 2 100.0
total 39 47 82.9


line stmt bran cond sub pod time code
1             #-----------------------------------------------------------------------------
2             # PACKAGE : Bio::SeqIO::tab
3             # AUTHOR : Philip Lijnzaad
4             # CREATED : Feb 6 2003
5             #
6             # Copyright (c) This module is free software; you can redistribute it
7             # and/or modify it under the same terms as Perl itself.
8             #
9             # _History_
10             #
11             # Ewan Birney developed the SeqIO
12             # schema and the first prototype modules.
13             #
14             # This code is based on his Bio::SeqIO::raw
15             #
16             # You may distribute this module under the same terms as perl itself
17              
18             # POD documentation - main docs before the code
19              
20             =head1 NAME
21              
22             Bio::SeqIO::tab - nearly raw sequence file input/output
23             stream. Reads/writes id"\t"sequence"\n"
24              
25             =head1 SYNOPSIS
26              
27             Do not use this module directly. Use it via the L<Bio::SeqIO> class.
28              
29             =head1 DESCRIPTION
30              
31             This object can transform Bio::Seq objects to and from tabbed flat
32             file databases.
33              
34             It is very useful when doing large scale stuff using the Unix command
35             line utilities (grep, sort, awk, sed, split, you name it). Imagine
36             that you have a format converter 'seqconvert' along the following
37             lines:
38              
39             my $in = Bio::SeqIO->newFh(-fh => \*STDIN , '-format' => $from);
40             my $out = Bio::SeqIO->newFh(-fh=> \*STDOUT, '-format' => $to);
41             print $out $_ while <$in>;
42              
43             then you can very easily filter sequence files for duplicates as:
44              
45             $ seqconvert < foo.fa -from fasta -to tab | sort -u |\
46             seqconvert -from tab -to fasta > foo-unique.fa
47              
48             Or grep [-v] for certain sequences with:
49              
50             $ seqconvert < foo.fa -from fasta -to tab | grep -v '^S[a-z]*control' |\
51             seqconvert -from tab -to fasta > foo-without-controls.fa
52              
53             Or chop up a huge file with sequences into smaller chunks with:
54              
55             $ seqconvert < all.fa -from fasta -to tab | split -l 10 - chunk-
56             $ for i in chunk-*; do seqconvert -from tab -to fasta < $i > $i.fa; done
57             # (this creates files chunk-aa.fa, chunk-ab.fa, ..., each containing 10
58             # sequences)
59              
60              
61             =head1 FEEDBACK
62              
63             =head2 Mailing Lists
64              
65             User feedback is an integral part of the evolution of this and other
66             Bioperl modules. Send your comments and suggestions preferably to one
67             of the Bioperl mailing lists. Your participation is much appreciated.
68              
69             bioperl-l@bioperl.org - General discussion
70             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
71              
72             =head2 Support
73              
74             Please direct usage questions or support issues to the mailing list:
75              
76             I<bioperl-l@bioperl.org>
77              
78             rather than to the module maintainer directly. Many experienced and
79             responsive experts will be able to look at the problem and quickly
80             address it. Please include a thorough description of the problem
81             with code and data examples if at all possible.
82              
83             =head2 Reporting Bugs
84              
85             Report bugs to the Bioperl bug tracking system to help us keep track
86             of the bugs and their resolution.
87             Bug reports can be submitted via the web:
88              
89             https://github.com/bioperl/bioperl-live/issues
90              
91             =head1 AUTHORS
92              
93             Philip Lijnzaad, p.lijnzaad@med.uu.nl
94              
95             =head1 APPENDIX
96              
97             The rest of the documentation details each of the object methods.
98             Internal methods are usually preceded with a _
99              
100             =cut
101              
102              
103             # Let the code begin...
104              
105             package Bio::SeqIO::tab;
106 3     3   442 use strict;
  3         4  
  3         115  
107              
108 3     3   400 use Bio::Seq;
  3         4  
  3         69  
109              
110 3     3   10 use base qw(Bio::SeqIO);
  3         4  
  3         898  
111              
112             =head2 next_seq
113              
114             Title : next_seq
115             Usage : $seq = $stream->next_seq()
116             Function: returns the next sequence in the stream
117             Returns : Bio::Seq object
118             Args :
119              
120              
121             =cut
122              
123             sub next_seq{
124 6     6 1 751 my ($self,@args) = @_;
125             ## When it's 1 sequence per line with no formatting at all,
126             ## grabbing it should be easy :)
127              
128 6         25 my $nextline = $self->_readline();
129 6 100       18 chomp($nextline) if defined $nextline;
130 6 100       13 return unless defined $nextline;
131 5 50       29 if ($nextline =~ /^([^\t]*)\t(.*)/) {
132 5         22 my ($id, $seq)=($1, uc($2));
133 5         13 $seq =~ s/\s+//g;
134 5         36 return Bio::Seq->new(-display_id=> $id, -seq => $seq);
135             } else {
136 0         0 $self->throw("Can't parse tabbed sequence entry:'$nextline' around line $.");
137             }
138             }
139              
140             =head2 write_seq
141              
142             Title : write_seq
143             Usage : $stream->write_seq($seq)
144             Function: writes the $seq object into the stream
145             Returns : 1 for success and 0 for error
146             Args : Bio::Seq object
147              
148              
149             =cut
150              
151             sub write_seq {
152 1     1 1 4 my ($self,@seq) = @_;
153 1         2 foreach (@seq) {
154 1 50       2 if ($_->display_id() =~ /\t/) {
155 0         0 $self->throw("display_id [".$_->display_id()."] contains TAB -- illegal in tab format");
156             }
157 1 50       3 $self->_print($_->display_id(), "\t",$_->seq, "\n") or return;
158             }
159              
160 1 50 33     3 $self->flush if $self->_flush_on_write && defined $self->_fh;
161 1         4 return 1;
162             }
163              
164             1;