line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::AlignIO |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# based on the Bio::SeqIO module |
5
|
|
|
|
|
|
|
# by Ewan Birney |
6
|
|
|
|
|
|
|
# and Lincoln Stein |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Peter Schattner |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# History |
13
|
|
|
|
|
|
|
# September, 2000 AlignIO written by Peter Schattner |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Bio::AlignIO - Handler for AlignIO Formats |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Bio::AlignIO; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$inputfilename = "testaln.fasta"; |
26
|
|
|
|
|
|
|
$in = Bio::AlignIO->new(-file => $inputfilename , |
27
|
|
|
|
|
|
|
-format => 'fasta'); |
28
|
|
|
|
|
|
|
$out = Bio::AlignIO->new(-file => ">out.aln.pfam" , |
29
|
|
|
|
|
|
|
-format => 'pfam'); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
while ( my $aln = $in->next_aln() ) { |
32
|
|
|
|
|
|
|
$out->write_aln($aln); |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# OR |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
use Bio::AlignIO; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
open MYIN, '<', 'testaln.fasta' or die "Could not read file 'testaln.fasta': $!\n"; |
40
|
|
|
|
|
|
|
$in = Bio::AlignIO->newFh(-fh => \*MYIN, |
41
|
|
|
|
|
|
|
-format => 'fasta'); |
42
|
|
|
|
|
|
|
open my $MYOUT, '>', 'testaln.pfam' or die "Could not write file 'testaln.pfam': $!\n"; |
43
|
|
|
|
|
|
|
$out = Bio::AlignIO->newFh(-fh => $MYOUT, |
44
|
|
|
|
|
|
|
-format => 'pfam'); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# World's smallest Fasta<->pfam format converter: |
47
|
|
|
|
|
|
|
print $out $_ while <$in>; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
L<Bio::AlignIO> is a handler module for the formats in the AlignIO set, |
52
|
|
|
|
|
|
|
for example, L<Bio::AlignIO::fasta>. It is the officially sanctioned way |
53
|
|
|
|
|
|
|
of getting at the alignment objects. The resulting alignment is a |
54
|
|
|
|
|
|
|
L<Bio::Align::AlignI>-compliant object. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The idea is that you request an object for a particular format. |
57
|
|
|
|
|
|
|
All the objects have a notion of an internal file that is read |
58
|
|
|
|
|
|
|
from or written to. A particular AlignIO object instance is configured |
59
|
|
|
|
|
|
|
for either input or output, you can think of it as a stream object. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Each object has functions: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
$stream->next_aln(); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
And: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
$stream->write_aln($aln); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Also: |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
$stream->type() # returns 'INPUT' or 'OUTPUT' |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
As an added bonus, you can recover a filehandle that is tied to the |
74
|
|
|
|
|
|
|
AlignIO object, allowing you to use the standard E<lt>E<gt> and print |
75
|
|
|
|
|
|
|
operations to read and write alignment objects: |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
use Bio::AlignIO; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# read from standard input |
80
|
|
|
|
|
|
|
$stream = Bio::AlignIO->newFh(-format => 'Fasta'); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
while ( $aln = <$stream> ) { |
83
|
|
|
|
|
|
|
# do something with $aln |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
And: |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
print $stream $aln; # when stream is in output mode |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
L<Bio::AlignIO> is patterned on the L<Bio::SeqIO> module and shares |
91
|
|
|
|
|
|
|
most of its features. One significant difference is that |
92
|
|
|
|
|
|
|
L<Bio::AlignIO> usually handles IO for only a single alignment at a time, |
93
|
|
|
|
|
|
|
whereas L<Bio::SeqIO> handles IO for multiple sequences in a single stream. |
94
|
|
|
|
|
|
|
The principal reason for this is that whereas simultaneously handling |
95
|
|
|
|
|
|
|
multiple sequences is a common requirement, simultaneous handling of |
96
|
|
|
|
|
|
|
multiple alignments is not. The only current exception is format |
97
|
|
|
|
|
|
|
C<bl2seq> which parses results of the BLAST C<bl2seq> program and which |
98
|
|
|
|
|
|
|
may produce several alignment pairs. This set of alignment pairs can |
99
|
|
|
|
|
|
|
be read using multiple calls to L<next_aln>. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 CONSTRUCTORS |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 Bio::AlignIO-E<gt>new() |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
$seqIO = Bio::AlignIO->new(-file => 'filename', -format=>$format); |
106
|
|
|
|
|
|
|
$seqIO = Bio::AlignIO->new(-fh => \*FILEHANDLE, -format=>$format); |
107
|
|
|
|
|
|
|
$seqIO = Bio::AlignIO->new(-format => $format); |
108
|
|
|
|
|
|
|
$seqIO = Bio::AlignIO->new(-fh => \*STDOUT, -format => $format); |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
The L<new> class method constructs a new L<Bio::AlignIO> object. |
111
|
|
|
|
|
|
|
The returned object can be used to retrieve or print alignment |
112
|
|
|
|
|
|
|
objects. L<new> accepts the following parameters: |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over 4 |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item -file |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
A file path to be opened for reading or writing. The usual Perl |
119
|
|
|
|
|
|
|
conventions apply: |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
'file' # open file for reading |
122
|
|
|
|
|
|
|
'>file' # open file for writing |
123
|
|
|
|
|
|
|
'>>file' # open file for appending |
124
|
|
|
|
|
|
|
'+<file' # open file read/write |
125
|
|
|
|
|
|
|
'command |' # open a pipe from the command |
126
|
|
|
|
|
|
|
'| command' # open a pipe to the command |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item -fh |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
You may provide new() with a previously-opened filehandle. For |
131
|
|
|
|
|
|
|
example, to read from STDIN: |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
$seqIO = Bio::AlignIO->new(-fh => \*STDIN); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Note that you must pass filehandles as references to globs. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
If neither a filehandle nor a filename is specified, then the module |
138
|
|
|
|
|
|
|
will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> |
139
|
|
|
|
|
|
|
semantics. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item -format |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Specify the format of the file. Supported formats include: |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
bl2seq Bl2seq Blast output |
146
|
|
|
|
|
|
|
clustalw clustalw (.aln) format |
147
|
|
|
|
|
|
|
emboss EMBOSS water and needle format |
148
|
|
|
|
|
|
|
fasta FASTA format |
149
|
|
|
|
|
|
|
maf Multiple Alignment Format |
150
|
|
|
|
|
|
|
mase mase (seaview) format |
151
|
|
|
|
|
|
|
mega MEGA format |
152
|
|
|
|
|
|
|
meme MEME format |
153
|
|
|
|
|
|
|
msf msf (GCG) format |
154
|
|
|
|
|
|
|
nexus Swofford et al NEXUS format |
155
|
|
|
|
|
|
|
pfam Pfam sequence alignment format |
156
|
|
|
|
|
|
|
phylip Felsenstein PHYLIP format |
157
|
|
|
|
|
|
|
prodom prodom (protein domain) format |
158
|
|
|
|
|
|
|
psi PSI-BLAST format |
159
|
|
|
|
|
|
|
selex selex (hmmer) format |
160
|
|
|
|
|
|
|
stockholm stockholm format |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Currently only those formats which were implemented in L<Bio::SimpleAlign> |
163
|
|
|
|
|
|
|
have been incorporated into L<Bio::AlignIO>. Specifically, C<mase>, C<stockholm> |
164
|
|
|
|
|
|
|
and C<prodom> have only been implemented for input. See the specific module |
165
|
|
|
|
|
|
|
(e.g. L<Bio::AlignIO::prodom>) for notes on supported versions. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
If no format is specified and a filename is given, then the module |
168
|
|
|
|
|
|
|
will attempt to deduce it from the filename suffix. If this is unsuccessful, |
169
|
|
|
|
|
|
|
C<fasta> format is assumed. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
The format name is case insensitive; C<FASTA>, C<Fasta> and C<fasta> are |
172
|
|
|
|
|
|
|
all treated equivalently. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=back |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 Bio::AlignIO-E<gt>newFh() |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$fh = Bio::AlignIO->newFh(-fh => \*FILEHANDLE, -format=>$format); |
179
|
|
|
|
|
|
|
# read from STDIN or use @ARGV: |
180
|
|
|
|
|
|
|
$fh = Bio::AlignIO->newFh(-format => $format); |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
This constructor behaves like L<new>, but returns a tied filehandle |
183
|
|
|
|
|
|
|
rather than a L<Bio::AlignIO> object. You can read sequences from this |
184
|
|
|
|
|
|
|
object using the familiar E<lt>E<gt> operator, and write to it using |
185
|
|
|
|
|
|
|
L<print>. The usual array and $_ semantics work. For example, you can |
186
|
|
|
|
|
|
|
read all sequence objects into an array like this: |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
@sequences = <$fh>; |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Other operations, such as read(), sysread(), write(), close(), and printf() |
191
|
|
|
|
|
|
|
are not supported. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=over 1 |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item -flush |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
By default, all files (or filehandles) opened for writing alignments |
198
|
|
|
|
|
|
|
will be flushed after each write_aln() making the file immediately |
199
|
|
|
|
|
|
|
usable. If you do not need this facility and would like to marginally |
200
|
|
|
|
|
|
|
improve the efficiency of writing multiple sequences to the same file |
201
|
|
|
|
|
|
|
(or filehandle), pass the -flush option '0' or any other value that |
202
|
|
|
|
|
|
|
evaluates as defined but false: |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
my $clustal = Bio::AlignIO->new( -file => "<prot.aln", |
205
|
|
|
|
|
|
|
-format => "clustalw" ); |
206
|
|
|
|
|
|
|
my $msf = Bio::AlignIO->new(-file => ">prot.msf", |
207
|
|
|
|
|
|
|
-format => "msf", |
208
|
|
|
|
|
|
|
-flush => 0 ); # go as fast as we can! |
209
|
|
|
|
|
|
|
while($seq = $clustal->next_aln) { $msf->write_aln($seq) } |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=back |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=head1 OBJECT METHODS |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
See below for more detailed summaries. The main methods are: |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 $alignment = $AlignIO-E<gt>next_aln() |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Fetch an alignment from a formatted file. |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head2 $AlignIO-E<gt>write_aln($aln) |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Write the specified alignment to a file. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head2 TIEHANDLE(), READLINE(), PRINT() |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
These provide the tie interface. See L<perltie> for more details. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head1 FEEDBACK |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 Mailing Lists |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
234
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
235
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
238
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 Support |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
247
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
248
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
249
|
|
|
|
|
|
|
with code and data examples if at all possible. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head2 Reporting Bugs |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
254
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
255
|
|
|
|
|
|
|
web: |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=head1 AUTHOR - Peter Schattner |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Email: schattner@alum.mit.edu |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Jason Stajich, jason@bioperl.org |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 APPENDIX |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
270
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# 'Let the code begin... |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
package Bio::AlignIO; |
277
|
|
|
|
|
|
|
|
278
|
35
|
|
|
35
|
|
10193
|
use strict; |
|
35
|
|
|
|
|
58
|
|
|
35
|
|
|
|
|
823
|
|
279
|
|
|
|
|
|
|
|
280
|
35
|
|
|
35
|
|
7699
|
use Bio::Seq; |
|
35
|
|
|
|
|
151
|
|
|
35
|
|
|
|
|
707
|
|
281
|
35
|
|
|
35
|
|
5448
|
use Bio::LocatableSeq; |
|
35
|
|
|
|
|
48
|
|
|
35
|
|
|
|
|
647
|
|
282
|
35
|
|
|
35
|
|
12691
|
use Bio::SimpleAlign; |
|
35
|
|
|
|
|
53
|
|
|
35
|
|
|
|
|
986
|
|
283
|
35
|
|
|
35
|
|
10521
|
use Bio::Tools::GuessSeqFormat; |
|
35
|
|
|
|
|
63
|
|
|
35
|
|
|
|
|
1063
|
|
284
|
35
|
|
|
35
|
|
161
|
use base qw(Bio::Root::Root Bio::Root::IO); |
|
35
|
|
|
|
|
40
|
|
|
35
|
|
|
|
|
30696
|
|
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head2 new |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
Title : new |
289
|
|
|
|
|
|
|
Usage : $stream = Bio::AlignIO->new(-file => $filename, |
290
|
|
|
|
|
|
|
-format => 'Format') |
291
|
|
|
|
|
|
|
Function: Returns a new seqstream |
292
|
|
|
|
|
|
|
Returns : A Bio::AlignIO::Handler initialised with |
293
|
|
|
|
|
|
|
the appropriate format |
294
|
|
|
|
|
|
|
Args : -file => $filename |
295
|
|
|
|
|
|
|
-format => format |
296
|
|
|
|
|
|
|
-fh => filehandle to attach to |
297
|
|
|
|
|
|
|
-displayname_flat => 1 [optional] |
298
|
|
|
|
|
|
|
to force the displayname to not show start/end |
299
|
|
|
|
|
|
|
information |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=cut |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub new { |
304
|
338
|
|
|
338
|
1
|
1467
|
my ($caller,@args) = @_; |
305
|
338
|
|
33
|
|
|
1179
|
my $class = ref($caller) || $caller; |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# or do we want to call SUPER on an object if $caller is an |
308
|
|
|
|
|
|
|
# object? |
309
|
338
|
100
|
|
|
|
1606
|
if( $class =~ /Bio::AlignIO::(\S+)/ ) { |
310
|
169
|
|
|
|
|
637
|
my ($self) = $class->SUPER::new(@args); |
311
|
169
|
|
|
|
|
624
|
$self->_initialize(@args); |
312
|
169
|
|
|
|
|
749
|
return $self; |
313
|
|
|
|
|
|
|
} else { |
314
|
|
|
|
|
|
|
|
315
|
169
|
|
|
|
|
523
|
my %param = @args; |
316
|
169
|
|
|
|
|
467
|
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys |
|
321
|
|
|
|
|
712
|
|
317
|
|
|
|
|
|
|
my $format = $param{'-format'} || |
318
|
169
|
|
100
|
|
|
674
|
$class->_guess_format( $param{-file} || $ARGV[0] ); |
319
|
169
|
100
|
|
|
|
350
|
unless ($format) { |
320
|
8
|
50
|
|
|
|
22
|
if ($param{-file}) { |
|
|
0
|
|
|
|
|
|
321
|
8
|
|
33
|
|
|
67
|
$format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file}||$ARGV[0] )->guess; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
elsif ($param{-fh}) { |
324
|
0
|
|
0
|
|
|
0
|
$format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh}||$ARGV[0] )->guess; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
} |
327
|
169
|
|
|
|
|
291
|
$format = "\L$format"; # normalize capitalization to lower case |
328
|
169
|
50
|
|
|
|
350
|
$class->throw("Unknown format given or could not determine it [$format]") |
329
|
|
|
|
|
|
|
unless $format; |
330
|
|
|
|
|
|
|
|
331
|
169
|
50
|
|
|
|
494
|
return unless( $class->_load_format_module($format) ); |
332
|
169
|
|
|
|
|
1166
|
return "Bio::AlignIO::$format"->new(@args); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head2 newFh |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Title : newFh |
340
|
|
|
|
|
|
|
Usage : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format') |
341
|
|
|
|
|
|
|
Function: does a new() followed by an fh() |
342
|
|
|
|
|
|
|
Example : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format') |
343
|
|
|
|
|
|
|
$sequence = <$fh>; # read a sequence object |
344
|
|
|
|
|
|
|
print $fh $sequence; # write a sequence object |
345
|
|
|
|
|
|
|
Returns : filehandle tied to the Bio::AlignIO::Fh class |
346
|
|
|
|
|
|
|
Args : |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=cut |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
sub newFh { |
351
|
56
|
|
|
56
|
1
|
335
|
my $class = shift; |
352
|
56
|
50
|
|
|
|
178
|
return unless my $self = $class->new(@_); |
353
|
56
|
|
|
|
|
194
|
return $self->fh; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head2 fh |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Title : fh |
359
|
|
|
|
|
|
|
Usage : $obj->fh |
360
|
|
|
|
|
|
|
Function: |
361
|
|
|
|
|
|
|
Example : $fh = $obj->fh; # make a tied filehandle |
362
|
|
|
|
|
|
|
$sequence = <$fh>; # read a sequence object |
363
|
|
|
|
|
|
|
print $fh $sequence; # write a sequence object |
364
|
|
|
|
|
|
|
Returns : filehandle tied to the Bio::AlignIO::Fh class |
365
|
|
|
|
|
|
|
Args : |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=cut |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub fh { |
371
|
56
|
|
|
56
|
1
|
62
|
my $self = shift; |
372
|
56
|
|
33
|
|
|
141
|
my $class = ref($self) || $self; |
373
|
56
|
|
|
|
|
127
|
my $s = Symbol::gensym; |
374
|
56
|
|
|
|
|
773
|
tie $$s,$class,$self; |
375
|
56
|
|
|
|
|
147
|
return $s; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=head2 format |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Title : format |
382
|
|
|
|
|
|
|
Usage : $format = $stream->format() |
383
|
|
|
|
|
|
|
Function: Get the alignment format |
384
|
|
|
|
|
|
|
Returns : alignment format |
385
|
|
|
|
|
|
|
Args : none |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=cut |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# format() method inherited from Bio::Root::IO |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# _initialize is where the heavy stuff will happen when new is called |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
sub _initialize { |
395
|
169
|
|
|
169
|
|
284
|
my($self,@args) = @_; |
396
|
169
|
|
|
|
|
759
|
my ($flat,$alphabet,$width) = $self->_rearrange([qw(DISPLAYNAME_FLAT ALPHABET WIDTH)], |
397
|
|
|
|
|
|
|
@args); |
398
|
169
|
50
|
|
|
|
452
|
$self->force_displayname_flat($flat) if defined $flat; |
399
|
169
|
|
|
|
|
543
|
$self->alphabet($alphabet); |
400
|
169
|
50
|
|
|
|
311
|
$self->width($width) if defined $width; |
401
|
169
|
|
|
|
|
555
|
$self->_initialize_io(@args); |
402
|
169
|
|
|
|
|
410
|
1; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head2 _load_format_module |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Title : _load_format_module |
408
|
|
|
|
|
|
|
Usage : *INTERNAL AlignIO stuff* |
409
|
|
|
|
|
|
|
Function: Loads up (like use) a module at run time on demand |
410
|
|
|
|
|
|
|
Example : |
411
|
|
|
|
|
|
|
Returns : |
412
|
|
|
|
|
|
|
Args : |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=cut |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub _load_format_module { |
417
|
169
|
|
|
169
|
|
232
|
my ($self,$format) = @_; |
418
|
169
|
|
|
|
|
326
|
my $module = "Bio::AlignIO::" . $format; |
419
|
169
|
|
|
|
|
180
|
my $ok; |
420
|
|
|
|
|
|
|
|
421
|
169
|
|
|
|
|
234
|
eval { |
422
|
169
|
|
|
|
|
653
|
$ok = $self->_load_module($module); |
423
|
|
|
|
|
|
|
}; |
424
|
169
|
50
|
|
|
|
343
|
if ( $@ ) { |
425
|
0
|
|
|
|
|
0
|
print STDERR <
|
426
|
|
|
|
|
|
|
$self: $format cannot be found |
427
|
|
|
|
|
|
|
Exception $@ |
428
|
|
|
|
|
|
|
For more information about the AlignIO system please see the AlignIO docs. |
429
|
|
|
|
|
|
|
This includes ways of checking for formats at compile time, not run time |
430
|
|
|
|
|
|
|
END |
431
|
|
|
|
|
|
|
; |
432
|
0
|
|
|
|
|
0
|
return; |
433
|
|
|
|
|
|
|
} |
434
|
169
|
|
|
|
|
387
|
return 1; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head2 next_aln |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Title : next_aln |
440
|
|
|
|
|
|
|
Usage : $aln = $stream->next_aln |
441
|
|
|
|
|
|
|
Function: reads the next $aln object from the stream |
442
|
|
|
|
|
|
|
Returns : a Bio::Align::AlignI compliant object |
443
|
|
|
|
|
|
|
Args : |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=cut |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub next_aln { |
448
|
0
|
|
|
0
|
1
|
0
|
my ($self,$aln) = @_; |
449
|
0
|
|
|
|
|
0
|
$self->throw("Sorry, you cannot read from a generic Bio::AlignIO object."); |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 write_aln |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Title : write_aln |
455
|
|
|
|
|
|
|
Usage : $stream->write_aln($aln) |
456
|
|
|
|
|
|
|
Function: writes the $aln object into the stream |
457
|
|
|
|
|
|
|
Returns : 1 for success and 0 for error |
458
|
|
|
|
|
|
|
Args : Bio::Align::AlignI object |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
sub write_aln { |
463
|
0
|
|
|
0
|
1
|
0
|
my ($self,$aln) = @_; |
464
|
0
|
|
|
|
|
0
|
$self->throw("Sorry, you cannot write to a generic Bio::AlignIO object."); |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head2 _guess_format |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Title : _guess_format |
470
|
|
|
|
|
|
|
Usage : $obj->_guess_format($filename) |
471
|
|
|
|
|
|
|
Function: |
472
|
|
|
|
|
|
|
Example : |
473
|
|
|
|
|
|
|
Returns : guessed format of filename (lower case) |
474
|
|
|
|
|
|
|
Args : |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=cut |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
sub _guess_format { |
479
|
25
|
|
|
25
|
|
41
|
my $class = shift; |
480
|
25
|
50
|
|
|
|
56
|
return unless $_ = shift; |
481
|
25
|
100
|
|
|
|
111
|
return 'clustalw' if /\.aln$/i; |
482
|
23
|
50
|
|
|
|
84
|
return 'emboss' if /\.(water|needle)$/i; |
483
|
23
|
100
|
|
|
|
62
|
return 'metafasta' if /\.metafasta$/; |
484
|
22
|
100
|
|
|
|
112
|
return 'fasta' if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i; |
485
|
20
|
100
|
|
|
|
67
|
return 'maf' if /\.maf/i; |
486
|
18
|
100
|
|
|
|
60
|
return 'mega' if /\.(meg|mega)$/i; |
487
|
17
|
50
|
|
|
|
48
|
return 'meme' if /\.meme$/i; |
488
|
17
|
100
|
|
|
|
58
|
return 'msf' if /\.(msf|pileup|gcg)$/i; |
489
|
15
|
100
|
|
|
|
51
|
return 'nexus' if /\.(nexus|nex)$/i; |
490
|
14
|
100
|
|
|
|
66
|
return 'pfam' if /\.(pfam|pfm)$/i; |
491
|
10
|
100
|
|
|
|
29
|
return 'phylip' if /\.(phylip|phlp|phyl|phy|ph)$/i; |
492
|
9
|
50
|
|
|
|
24
|
return 'psi' if /\.psi$/i; |
493
|
9
|
50
|
|
|
|
25
|
return 'stockholm' if /\.stk$/i; |
494
|
9
|
100
|
|
|
|
31
|
return 'selex' if /\.(selex|slx|selx|slex|sx)$/i; |
495
|
8
|
50
|
|
|
|
39
|
return 'xmfa' if /\.xmfa$/i; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
sub DESTROY { |
499
|
225
|
|
|
225
|
|
19126
|
my $self = shift; |
500
|
225
|
|
|
|
|
772
|
$self->close(); |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub TIEHANDLE { |
504
|
56
|
|
|
56
|
|
79
|
my $class = shift; |
505
|
56
|
|
|
|
|
191
|
return bless {'alignio' => shift},$class; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub READLINE { |
509
|
28
|
|
|
28
|
|
217
|
my $self = shift; |
510
|
28
|
50
|
50
|
|
|
164
|
return $self->{'alignio'}->next_aln() || undef unless wantarray; |
511
|
0
|
|
|
|
|
0
|
my (@list,$obj); |
512
|
0
|
|
|
|
|
0
|
push @list,$obj while $obj = $self->{'alignio'}->next_aln(); |
513
|
0
|
|
|
|
|
0
|
return @list; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
sub PRINT { |
517
|
13
|
|
|
13
|
|
25
|
my $self = shift; |
518
|
13
|
|
|
|
|
81
|
$self->{'alignio'}->write_aln(@_); |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 force_displayname_flat |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Title : force_displayname_flat |
525
|
|
|
|
|
|
|
Usage : $obj->force_displayname_flat($newval) |
526
|
|
|
|
|
|
|
Function: |
527
|
|
|
|
|
|
|
Example : |
528
|
|
|
|
|
|
|
Returns : value of force_displayname_flat (a scalar) |
529
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=cut |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub force_displayname_flat{ |
535
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
536
|
4
|
50
|
|
|
|
14
|
return $self->{'_force_displayname_flat'} = shift if @_; |
537
|
4
|
|
50
|
|
|
29
|
return $self->{'_force_displayname_flat'} || 0; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head2 alphabet |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Title : alphabet |
543
|
|
|
|
|
|
|
Usage : $obj->alphabet($newval) |
544
|
|
|
|
|
|
|
Function: Get/Set alphabet for purpose of passing to Bio::LocatableSeq creation |
545
|
|
|
|
|
|
|
Example : $obj->alphabet('dna'); |
546
|
|
|
|
|
|
|
Returns : value of alphabet (a scalar) |
547
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=cut |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
sub alphabet { |
553
|
1185
|
|
|
1185
|
1
|
1151
|
my $self = shift; |
554
|
1185
|
|
|
|
|
938
|
my $value = shift; |
555
|
1185
|
100
|
|
|
|
1839
|
if ( defined $value ) { |
556
|
4
|
50
|
66
|
|
|
29
|
$self->throw("Invalid alphabet $value") unless $value eq 'rna' || $value eq 'protein' || $value eq 'dna'; |
|
|
|
33
|
|
|
|
|
557
|
4
|
|
|
|
|
6
|
$self->{'_alphabet'} = $value; |
558
|
|
|
|
|
|
|
} |
559
|
1185
|
|
|
|
|
4778
|
return $self->{'_alphabet'}; |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
1; |