line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::AlignIO::nexus |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Copyright Heikki Lehvaslaiho |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Bio::AlignIO::nexus - NEXUS format sequence input/output stream |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Do not use this module directly. Use it via the L<Bio::AlignIO> class. |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use Bio::AlignIO; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $in = Bio::AlignIO->new(-format => 'nexus', |
18
|
|
|
|
|
|
|
-file => 'aln.nexus'); |
19
|
|
|
|
|
|
|
while( my $aln = $in->next_aln ) { |
20
|
|
|
|
|
|
|
# do something with the alignment |
21
|
|
|
|
|
|
|
} |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This object can transform L<Bio::Align::AlignI> objects to and from NEXUS |
26
|
|
|
|
|
|
|
data blocks. See method documentation for supported NEXUS features. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Will Fisher has written an excellent standalone NEXUS format parser in |
31
|
|
|
|
|
|
|
Perl, readnexus. A number of tricks were adapted from it. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 FEEDBACK |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head2 Support |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
42
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
43
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
44
|
|
|
|
|
|
|
with code and data examples if at all possible. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head2 Reporting Bugs |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
49
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
50
|
|
|
|
|
|
|
web: |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 AUTHORS - Heikki Lehvaslaiho |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Email: heikki-at-bioperl-dot-org |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 APPENDIX |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
61
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=cut |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Let the code begin... |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
package Bio::AlignIO::nexus; |
68
|
3
|
|
|
3
|
|
493
|
use vars qw(%valid_type); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
129
|
|
69
|
3
|
|
|
3
|
|
10
|
use strict; |
|
3
|
|
|
|
|
2
|
|
|
3
|
|
|
|
|
51
|
|
70
|
3
|
|
|
3
|
|
9
|
no strict "refs"; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
63
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
3
|
|
|
3
|
|
8
|
use base qw(Bio::AlignIO); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
447
|
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
BEGIN { |
76
|
3
|
|
|
3
|
|
5
|
%valid_type = map {$_, 1} qw( dna rna protein standard ); |
|
12
|
|
|
|
|
5301
|
|
77
|
|
|
|
|
|
|
# standard throws error: inherited from Bio::PrimarySeq |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 new |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Title : new |
83
|
|
|
|
|
|
|
Usage : $alignio = Bio::AlignIO->new(-format => 'nexus', -file => 'filename'); |
84
|
|
|
|
|
|
|
Function: returns a new Bio::AlignIO object to handle NEXUS files |
85
|
|
|
|
|
|
|
Returns : Bio::AlignIO::nexus object |
86
|
|
|
|
|
|
|
Args : -verbose => verbosity setting (-1,0,1,2) |
87
|
|
|
|
|
|
|
-file => name of file to read in or with ">" - writeout |
88
|
|
|
|
|
|
|
-fh => alternative to -file param - provide a filehandle |
89
|
|
|
|
|
|
|
to read from/write to |
90
|
|
|
|
|
|
|
-format => type of Alignment Format to process or produce |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Customization of nexus flavor output |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
-show_symbols => print the symbols="ATGC" in the data definition |
95
|
|
|
|
|
|
|
(MrBayes does not like this) |
96
|
|
|
|
|
|
|
boolean [default is 1] |
97
|
|
|
|
|
|
|
-show_endblock => print an 'endblock;' at the end of the data |
98
|
|
|
|
|
|
|
(MrBayes does not like this) |
99
|
|
|
|
|
|
|
boolean [default is 1] |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=cut |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub _initialize { |
104
|
25
|
|
|
25
|
|
43
|
my ($self, @args) = @_; |
105
|
25
|
|
|
|
|
63
|
$self->SUPER::_initialize(@args); |
106
|
25
|
|
|
|
|
92
|
my ($show_symbols, $endblock) = |
107
|
|
|
|
|
|
|
$self->_rearrange([qw(SHOW_SYMBOLS SHOW_ENDBLOCK)], @args); |
108
|
25
|
|
|
|
|
51
|
my @names = qw(symbols endblock); |
109
|
25
|
|
|
|
|
43
|
for my $v ( $show_symbols, $endblock ) { |
110
|
50
|
50
|
|
|
|
83
|
$v = 1 unless defined $v; # default value is 1 |
111
|
50
|
|
|
|
|
49
|
my $n = shift @names; |
112
|
50
|
|
|
|
|
82
|
$self->flag($n, $v); |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 next_aln |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Title : next_aln |
120
|
|
|
|
|
|
|
Usage : $aln = $stream->next_aln() |
121
|
|
|
|
|
|
|
Function: Returns the next alignment in the stream. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Supports the following NEXUS format features: |
124
|
|
|
|
|
|
|
- The file has to start with '#NEXUS' |
125
|
|
|
|
|
|
|
- Reads in the name of the alignment from a comment |
126
|
|
|
|
|
|
|
(anything after 'TITLE: ') . |
127
|
|
|
|
|
|
|
- Sequence names can be given in a taxa block, too. |
128
|
|
|
|
|
|
|
- If matchchar notation is used, converts |
129
|
|
|
|
|
|
|
them back to sequence characters. |
130
|
|
|
|
|
|
|
- Does character conversions specified in the |
131
|
|
|
|
|
|
|
NEXUS equate command. |
132
|
|
|
|
|
|
|
- Sequence names of type 'Homo sapiens' and |
133
|
|
|
|
|
|
|
Homo_sapiens are treated identically. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Returns : L<Bio::Align::AlignI> object |
136
|
|
|
|
|
|
|
Args : |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub next_aln { |
143
|
22
|
|
|
22
|
1
|
472
|
my $self = shift; |
144
|
22
|
|
|
|
|
25
|
my $entry; |
145
|
22
|
|
|
|
|
28
|
my ($aln_name, $seqcount, $residuecount, %hash, $alphabet, |
146
|
|
|
|
|
|
|
$match, $gap, $missing, $equate, $interleave, |
147
|
|
|
|
|
|
|
$name,$str,@names,$seqname,$start,$end,$count,$seq); |
148
|
22
|
|
|
|
|
34
|
local $Bio::LocatableSeq::OTHER_SYMBOLS = '\*\?\.'; |
149
|
22
|
|
|
|
|
21
|
local $Bio::LocatableSeq::GAP_SYMBOLS = '\-'; |
150
|
22
|
|
|
|
|
130
|
my $aln = Bio::SimpleAlign->new(-source => 'nexus'); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# file starts with '#NEXUS' but we allow white space only lines before it |
153
|
22
|
|
|
|
|
68
|
$entry = $self->_readline; |
154
|
22
|
|
33
|
|
|
121
|
$entry = $self->_readline while defined $entry && $entry =~ /^\s+$/; |
155
|
|
|
|
|
|
|
|
156
|
22
|
50
|
|
|
|
34
|
return unless $entry; |
157
|
22
|
50
|
33
|
|
|
132
|
$self->throw("Not a valid interleaved NEXUS file! [#NEXUS] not starting the file\n$entry") |
158
|
|
|
|
|
|
|
unless ($entry && $entry =~ /^#NEXUS/i); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# skip anything before either the taxa or data block |
161
|
|
|
|
|
|
|
# but read in the optional title in a comment |
162
|
22
|
|
|
|
|
48
|
while (defined($entry = $self->_readline)) { |
163
|
64
|
|
|
|
|
82
|
local ($_) = $entry; |
164
|
64
|
100
|
|
|
|
122
|
/\[TITLE. *([^\]]+)]\s+/i and $aln_name = $1; |
165
|
64
|
100
|
100
|
|
|
293
|
last if /^begin +data/i || /^begin +taxa/i; |
166
|
|
|
|
|
|
|
} |
167
|
22
|
100
|
50
|
|
|
68
|
$aln_name =~ s/\s/_/g and $aln->id($aln_name) if $aln_name; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# data and taxa blocks |
170
|
22
|
|
|
|
|
18
|
my $incomment; |
171
|
22
|
|
|
|
|
48
|
while (defined ($entry = $self->_readline)) { |
172
|
402
|
|
|
|
|
433
|
local ($_) = $entry; |
173
|
402
|
100
|
|
|
|
1290
|
next if s/\[[^\]]+\]//g; # remove comments |
174
|
220
|
100
|
|
|
|
541
|
if( s/\[[^\]]+$// ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
175
|
6
|
|
|
|
|
7
|
$incomment = 1; |
176
|
|
|
|
|
|
|
# skip line if it is now empty or contains only whitespace |
177
|
6
|
50
|
|
|
|
27
|
next if /^\s*$/; |
178
|
|
|
|
|
|
|
} elsif($incomment) { |
179
|
39
|
100
|
|
|
|
58
|
if( s/^[^\]]*\]// ) { |
180
|
6
|
|
|
|
|
8
|
$incomment = 0; |
181
|
|
|
|
|
|
|
} else { |
182
|
33
|
|
|
|
|
50
|
next; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
} elsif( /taxlabels/i ) { |
185
|
|
|
|
|
|
|
# doesn't deal with taxlabels adequately and can mess things up! |
186
|
|
|
|
|
|
|
# @names = $self->_read_taxlabels; |
187
|
|
|
|
|
|
|
} else { |
188
|
|
|
|
|
|
|
|
189
|
162
|
100
|
|
|
|
299
|
/ntax\s*=\s*(\d+)/i and $seqcount = $1; |
190
|
162
|
100
|
|
|
|
285
|
/nchar\s*=\s*(\d+)/i and $residuecount = $1; |
191
|
162
|
100
|
|
|
|
221
|
/matchchar\s*=\s*(.)/i and $match = $1; |
192
|
162
|
100
|
|
|
|
249
|
/gap\s*=\s*(.)/i and $gap = $1; |
193
|
162
|
100
|
|
|
|
255
|
/missing\s*=\s*(.)/i and $missing = $1; |
194
|
162
|
50
|
|
|
|
223
|
/equate\s*=\s*\"([^\"]+)/i and $equate = $1; # "e.g. equate="T=C G=A"; |
195
|
162
|
100
|
|
|
|
235
|
/datatype\s*=\s*(\w+)/i and $alphabet = lc $1; |
196
|
162
|
100
|
|
|
|
209
|
/interleave/i and $interleave = 1 ; |
197
|
162
|
100
|
|
|
|
402
|
last if /matrix/io; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
22
|
50
|
|
|
|
44
|
$self->throw("Not a valid NEXUS sequence file. Datatype not specified.") |
201
|
|
|
|
|
|
|
unless $alphabet; |
202
|
|
|
|
|
|
|
$self->throw("Not a valid NEXUS sequence file. Datatype should not be [$alphabet]") |
203
|
22
|
50
|
|
|
|
61
|
unless $valid_type{$alphabet}; |
204
|
22
|
50
|
33
|
|
|
96
|
$self->throw("\"$gap\" is not a valid gap character. For compatability, gap char can not be one of: ()[]{}/\,;:=*'`\"<>^") |
205
|
|
|
|
|
|
|
if $gap && $gap =~ /[\(\)\[\]\{\}\/\\\,\;\:\=\*\'\`\<\>\^]/; |
206
|
22
|
50
|
66
|
|
|
91
|
$self->throw("\"$missing\" is not a valid missing character. For compatability, missing char can not be one of: ()[]{}/\,;:=*'`\"<>^") |
207
|
|
|
|
|
|
|
if $missing && $missing =~ /[\(\)\[\]\{\}\/\\\,\;\:\=\*\'\`\<\>\^]/; |
208
|
|
|
|
|
|
|
|
209
|
22
|
|
|
|
|
74
|
$aln->gap_char($gap); |
210
|
22
|
|
|
|
|
51
|
$aln->missing_char($missing); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# |
213
|
|
|
|
|
|
|
# if data is not right after the matrix line |
214
|
|
|
|
|
|
|
# read the empty lines out |
215
|
|
|
|
|
|
|
# |
216
|
22
|
|
|
|
|
45
|
while ($entry = $self->_readline) { |
217
|
25
|
100
|
|
|
|
81
|
unless ($entry =~ /^\s+$/) { |
218
|
22
|
|
|
|
|
58
|
$self->_pushback($entry); |
219
|
22
|
|
|
|
|
23
|
last; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# |
224
|
|
|
|
|
|
|
# matrix command |
225
|
|
|
|
|
|
|
# |
226
|
|
|
|
|
|
|
# first alignment section |
227
|
22
|
50
|
|
|
|
46
|
if (@names == 0) { # taxa block did not exist |
228
|
22
|
|
|
|
|
32
|
while ($entry = $self->_readline) { |
229
|
467
|
|
|
|
|
487
|
local ($_) = $entry; |
230
|
467
|
100
|
|
|
|
1173
|
if( s/\[[^\]]+\]//g ) { #] remove comments |
231
|
205
|
100
|
|
|
|
371
|
next if /^\s*$/; |
232
|
|
|
|
|
|
|
# skip line if it is now empty or contains only whitespace |
233
|
|
|
|
|
|
|
} |
234
|
459
|
100
|
100
|
|
|
1005
|
if ($interleave && defined$count && ($count <= $seqcount)) { |
|
|
|
66
|
|
|
|
|
235
|
78
|
100
|
|
|
|
159
|
/^\s+$/ and last; |
236
|
|
|
|
|
|
|
} else { |
237
|
381
|
100
|
|
|
|
671
|
/^\s+$/ and next; |
238
|
|
|
|
|
|
|
} |
239
|
448
|
100
|
|
|
|
719
|
/^\s*;/ and last; # stop if colon at end of matrix is on it's own line |
240
|
|
|
|
|
|
|
#/^\s*;\s*$/ and last; |
241
|
442
|
50
|
|
|
|
1262
|
if ( /^\s*([\"\'](.+?)[\"\']|(\S+))\s+(.*)\s*$/ ) { |
242
|
|
|
|
|
|
|
# get single and double quoted names, or all the first |
243
|
|
|
|
|
|
|
# nonwhite word as the name, and remained is seq |
244
|
|
|
|
|
|
|
#if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)$/) { #' |
245
|
442
|
|
66
|
|
|
1008
|
$name = ($2 || $3); |
246
|
442
|
100
|
|
|
|
732
|
if ($4) { |
247
|
|
|
|
|
|
|
# seq is on same line as name |
248
|
|
|
|
|
|
|
# this is the usual NEXUS format |
249
|
432
|
|
|
|
|
425
|
$str = $4; |
250
|
|
|
|
|
|
|
} else { |
251
|
|
|
|
|
|
|
# otherwise get seq from following lines. No comments allowed |
252
|
|
|
|
|
|
|
# a less common matrix format, usually used for very long seqs |
253
|
10
|
|
|
|
|
12
|
$str=''; |
254
|
10
|
|
|
|
|
22
|
while (local ($_) = $self->_readline) { |
255
|
1860
|
|
|
|
|
1682
|
my $str_tmp = $_; |
256
|
1860
|
|
|
|
|
3664
|
$str_tmp =~ s/[\s;]//g; |
257
|
1860
|
|
|
|
|
1508
|
$str .= $str_tmp; |
258
|
1860
|
100
|
|
|
|
3807
|
last if length$str == $residuecount; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
} |
261
|
442
|
|
|
|
|
322
|
$name =~ s/ /_/g; |
262
|
442
|
|
|
|
|
419
|
push @names, $name; |
263
|
|
|
|
|
|
|
|
264
|
442
|
|
|
|
|
4022
|
$str =~ s/[\s;]//g; |
265
|
442
|
|
|
|
|
345
|
$count = @names; |
266
|
442
|
|
|
|
|
705
|
$hash{$count} = $str; |
267
|
|
|
|
|
|
|
} |
268
|
442
|
50
|
|
|
|
572
|
$self->throw("Not a valid interleaved NEXUS file! seqcount [$count] > predeclared [$seqcount] in the first section") if $count > $seqcount; |
269
|
442
|
100
|
|
|
|
1196
|
/;/ and last; # stop if colon at end of matrix is on the same line as the last seq |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# interleaved sections |
274
|
22
|
|
|
|
|
27
|
$count = 0; |
275
|
22
|
100
|
|
|
|
41
|
if ( $interleave ) { # only read next section if file is interleaved |
276
|
7
|
|
|
|
|
14
|
while( $entry = $self->_readline) { |
277
|
1003
|
|
|
|
|
1023
|
local ($_) = $entry; |
278
|
1003
|
100
|
|
|
|
2700
|
if( s/\[[^\]]+\]//g ) { #] remove comments |
279
|
826
|
100
|
|
|
|
1417
|
next if /^\s*$/; # skip line if it is now empty or contains only whitespace |
280
|
|
|
|
|
|
|
} |
281
|
945
|
100
|
|
|
|
1330
|
/^\s*;/ and last; # stop if colon at end of matrix is on it's own line |
282
|
939
|
100
|
|
|
|
1388
|
$count = 0, next if $entry =~ /^\s*$/; |
283
|
872
|
50
|
|
|
|
1913
|
if (/^\s*(\'([^\']*?)\'|([^\']\S*))\s+(.*)$/) { |
284
|
872
|
|
|
|
|
1092
|
$str = $4; |
285
|
872
|
|
|
|
|
2558
|
$str =~ s/[\s;]//g; |
286
|
872
|
|
|
|
|
628
|
$count++; |
287
|
872
|
|
|
|
|
1205
|
$hash{$count} .= $str; |
288
|
|
|
|
|
|
|
}; |
289
|
872
|
50
|
|
|
|
1071
|
$self->throw("Not a valid interleaved NEXUS file! |
290
|
|
|
|
|
|
|
seqcount [$count] > predeclared [$seqcount] ") if $count > $seqcount; |
291
|
872
|
100
|
|
|
|
2036
|
/;/ and last; # stop if colon at end of matrix is on the same line as the last seq |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
22
|
50
|
|
|
|
47
|
return if @names < 1; |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# sequence creation |
298
|
22
|
|
|
|
|
22
|
$count = 0; |
299
|
22
|
|
|
|
|
37
|
foreach $name ( @names ) { |
300
|
442
|
|
|
|
|
365
|
$count++; |
301
|
442
|
50
|
|
|
|
806
|
if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) { |
302
|
0
|
|
|
|
|
0
|
($seqname,$start,$end) = ($1,$2,$3); |
303
|
|
|
|
|
|
|
} else { |
304
|
442
|
|
|
|
|
998
|
($seqname,$start,$str) = ($name,1,$hash{$count}); |
305
|
442
|
|
|
|
|
7897
|
$str =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]//g; |
306
|
442
|
|
|
|
|
521
|
$end = length($str); |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# consistency test |
310
|
|
|
|
|
|
|
$self->throw("Length of sequence [$seqname] is not [$residuecount]; got".CORE::length($hash{$count})) |
311
|
442
|
50
|
|
|
|
800
|
unless CORE::length($hash{$count}) == $residuecount; |
312
|
|
|
|
|
|
|
|
313
|
442
|
|
|
|
|
1072
|
$seq = Bio::LocatableSeq->new('-seq' => $hash{$count}, |
314
|
|
|
|
|
|
|
'-display_id' => $seqname, |
315
|
|
|
|
|
|
|
'-start' => $start, |
316
|
|
|
|
|
|
|
'-end' => $end, |
317
|
|
|
|
|
|
|
'-alphabet' => $alphabet |
318
|
|
|
|
|
|
|
); |
319
|
442
|
|
|
|
|
861
|
$aln->add_seq($seq); |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# if matchchar is used |
323
|
22
|
100
|
|
|
|
57
|
$aln->unmatch($match) if $match; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# if equate ( e.g. equate="T=C G=A") is used |
326
|
22
|
50
|
|
|
|
38
|
if ($equate) { |
327
|
0
|
|
|
|
|
0
|
$aln->map_chars($1, $2) while $equate =~ /(\S)=(\S)/g; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
22
|
|
100
|
|
|
105
|
while (defined $entry && |
331
|
|
|
|
|
|
|
$entry !~ /endblock/i) { |
332
|
453
|
|
|
|
|
615
|
$entry = $self->_readline; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
22
|
50
|
|
|
|
73
|
return $aln if $aln->num_sequences; |
336
|
0
|
|
|
|
|
0
|
return; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub _read_taxlabels { |
340
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
341
|
0
|
|
|
|
|
0
|
my ($name, @names); |
342
|
0
|
|
|
|
|
0
|
while (my $entry = $self->_readline) { |
343
|
0
|
0
|
|
|
|
0
|
last if $entry =~ m/^\s*(END)?;/i; |
344
|
0
|
0
|
|
|
|
0
|
if( $entry =~ m/\s*(\S+)\s+/ ) { |
345
|
0
|
|
|
|
|
0
|
($name) = ($1); |
346
|
0
|
|
|
|
|
0
|
$name =~ s/\[[^\[]+\]//g; |
347
|
0
|
|
|
|
|
0
|
$name =~ s/\W/_/g; |
348
|
0
|
|
|
|
|
0
|
push @names, $name; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
0
|
|
|
|
|
0
|
return @names; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 write_aln |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
Title : write_aln |
357
|
|
|
|
|
|
|
Usage : $stream->write_aln(@aln) |
358
|
|
|
|
|
|
|
Function: Writes the $aln object into the stream in interleaved NEXUS |
359
|
|
|
|
|
|
|
format. Everything is written into a data block. |
360
|
|
|
|
|
|
|
SimpleAlign methods match_char, missing_char and gap_char must be set |
361
|
|
|
|
|
|
|
if you want to see them in the output. |
362
|
|
|
|
|
|
|
Returns : 1 for success and 0 for error |
363
|
|
|
|
|
|
|
Args : L<Bio::Align::AlignI> object |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=cut |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub write_aln { |
368
|
2
|
|
|
2
|
1
|
7
|
my ($self,@aln) = @_; |
369
|
2
|
|
|
|
|
3
|
my $count = 0; |
370
|
2
|
|
|
|
|
2
|
my $wrapped = 0; |
371
|
2
|
|
|
|
|
3
|
my $maxname; |
372
|
2
|
|
|
|
|
3
|
my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index ); |
373
|
2
|
|
|
|
|
5
|
my ($match, $missing, $gap,$symbols) = ('', '', '',''); |
374
|
|
|
|
|
|
|
|
375
|
2
|
|
|
|
|
5
|
foreach my $aln (@aln) { |
376
|
2
|
50
|
33
|
|
|
14
|
if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { |
377
|
0
|
|
|
|
|
0
|
$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); |
378
|
0
|
|
|
|
|
0
|
next; |
379
|
|
|
|
|
|
|
} |
380
|
2
|
50
|
|
|
|
4
|
$self->throw("All sequences in the alignment must be the same length") |
381
|
|
|
|
|
|
|
unless $aln->is_flush($self->verbose); |
382
|
|
|
|
|
|
|
|
383
|
2
|
|
|
|
|
29
|
$length = $aln->length(); |
384
|
|
|
|
|
|
|
|
385
|
2
|
|
|
|
|
7
|
$self->_print (sprintf("#NEXUS\n[TITLE: %s]\n\nbegin data;\ndimensions ntax=%s nchar=%s;\n", |
386
|
|
|
|
|
|
|
$aln->id, $aln->num_sequences, $length)); |
387
|
2
|
50
|
|
|
|
8
|
$match = "match=". $aln->match_char if $aln->match_char; |
388
|
2
|
100
|
|
|
|
6
|
$missing = "missing=". $aln->missing_char if $aln->missing_char; |
389
|
2
|
50
|
|
|
|
6
|
$gap = "gap=". $aln->gap_char if $aln->gap_char; |
390
|
|
|
|
|
|
|
|
391
|
2
|
50
|
33
|
|
|
6
|
$symbols = 'symbols="'.join('',$aln->symbol_chars). '"' |
392
|
|
|
|
|
|
|
if( $self->flag('symbols') && $aln->symbol_chars); |
393
|
2
|
|
|
|
|
7
|
$self->_print |
394
|
|
|
|
|
|
|
(sprintf("format interleave datatype=%s %s %s %s %s;\n\nmatrix\n", |
395
|
|
|
|
|
|
|
$aln->get_seq_by_pos(1)->alphabet, $match, |
396
|
|
|
|
|
|
|
$missing, $gap, $symbols)); |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# account for single quotes round names |
399
|
2
|
|
|
|
|
23
|
my $indent = $aln->maxdisplayname_length+2; |
400
|
|
|
|
|
|
|
|
401
|
2
|
|
|
|
|
8
|
$aln->set_displayname_flat(); |
402
|
2
|
|
|
|
|
6
|
foreach $seq ( $aln->each_seq() ) { |
403
|
10
|
|
|
|
|
14
|
my $nmid = $aln->displayname($seq->get_nse()); |
404
|
10
|
100
|
|
|
|
22
|
if( $nmid =~ /[^\w\d\.]/ ) { |
405
|
|
|
|
|
|
|
# put name in single quotes incase it contains any of |
406
|
|
|
|
|
|
|
# the following chars: ()[]{}/\,;:=*'"`+-<> that are not |
407
|
|
|
|
|
|
|
# allowed in PAUP* and possible other software |
408
|
|
|
|
|
|
|
|
409
|
1
|
|
|
|
|
3
|
$name = sprintf("%-${indent}s", "\'" . $nmid . "\'"); |
410
|
|
|
|
|
|
|
} else { |
411
|
9
|
|
|
|
|
22
|
$name = sprintf("%-${indent}s", $nmid); |
412
|
|
|
|
|
|
|
} |
413
|
10
|
|
|
|
|
19
|
$hash{$name} = $seq->seq; |
414
|
10
|
|
|
|
|
14
|
push(@arr,$name); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
2
|
|
|
|
|
8
|
while( $count < $length ) { |
418
|
|
|
|
|
|
|
# there is another block to go! |
419
|
10
|
|
|
|
|
10
|
foreach $name ( @arr ) { |
420
|
58
|
|
|
|
|
38
|
my $dispname = $name; |
421
|
|
|
|
|
|
|
# $dispname = '' if $wrapped; |
422
|
58
|
|
|
|
|
106
|
$self->_print (sprintf("%${indent}s ",$dispname)); |
423
|
58
|
|
|
|
|
42
|
$tempcount = $count; |
424
|
58
|
|
|
|
|
31
|
$index = 0; |
425
|
58
|
|
100
|
|
|
154
|
while( ($tempcount + 10 < $length) && ($index < 5) ) { |
426
|
268
|
|
|
|
|
607
|
$self->_print (sprintf("%s ",substr($hash{$name},$tempcount,10))); |
427
|
268
|
|
|
|
|
204
|
$tempcount += 10; |
428
|
268
|
|
|
|
|
649
|
$index++; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
# last |
431
|
58
|
100
|
|
|
|
66
|
if( $index < 5) { |
432
|
|
|
|
|
|
|
# space to print! |
433
|
10
|
|
|
|
|
48
|
$self->_print (sprintf("%s ",substr($hash{$name},$tempcount))); |
434
|
10
|
|
|
|
|
9
|
$tempcount += 10; |
435
|
|
|
|
|
|
|
} |
436
|
58
|
|
|
|
|
64
|
$self->_print ("\n"); |
437
|
|
|
|
|
|
|
} |
438
|
10
|
|
|
|
|
12
|
$self->_print ("\n\n"); |
439
|
10
|
|
|
|
|
8
|
$count = $tempcount; |
440
|
10
|
|
|
|
|
14
|
$wrapped = 1; |
441
|
|
|
|
|
|
|
} |
442
|
2
|
50
|
|
|
|
5
|
if( $self->flag('endblock') ) { |
443
|
2
|
|
|
|
|
4
|
$self->_print (";\n\nendblock;\n"); |
444
|
|
|
|
|
|
|
} else { |
445
|
0
|
|
|
|
|
0
|
$self->_print (";\n\nend;\n"); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
2
|
50
|
33
|
|
|
7
|
$self->flush if $self->_flush_on_write && defined $self->_fh; |
449
|
2
|
|
|
|
|
12
|
return 1; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 flag |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Title : flag |
455
|
|
|
|
|
|
|
Usage : $obj->flag($name,$value) |
456
|
|
|
|
|
|
|
Function: Get/Set a flag value |
457
|
|
|
|
|
|
|
Returns : value of flag (a scalar) |
458
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=cut |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
sub flag{ |
464
|
54
|
|
|
54
|
1
|
56
|
my ($self,$name,$val) = @_; |
465
|
54
|
100
|
|
|
|
192
|
return $self->{'flag'}->{$name} = $val if defined $val; |
466
|
4
|
|
|
|
|
17
|
return $self->{'flag'}->{$name}; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
1; |