line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Nexus; |
2
|
6
|
|
|
6
|
|
33
|
use strict; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
147
|
|
3
|
6
|
|
|
6
|
|
25
|
use warnings; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
135
|
|
4
|
6
|
|
|
6
|
|
25
|
use base 'Bio::Phylo::Parsers::Abstract'; |
|
6
|
|
|
|
|
10
|
|
|
6
|
|
|
|
|
1512
|
|
5
|
6
|
|
|
6
|
|
35
|
use Bio::Phylo::Factory; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
21
|
|
6
|
6
|
|
|
6
|
|
27
|
use Bio::Phylo::IO 'parse'; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
265
|
|
7
|
6
|
|
|
6
|
|
32
|
use Bio::Phylo::Util::CONSTANT qw':objecttypes looks_like_instance'; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
1093
|
|
8
|
6
|
|
|
6
|
|
37
|
use Bio::Phylo::Util::Exceptions 'throw'; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
2644
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# TODO: handle mixed? distances, splits, bipartitions |
11
|
|
|
|
|
|
|
# Object type constants, cached in lexicals for the comparisons below.
my $TAXA   = _TAXA_;
my $MATRIX = _MATRIX_;

# useful regular expressions
# crude, only checks first char, use after tokenizing!
my $COMMENT = qr|^\[|;

# catch all for opening/closing square brackets and quotes
my $QUOTES_OR_BRACKETS = qr/[\[\]'"]/mox;

# capturing regex for opening sq. br. & q.: $1 is the text before the first
# opening quote/bracket, $2 everything from that character onwards
my $OPENING_QUOTE_OR_BRACKET = qr/^(.*?)([\['"].*)$/mox;

# This table holds the default state of the parser object. Underscored
# (private) fields are parsing context. The remaining keys are lower-cased
# tokens; their values are the subs that get invoked when the respective
# token is encountered during parsing.
my %defaults = (

    # scalar parsing context, undefined until set by a handler
    (
        map { $_ => undef } qw(
          _lines _current _previous _begin _ntax _nchar _gap _missing _i
          _tree _trees _treename _treestart _row _matrixtype
        )
    ),

    # flags
    _found    => 0,
    _linemode => 0,

    # list-valued parsing context
    (
        map { $_ => [] } qw(
          _taxlabels _tokens _context _translate _symbols _charlabels
          _statelabels _charstatelabels _tmpstatelabels _comments
          _treenames _matrixrowlabels
        )
    ),

    # hash-valued parsing context
    ( map { $_ => {} } qw( _matrix _charset _taxset ) ),

    # token => handler dispatch entries
    begin           => \&_begin,
    taxa            => \&_taxa,
    title           => \&_title,
    dimensions      => \&_dimensions,
    ntax            => \&_ntax,
    taxlabels       => \&_taxlabels,
    blockid         => \&_blockid,
    data            => \&_data,
    characters      => \&_characters,
    codons          => \&_codons,
    nchar           => \&_nchar,
    format          => \&_format,
    datatype        => \&_datatype,
    matchchar       => \&_matchchar,
    gap             => \&_gap,
    missing         => \&_missing,
    charlabels      => \&_charlabels,
    statelabels     => \&_statelabels,
    charstatelabels => \&_charstatelabels,
    symbols         => \&_symbols,
    items           => \&_items,
    matrix          => \&_matrix,
    charset         => \&_charset,
    taxset          => \&_taxset,
    trees           => \&_trees,
    translate       => \&_translate,
    tree            => \&_tree,
    utree           => \&_tree,
    end             => \&_end,
    endblock        => \&_end,
    '#nexus'        => \&_nexus,
    link            => \&_link,
    ';'             => \&_semicolon,
    interleave      => \&_interleave,
);
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head1 NAME |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Nexus - Parser used by Bio::Phylo::IO, no serviceable parts inside |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 DESCRIPTION |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
This module parses nexus files. It is called by the L<Bio::Phylo::IO> module, |
102
|
|
|
|
|
|
|
there is no direct usage. The parser can handle files and strings with multiple |
103
|
|
|
|
|
|
|
tree, taxon, and characters blocks whose links are defined using Mesquite's |
104
|
|
|
|
|
|
|
"TITLE = 'some_name'" and "LINK TAXA = 'some_name'" tokens. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
The parser returns a reference to an array containing one or more taxa, trees |
107
|
|
|
|
|
|
|
and matrices objects. Nexus comments are stripped, private nexus blocks (and the |
108
|
|
|
|
|
|
|
'assumptions' block) are skipped. It currently doesn't handle 'mixed' data. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=begin comment |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Type : Constructor |
113
|
|
|
|
|
|
|
Title : _new |
114
|
|
|
|
|
|
|
Usage : my $nexus = Bio::Phylo::Parsers::Nexus->_new; |
115
|
|
|
|
|
|
|
Function: Initializes a Bio::Phylo::Parsers::Nexus object. |
116
|
|
|
|
|
|
|
Returns : A Bio::Phylo::Parsers::Nexus object. |
117
|
|
|
|
|
|
|
Args : none. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=end comment |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=cut |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# Copies every entry of %defaults into the parser object, resetting its
# state. Array and hash defaults are replaced by fresh empty containers so
# that parser objects never share (or keep appending to) the template's
# references; everything else (undef, flags, handler code refs) is copied
# as is. Returns the invocant.
sub _process_defaults {
    my ($self) = @_;
    for my $field ( keys %defaults ) {
        my $default = $defaults{$field};
        $self->{$field} =
            looks_like_instance( $default, 'ARRAY' ) ? []
          : looks_like_instance( $default, 'HASH' )  ? {}
          :                                            $default;
    }
    return $self;
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=begin comment |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Type    : Wrapper
Title   : _parse
Usage   : my @blocks = $nexus->_parse;
Function: Does all the parser magic, reading from the handle returned by $self->_handle
Returns : The list of blocks collected in $self->{'_context'} during parsing
Args    : None used directly; any arguments are passed on to _stringify and _post_process
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=end comment |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=cut |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# Drives the whole parse: resets the parser state, reads the input into
# lines, tokenizes them, and then walks the tokens, dispatching each one to
# the handler stored in the object under the lower-cased token. Tokens
# without a handler of their own are passed to the handler of the most
# recent keyword ($self->{'_current'}). Bracketed comments are collected in
# $self->{'_comments'}.
#
# Args    : passed through unchanged to _stringify and _post_process
# Returns : whatever _post_process returns
sub _parse {
    my $self = shift;
    $self->_process_defaults;
    $self->_logger->info("going to parse nexus data");
    $self->{'_lines'}  = $self->_stringify(@_);
    $self->{'_tokens'} = $self->_tokenize( $self->{'_lines'} );

    # iterate over tokens, dispatch methods from %{ $self } table
    # This is the meat of the parsing, from here everything else is called.
    $self->_logger->info("tokenized and split data, going to parse blocks");
    my $private_block;

    # sliding window over the three most recently seen tokens
    my $token_queue = [ undef, undef, undef ];

    # NOTE(review): kept from the original; the $self->$c(...) calls below do
    # not appear to need symbolic references - confirm before removing.
    no strict 'refs';
    TOKEN_LINE: for my $token_line ( @{ $self->{'_tokens'} } ) {

        # in line mode the current handler receives the whole token line
        if ( $self->{'_linemode'} ) {
            my $c = $self->{ $self->{'_current'} };
            push @{$token_queue}, $token_line;
            shift @{$token_queue};
            $self->$c($token_line);
            next TOKEN_LINE;
        }

        RAW_TOKEN: for my $raw_token ( @{$token_line} ) {

            # comments are set aside, they never reach the handlers
            if ( $raw_token =~ $COMMENT ) {
                push @{ $self->{'_comments'} }, $raw_token;
                next RAW_TOKEN;
            }
            my $lower_case_token = lc($raw_token);
            push @{$token_queue}, $lower_case_token;
            shift @{$token_queue};

            if ( exists $self->{$lower_case_token} and not $private_block ) {

                # state fields live in the same hash as the handlers, so
                # only dispatch when the value really is a code reference
                if ( ref $self->{$lower_case_token} eq 'CODE' ) {
                    $self->{'_previous'} = $self->{'_current'};
                    $self->{'_current'}  = $lower_case_token;

                    # pull code ref from dispatch table
                    my $c = $self->{$lower_case_token};

                    # invoke as object method
                    $self->$c($raw_token);
                    next RAW_TOKEN;
                }
            }
            elsif ( $self->{'_current'} and not $private_block ) {
                my $c = $self->{ $self->{'_current'} };
                $self->$c($raw_token);
                next RAW_TOKEN;
            }

            # $self->{'_begin'} is switched 'on' by &_begin(), and 'off'
            # again by any one of the appropriate subsequent tokens, i.e.
            # taxa, data, characters and trees
            if (    $self->{'_begin'}
                and not exists $self->{$lower_case_token}
                and not $private_block )
            {
                $private_block = $raw_token;
                next RAW_TOKEN;
            }

            # jump over private block content; the block is over once the
            # last two tokens are 'end' (or 'endblock') followed by ';'
            if (    $private_block
                and $token_queue->[-2] =~ m/^end(?:block)?$/
                and $token_queue->[-1] eq ';' )
            {

                # log before clearing, the message needs the block name
                $self->_logger->info("Skipped private $private_block block");
                $private_block = 0;
            }
        }
    }
    return $self->_post_process(@_);
}
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# Reads the parser's input handle ($self->_handle) to exhaustion and returns
# a reference to an array of lines. Each record read is split on any line
# ending (\r\n, \r or \n) and fragments without any non-whitespace character
# are dropped; leading/trailing whitespace of kept lines is preserved.
#
# Args    : ignored
# Returns : ARRAY reference of strings, one per non-blank line
sub _stringify {
    my $self = shift;
    $self->_logger->info("going to split nexus data on lines");
    my @lines;
    my $handle = $self->_handle;

    # read into a lexical so the caller's $_ is left untouched
    while ( my $line = <$handle> ) {
        push @lines, grep { /\S/ } split /\r\n?|\n/, $line;
        $self->_logger->debug("read line: $line");
    }
    return \@lines;
}
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=begin comment |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Type : Method |
252
|
|
|
|
|
|
|
Title : _tokenize() |
253
|
|
|
|
|
|
|
Usage : $nexus->_tokenize($lines); |
254
|
|
|
|
|
|
|
Function: Tokenizes lines in $lines array ref |
255
|
|
|
|
|
|
|
Returns : Two dimensional ARRAY |
256
|
|
|
|
|
|
|
Args : An array ref of lines (e.g. read from an input file); |
257
|
|
|
|
|
|
|
Comments: This method accepts an array ref holding lines that may contain |
258
|
|
|
|
|
|
|
single quotes, double quotes or square brackets. Line breaks and |
259
|
|
|
|
|
|
|
spaces inside these quoted/bracketed fragments are ignored, otherwise |
260
|
|
|
|
|
|
|
it is split, e.g.: |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
[ |
263
|
|
|
|
|
|
|
[ '#NEXUS' ], |
264
|
|
|
|
|
|
|
[ 'BEGIN TAXA; [taxablock comment]' ], |
265
|
|
|
|
|
|
|
[ 'DIMENSIONS NTAX=3;' ], |
266
|
|
|
|
|
|
|
[ 'TAXLABELS "Taxon \' A" \'Taxon B\' TAXON[comment]C' ], |
267
|
|
|
|
|
|
|
...etc... |
268
|
|
|
|
|
|
|
] |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
becomes: |
271
|
|
|
|
|
|
|
[ |
272
|
|
|
|
|
|
|
[ '#NEXUS' ], |
273
|
|
|
|
|
|
|
[ |
274
|
|
|
|
|
|
|
'BEGIN', |
275
|
|
|
|
|
|
|
'TAXA', |
276
|
|
|
|
|
|
|
';', |
277
|
|
|
|
|
|
|
'[taxablock comment]' |
278
|
|
|
|
|
|
|
], |
279
|
|
|
|
|
|
|
[ |
280
|
|
|
|
|
|
|
'DIMENSIONS', |
281
|
|
|
|
|
|
|
'NTAX', |
282
|
|
|
|
|
|
|
'=', |
283
|
|
|
|
|
|
|
'3', |
284
|
|
|
|
|
|
|
';' |
285
|
|
|
|
|
|
|
], |
286
|
|
|
|
|
|
|
[ |
287
|
|
|
|
|
|
|
'TAXLABELS', |
288
|
|
|
|
|
|
|
'"Taxon \' A"', |
289
|
|
|
|
|
|
|
'\'Taxon B\'', |
290
|
|
|
|
|
|
|
'TAXON', |
291
|
|
|
|
|
|
|
'[comment]', |
292
|
|
|
|
|
|
|
'C' |
293
|
|
|
|
|
|
|
], |
294
|
|
|
|
|
|
|
...etc... |
295
|
|
|
|
|
|
|
] |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=end comment |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=cut |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# Splits the lines in $lines into tokens. Fragments enclosed in single
# quotes, double quotes or square brackets are kept verbatim as one token
# (also across line breaks); everything else is split on whitespace, with
# '=', ';' and ',' becoming tokens of their own.
#
# Args    : $lines - ARRAY reference of input lines. NOTE: the loop variable
#           aliases the elements, so lines containing quotes/brackets are
#           modified in place while they are consumed.
# Returns : ARRAY reference of ARRAY references of tokens
# Throws  : BadArgs if a quote/bracket is still open at end of input, or if
#           a closing ']' shows up without any opening quote/bracket
sub _tokenize {
    my ( $self, $lines ) = @_;
    $self->_logger->info("going to split lines on tokens");
    my ( $extract, $INSIDE_QUOTE, $continue ) = ( '', 0, 0 );
    my ( @tokens, @split );
    my $CONTEXT_QB_AT_START = qr/^([\['"])(.*)$/mox;
    my $CONTEXT_CLOSER;    # only defined once a quote/bracket has been opened
    my $QuoteContext;      # either " ' or [
    my $QuoteStartLine;
    my $LineCount  = 0;
    my %CLOSE_CHAR = (
        '"' => '"',
        "'" => "'",
        '[' => ']',
    );
    my %INVERSE_CLOSE_CHAR = (
        '"' => '"',
        "'" => "'",
        ']' => '[',
        ')' => '(',
    );

    # tokenize
    LINE: for my $line ( @{$lines} ) {
        $LineCount++;
        TOKEN: while ( $line =~ /\S/ ) {

            # line in file has no quoting/bracketing characters, and
            # is no extension of a quoted/bracketed fragment starting
            # on a previous line
            if ( $line !~ $QUOTES_OR_BRACKETS && !$INSIDE_QUOTE ) {
                if ($continue) {
                    push @{ $tokens[-1] }, $line;
                    $continue = 0;
                }
                else {
                    push @tokens, [$line];
                }
                my $logline = join( ' ', @{ $tokens[-1] } );
                chomp($logline);
                $self->_logger->debug("Tokenized line $LineCount: $logline");
                next LINE;
            }

            # line in file has opening quoting/bracketing characters, and
            # is no extension of a quoted/bracketed fragment starting
            # on a previous line
            elsif ( $line =~ $OPENING_QUOTE_OR_BRACKET && !$INSIDE_QUOTE ) {
                my ( $start, $quoted ) = ( $1, $2 );
                push @tokens, [$start];
                $line    = $quoted;
                $extract = $quoted;
                $INSIDE_QUOTE++;
                $continue     = 1;
                $QuoteContext = substr( $quoted, 0, 1 );
                $self->_logger->debug("Line $LineCount contains $QuoteContext");
                $QuoteStartLine = $LineCount;

                # from here on only the opening character and its matching
                # closer are of interest
                $CONTEXT_QB_AT_START = qr/^(\Q$QuoteContext\E)(.*)$/;
                my $context_closer = $CLOSE_CHAR{$QuoteContext};
                $CONTEXT_CLOSER = qr/^(.*?)(\Q$context_closer\E)(.*)$/;
                next TOKEN;
            }

            # line in file has no quoting/bracketing characters, and
            # is an extension of a quoted/bracketed fragment starting
            # on a previous line
            # (flag is tested first: $CONTEXT_CLOSER is undefined until the
            # first quote/bracket has been opened)
            elsif ( $INSIDE_QUOTE && $line !~ $CONTEXT_CLOSER ) {
                $self->_logger->debug(
                    "Line $LineCount extends quote or comment");
                $extract .= $line;
                next LINE;
            }

            # remaining line starts with the opening character of the
            # current quote/comment
            elsif ( $INSIDE_QUOTE && $line =~ $CONTEXT_QB_AT_START ) {
                my ( $q, $remainder ) = ( $1, $1 . $2 );
                if ( $q eq '"' || $q eq "'" ) {

                    # quoted string is closed on this same line
                    if ( $remainder =~ m/^($q[^$q]*?$q)(.*)$/ ) {
                        $self->_logger->debug(
"Line $LineCount closes $INVERSE_CLOSE_CHAR{$q} with $q"
                        );
                        push @{ $tokens[-1] }, ($1);
                        $line = $2;
                        $INSIDE_QUOTE--;
                        next TOKEN;
                    }

                    # no closing quote on this line
                    elsif ( $remainder =~ m/^$q[^$q]*$/ ) {
                        $extract .= $line;
                        $continue = 1;
                        next LINE;
                    }
                }
                elsif ( $q eq '[' ) {

                    # walk the characters, tracking bracket nesting depth
                    # in $INSIDE_QUOTE, until the outermost bracket closes
                    for my $i ( 1 .. length($line) ) {
                        $INSIDE_QUOTE++ if substr( $line, $i, 1 ) eq '[';
                        if ( $i and !$INSIDE_QUOTE ) {
                            push @{ $tokens[-1] }, substr( $line, 0, $i );
                            my $logqc = substr( $line, ( $i - 1 ), 1 );
                            $self->_logger->debug(
"Line $LineCount closes $INVERSE_CLOSE_CHAR{$logqc} with $logqc"
                            );
                            $line = substr( $line, $i );
                            next TOKEN;
                        }
                        $INSIDE_QUOTE-- if substr( $line, $i, 1 ) eq ']';
                    }

                    # comment continues on the next line
                    $extract  = $line;
                    $continue = 1;
                    next LINE;
                }
            }

            # line contains the closer of a fragment opened on an earlier line
            elsif ( $INSIDE_QUOTE && $line =~ $CONTEXT_CLOSER ) {
                my ( $start, $q, $remainder ) = ( $1, $2, $3 );
                $self->_logger->debug(
                    "Line $LineCount closes $INVERSE_CLOSE_CHAR{$q} with $q");
                $start = $extract . $start if $continue;
                if ( $q eq '"' or $q eq "'" ) {
                    push @{ $tokens[-1] }, $start;
                    $line = $remainder;
                    next TOKEN;
                }
                elsif ( $q eq ']' ) {
                    for my $i ( 0 .. length($line) ) {
                        $INSIDE_QUOTE++ if substr( $line, $i, 1 ) eq '[';
                        if ( $i and !$INSIDE_QUOTE ) {
                            my $segment = substr( $line, 0, $i );
                            if ($continue) {
                                push @{ $tokens[-1] }, $extract . $segment;
                            }
                            else {
                                push @{ $tokens[-1] }, $segment;
                            }
                            $line = substr( $line, $i );
                            next TOKEN;
                        }
                        $INSIDE_QUOTE-- if substr( $line, $i, 1 ) eq ']';
                    }
                    if ($continue) {
                        $extract .= $line;
                    }
                    else {
                        $extract = $line;
                    }
                    $continue = 1;
                    next LINE;
                }
            }

            # Not inside a quote/comment, yet the line holds a closing ']'
            # without any opening character. None of the branches above
            # consumes such a line, so without this guard the TOKEN loop
            # would never terminate.
            else {
                throw 'BadArgs' =>
                  "Unexpected ] without matching [ at line $LineCount";
            }
        }
    }

    # an exception here means that an opening quote symbol " ' [
    # ($QuoteContext) was encountered at input file/string line $QuoteStartLine.
    # This can happen if any of these symbols is used in an illegal
    # way, e.g. by using double quotes as gap symbols in matrices.
    if ($INSIDE_QUOTE) {
        throw 'BadArgs' =>
          "Unbalanced $QuoteContext starting at line $QuoteStartLine";
    }

    # final split: non-quoted/bracketed fragments are split on whitespace,
    # others are preserved verbatim
    $self->_logger->info(
        "going to split non-quoted/commented fragments on whitespace");
    foreach my $line (@tokens) {
        my @line;
        foreach my $word (@$line) {
            if ( $word !~ $QUOTES_OR_BRACKETS ) {

                # make '=', ';' and ',' stand-alone tokens
                $word =~ s/(=|;|,)/ $1 /g;
                push @line, grep { /\S/ } split /\s+/, $word;
            }
            else {
                push @line, $word;
            }
        }
        push @split, \@line;
    }
    return \@split;
}
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# Links blocks to taxa and hands back the parse result.
#
# Walks the blocks collected in $self->{'_context'} in input order. Every
# block that can set_taxa and has no taxa yet is linked to the most recent
# taxa block that preceded it. Afterwards the parser state is reset to its
# defaults and the collected blocks are returned as a list.
#
# NOTE(review): the original guarded the linking with
# "$taxa->[-1]->can('_type') == $TAXA", which compares a code reference with
# a constant and therefore never held, so set_taxa was never called here.
# The check is redundant anyway (only blocks whose _type is $TAXA are
# remembered); confirm that enabling the link has no unwanted side effects.
sub _post_process {
    my $self = shift;
    my $last_taxa;
    foreach my $block ( @{ $self->{'_context'} } ) {
        if ( $block->_type == $TAXA ) {
            $last_taxa = $block;
        }
        elsif ( $block->can('set_taxa') ) {
            if ( $last_taxa and not $block->get_taxa ) {
                $block->set_taxa($last_taxa);    # XXX exception here?
            }
        }
    }

    # keep a handle on the result, the reset below replaces the field
    my $blocks = $self->{'_context'};

    # re-initialize the object, forcing data type references to be empty
    $self->_process_defaults;
    return @{$blocks};
}
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=begin comment |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
The following subs are called by the dispatch table stored in the object when |
519
|
|
|
|
|
|
|
their respective tokens are encountered. |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=end comment |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=cut |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Handler for the '#nexus' token: only logs that the token was seen.
sub _nexus {
    my ( $self, $token ) = @_;
    $self->_logger->info("found nexus token") if uc($token) eq '#NEXUS';
}
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
# Handler for the 'begin' token: raises the '_begin' flag, which the block
# handlers (e.g. _taxa, _data) test to see that a new block is starting.
sub _begin {
    my ($self) = @_;
    return $self->{'_begin'} = 1;
}
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
# Handler for the 'taxa' token. Right after 'begin' it starts a new taxa
# block: a taxa object is created by the factory, appended to the context
# and the '_begin' flag is lowered. Anywhere else the token is taken to be
# part of a 'link taxa = blah' construct, so subsequent tokens are routed
# to the 'link' handler.
sub _taxa {
    my ($self) = @_;
    if ( !$self->{'_begin'} ) {
        $self->{'_current'} = 'link';
    }
    else {
        my $block = $self->_factory->create_taxa;
        push @{ $self->{'_context'} }, $block;
        $self->_logger->info("starting taxa block");
        $self->{'_begin'} = 0;
    }
}
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# Handler for the tokens of a CHARSET statement. Called once per token and
# accumulates the pieces in $self->{'_charset'}:
#   name   - the set name (first token after the CHARSET keyword)
#   matrix - optional mesquite-style matrix reference
#   range  - ARRAY of the raw index tokens
# The keyword tests are anchored so that names which merely contain
# "charset" or "characters" (e.g. 'charset1') are not mistaken for keywords.
sub _charset {
    my ( $self, $token ) = @_;
    my $set = $self->{'_charset'} ||= {};

    # first thing after the CHARSET token is the set name
    if ( $token !~ /\ACHARSET\z/i && !$set->{'name'} ) {
        $set->{'name'}  = $token;
        $set->{'range'} = [];
    }

    # then there might be a mesquite-style matrix reference, e.g. (CHARACTERS = matrix_name)
    elsif ( $token =~ m/^\(/ ) {

        # empty string marks "reference opened, name still to come"
        $set->{'matrix'} = '';
    }
    elsif (defined $set->{'matrix'}
        && !$set->{'matrix'}
        && $token !~ /\A(?:\(?CHARACTERS|=)\z/i )
    {
        $token =~ s/\)$//;
        $set->{'matrix'} = $token;
    }

    # then come the indices
    elsif ( $token =~ /(?:\d+|-)/ ) {
        push @{ $set->{'range'} }, $token;
    }
}
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
# Handler for the tokens of a TAXSET statement. Called once per token and
# accumulates the pieces in $self->{'_taxset'}:
#   name  - the set name (first token after the TAXSET keyword)
#   taxa  - optional mesquite-style taxa block reference
#   range - ARRAY of the raw index tokens
# The keyword tests are anchored and case-insensitive (matching _charset),
# so '( taxa = name )' works in any case and names that merely contain
# "taxset" or "taxa" are not mistaken for keywords.
sub _taxset {
    my ( $self, $token ) = @_;
    my $set = $self->{'_taxset'} ||= {};

    # first thing after the TAXSET token is the set name
    if ( $token !~ /\ATAXSET\z/i && !$set->{'name'} ) {
        $set->{'name'}  = $token;
        $set->{'range'} = [];
    }

    # then there might be a mesquite-style taxa reference, e.g. (TAXA = matrix_name)
    elsif ( $token =~ m/^\(/ ) {

        # empty string marks "reference opened, name still to come"
        $set->{'taxa'} = '';
    }
    elsif (defined $set->{'taxa'}
        && !$set->{'taxa'}
        && $token !~ /\A(?:\(?TAXA|=)\z/i )
    {
        $token =~ s/\)$//;
        $set->{'taxa'} = $token;
    }

    # then come the indices
    elsif ( $token =~ /(?:\d+|-)/ ) {
        push @{ $set->{'range'} }, $token;
    }
}
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
# Handler for the 'interleave' token and the tokens following it. It only
# logs: once for every call, and once more when the token is 'NO' (in any
# case).
sub _interleave {
    my ( $self, $token ) = @_;
    $self->_logger->info("perhaps we'll need to parse interleaved");
    my $is_no = defined $token && uc($token) eq 'NO';
    $self->_logger->info("no, we don't need to parse interleaved") if $is_no;
}
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# Handler for the 'title' token and the tokens following it. The first
# token that is not the TITLE keyword itself becomes the name of the
# current block, provided the block has no name yet.
sub _title {
    my ( $self, $title ) = @_;
    return if !defined $title;
    return if uc($title) eq 'TITLE';
    return if $self->_current->get_name;
    $self->_current->set_name($title);
    $self->_logger->info("block has title '$title'");
}
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Handler for the 'link' token and the tokens following it ('link taxa =
# blah'). The first token that is not LINK, TAXA or '=' is taken as the
# title of the block to link to: if the current block has no taxa yet, the
# first block in the context carrying that name is set as its taxa. The
# log message is written whether or not a matching block was found.
sub _link {
    my ( $self, $link ) = @_;
    return unless defined $link;
    return if $link =~ m/^(?:LINK|TAXA|=)$/i;
    return if $self->_current->get_taxa;

    CANDIDATE: for my $candidate ( @{ $self->{'_context'} } ) {
        next CANDIDATE unless $candidate->get_name;
        next CANDIDATE unless $candidate->get_name eq $link;
        $self->_current->set_taxa($candidate);
        last CANDIDATE;
    }
    $self->_logger->info("block links to taxa block with title '$link'");
}
638
|
|
|
|
|
|
|
|
639
|
|
|
|
22
|
|
|
# Handler for the 'dimensions' token: a no-op. It exists so that the token
# has an entry in the dispatch table; the values that follow it are picked
# up by the handlers of their own tokens ('ntax', 'nchar').
sub _dimensions {
    return;
}
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
sub _ntax { |
645
|
39
|
|
|
39
|
|
54
|
my $self = shift; |
646
|
39
|
100
|
66
|
|
|
195
|
if ( defined $_[0] and $_[0] =~ m/^\d+$/ ) { |
647
|
13
|
|
|
|
|
33
|
$self->{'_ntax'} = shift; |
648
|
13
|
|
|
|
|
29
|
my $ntax = $self->{'_ntax'}; |
649
|
13
|
|
|
|
|
43
|
$self->_logger->info("number of taxa: $ntax"); |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
sub _taxlabels { |
654
|
72
|
|
|
72
|
|
103
|
my $self = shift; |
655
|
72
|
100
|
66
|
|
|
291
|
if ( defined $_[0] and uc( $_[0] ) ne 'TAXLABELS' ) { |
|
|
50
|
33
|
|
|
|
|
656
|
59
|
|
|
|
|
84
|
my $taxon = shift; |
657
|
59
|
|
|
|
|
127
|
$self->_logger->debug("taxon: $taxon"); |
658
|
59
|
|
|
|
|
83
|
push @{ $self->{'_taxlabels'} }, $taxon; |
|
59
|
|
|
|
|
131
|
|
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
elsif ( defined $_[0] and uc( $_[0] ) eq 'TAXLABELS' ) { |
661
|
|
|
|
|
|
|
$self->_current->set_generic( |
662
|
13
|
|
|
|
|
53
|
'nexus_comments' => $self->{'_comments'} ); |
663
|
13
|
|
|
|
|
38
|
$self->{'_comments'} = []; |
664
|
13
|
|
|
|
|
46
|
$self->_logger->info("starting taxlabels"); |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub _blockid { |
669
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
670
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and uc( $_[0] ) ne 'BLOCKID' ) { |
671
|
0
|
|
|
|
|
0
|
my $blockid = shift; |
672
|
0
|
|
|
|
|
0
|
$self->_logger->debug("blockid: $blockid"); |
673
|
0
|
|
|
|
|
0
|
$self->_current->set_generic( 'blockid' => $blockid ); |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
sub _data { |
678
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
679
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_begin'} ) { |
680
|
0
|
|
|
|
|
0
|
$self->{'_begin'} = 0; |
681
|
0
|
|
|
|
|
0
|
push @{ $self->{'_context'} }, $self->_factory->create_matrix; |
|
0
|
|
|
|
|
0
|
|
682
|
0
|
|
|
|
|
0
|
$self->_logger->info("starting data block"); |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
sub _characters { |
687
|
9
|
|
|
9
|
|
18
|
my $self = shift; |
688
|
9
|
50
|
|
|
|
33
|
if ( $self->{'_begin'} ) { |
689
|
9
|
|
|
|
|
18
|
$self->{'_begin'} = 0; |
690
|
9
|
|
|
|
|
16
|
push @{ $self->{'_context'} }, $self->_factory->create_matrix; |
|
9
|
|
|
|
|
37
|
|
691
|
9
|
|
|
|
|
39
|
$self->_logger->info("starting characters block"); |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
sub _nchar { |
696
|
27
|
|
|
27
|
|
42
|
my $self = shift; |
697
|
27
|
100
|
66
|
|
|
137
|
if ( defined $_[0] and $_[0] =~ m/^\d+$/ ) { |
698
|
9
|
|
|
|
|
22
|
$self->{'_nchar'} = shift; |
699
|
9
|
|
|
|
|
20
|
my $nchar = $self->{'_nchar'}; |
700
|
9
|
|
|
|
|
80
|
$self->_logger->info("number of characters: $nchar"); |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
9
|
|
|
sub _format { |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
#my $self = shift; |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
sub _datatype { |
710
|
27
|
|
|
27
|
|
39
|
my $self = shift; |
711
|
27
|
100
|
66
|
|
|
148
|
if ( defined $_[0] and $_[0] !~ m/^(?:DATATYPE|=)/i ) { |
712
|
9
|
|
|
|
|
25
|
my $datatype = shift; |
713
|
9
|
|
|
|
|
32
|
$self->_current->set_type($datatype); |
714
|
9
|
|
|
|
|
28
|
$self->_logger->info("datatype: $datatype"); |
715
|
|
|
|
|
|
|
} |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
sub _matchchar { |
719
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
720
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and $_[0] !~ m/^(?:MATCHCHAR|=)/i ) { |
721
|
0
|
|
|
|
|
0
|
my $matchchar = shift; |
722
|
0
|
|
|
|
|
0
|
$self->_current->set_matchchar($matchchar); |
723
|
0
|
|
|
|
|
0
|
$self->_logger->info("matchchar: $matchchar"); |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
} |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
0
|
|
|
sub _items { |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
#my $self = shift; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
sub _gap { |
733
|
27
|
|
|
27
|
|
44
|
my $self = shift; |
734
|
27
|
100
|
66
|
|
|
122
|
if ( $_[0] !~ m/^(?:GAP|=)/i and !$self->{'_gap'} ) { |
735
|
9
|
|
|
|
|
20
|
$self->{'_gap'} = shift; |
736
|
9
|
|
|
|
|
20
|
my $gap = $self->{'_gap'}; |
737
|
9
|
|
|
|
|
26
|
$self->_current->set_gap($gap); |
738
|
9
|
|
|
|
|
28
|
$self->_logger->info("gap character: $gap"); |
739
|
9
|
|
|
|
|
19
|
undef $self->{'_gap'}; |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
sub _missing { |
744
|
27
|
|
|
27
|
|
39
|
my $self = shift; |
745
|
27
|
100
|
66
|
|
|
118
|
if ( $_[0] !~ m/^(?:MISSING|=)/i and !$self->{'_missing'} ) { |
746
|
9
|
|
|
|
|
19
|
$self->{'_missing'} = shift; |
747
|
9
|
|
|
|
|
20
|
my $missing = $self->{'_missing'}; |
748
|
9
|
|
|
|
|
26
|
$self->_current->set_missing($missing); |
749
|
9
|
|
|
|
|
29
|
$self->_logger->info("missing character: $missing"); |
750
|
9
|
|
|
|
|
24
|
undef $self->{'_missing'}; |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
} |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
sub _symbols { |
755
|
15
|
|
|
15
|
|
19
|
my $self = shift; |
756
|
15
|
100
|
66
|
|
|
79
|
if ( $_[0] !~ m/^(?:SYMBOLS|=)$/i and $_[0] =~ m/^"?(.+)"?$/ ) { |
757
|
5
|
|
|
|
|
15
|
my $sym = $1; |
758
|
5
|
|
|
|
|
21
|
$sym =~ s/"//g; |
759
|
5
|
|
|
|
|
40
|
my @syms = grep { /\S+/ } split /\s+/, $sym; |
|
17
|
|
|
|
|
47
|
|
760
|
5
|
|
|
|
|
13
|
push @{ $self->{'_symbols'} }, @syms; |
|
5
|
|
|
|
|
16
|
|
761
|
5
|
|
|
|
|
19
|
$self->_logger->debug("recorded character state symbols '@syms'"); |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub _charlabels { |
766
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
767
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and uc $_[0] ne 'CHARLABELS' ) { |
768
|
0
|
|
|
|
|
0
|
push @{ $self->{'_charlabels'} }, shift; |
|
0
|
|
|
|
|
0
|
|
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
sub _charstatelabels { |
773
|
18
|
|
|
18
|
|
21
|
my $self = shift; |
774
|
18
|
|
|
|
|
23
|
my $token = shift; |
775
|
18
|
|
|
|
|
36
|
$self->_logger->debug($token); |
776
|
18
|
100
|
66
|
|
|
57
|
if ( defined $token and uc $token ne 'CHARSTATELABELS' ) { |
777
|
16
|
|
|
|
|
21
|
push @{ $self->{'_charstatelabels'} }, $token; |
|
16
|
|
|
|
|
30
|
|
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
sub _statelabels { |
782
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
783
|
0
|
|
|
|
|
0
|
my $token = shift; |
784
|
0
|
0
|
0
|
|
|
0
|
if ( defined $token and uc $token ne 'STATELABELS' ) { |
785
|
0
|
0
|
|
|
|
0
|
if ( $token eq ',' ) { |
786
|
0
|
|
|
|
|
0
|
my $tmpstatelabels = $self->{'_tmpstatelabels'}; |
787
|
0
|
|
|
|
|
0
|
my $index = shift @{$tmpstatelabels}; |
|
0
|
|
|
|
|
0
|
|
788
|
0
|
|
|
|
|
0
|
$self->{'_statelabels'}->[ $index - 1 ] = $tmpstatelabels; |
789
|
0
|
|
|
|
|
0
|
$self->{'_tmpstatelabels'} = []; |
790
|
|
|
|
|
|
|
} |
791
|
|
|
|
|
|
|
else { |
792
|
0
|
|
|
|
|
0
|
push @{ $self->{'_tmpstatelabels'} }, $token; |
|
0
|
|
|
|
|
0
|
|
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
} |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
# for data type, character labels, state labels |
798
|
|
|
|
|
|
|
sub _add_matrix_metadata { |
799
|
57
|
|
|
57
|
|
77
|
my $self = shift; |
800
|
57
|
|
|
|
|
111
|
$self->_logger->info("adding matrix metadata"); |
801
|
57
|
100
|
|
|
|
116
|
if ( not defined $self->{'_matrixtype'} ) { |
802
|
9
|
|
|
|
|
24
|
$self->{'_matrixtype'} = $self->_current->get_type; |
803
|
9
|
50
|
|
|
|
19
|
if ( @{ $self->{'_charlabels'} } ) { |
|
9
|
|
|
|
|
34
|
|
804
|
0
|
|
|
|
|
0
|
$self->_current->set_charlabels( $self->{'_charlabels'} ); |
805
|
0
|
|
|
|
|
0
|
$self->_logger->debug("adding character labels"); |
806
|
|
|
|
|
|
|
} |
807
|
9
|
50
|
|
|
|
21
|
if ( @{ $self->{'_statelabels'} } ) { |
|
9
|
|
|
|
|
29
|
|
808
|
0
|
|
|
|
|
0
|
$self->_current->set_statelabels( $self->{'_statelabels'} ); |
809
|
0
|
|
|
|
|
0
|
$self->_logger->debug("adding state labels"); |
810
|
|
|
|
|
|
|
} |
811
|
9
|
50
|
|
|
|
17
|
if ( my @symbols = @{ $self->{'_symbols'} } ) { |
|
9
|
|
|
|
|
34
|
|
812
|
0
|
|
|
|
|
0
|
$self->_logger->debug("updating state lookup table"); |
813
|
0
|
|
|
|
|
0
|
my $to = $self->_current->get_type_object; |
814
|
0
|
|
|
|
|
0
|
my $lookup = $to->get_lookup; |
815
|
0
|
0
|
|
|
|
0
|
if ($lookup) { |
816
|
0
|
|
|
|
|
0
|
for my $sym (@symbols) { |
817
|
0
|
0
|
|
|
|
0
|
if ( not exists $lookup->{$sym} ) { |
818
|
0
|
|
|
|
|
0
|
$lookup->{$sym} = [$sym]; |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
} |
824
|
57
|
|
|
|
|
74
|
return $self; |
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
sub _add_tokens_to_row { |
828
|
48
|
|
|
48
|
|
77
|
my ( $self, $tokens ) = @_; |
829
|
48
|
|
|
|
|
63
|
my $rowname; |
830
|
48
|
|
|
|
|
58
|
for my $token ( @{$tokens} ) { |
|
48
|
|
|
|
|
92
|
|
831
|
90
|
|
|
|
|
190
|
$self->_logger->debug("token: $token"); |
832
|
90
|
100
|
|
|
|
176
|
last if $token eq ';'; |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# mesquite sometimes writes multiline (but not interleaved) |
835
|
|
|
|
|
|
|
# matrix rows (harrumph). |
836
|
81
|
100
|
66
|
|
|
474
|
if ( not defined $rowname and $token !~ $COMMENT ) { |
|
|
50
|
33
|
|
|
|
|
837
|
39
|
|
|
|
|
65
|
my $taxa; |
838
|
39
|
50
|
|
|
|
69
|
if ( $taxa = $self->_current->get_taxa ) { |
|
|
50
|
|
|
|
|
|
839
|
0
|
0
|
|
|
|
0
|
if ( my $taxon = $taxa->get_by_name($token) ) { |
840
|
0
|
|
|
|
|
0
|
$rowname = $token; |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
else { |
843
|
0
|
|
|
|
|
0
|
$rowname = $self->{'_matrixrowlabels'}->[-1]; |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
elsif ( $taxa = $self->_find_last_seen_taxa_block ) { |
847
|
39
|
50
|
|
|
|
118
|
if ( my $taxon = $taxa->get_by_name($token) ) { |
848
|
39
|
|
|
|
|
64
|
$rowname = $token; |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
else { |
851
|
0
|
|
|
|
|
0
|
$rowname = $self->{'_matrixrowlabels'}->[-1]; |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
else { |
855
|
0
|
|
|
|
|
0
|
$rowname = $token; |
856
|
|
|
|
|
|
|
} |
857
|
39
|
50
|
|
|
|
91
|
if ( not exists $self->{'_matrix'}->{$rowname} ) { |
858
|
39
|
|
|
|
|
95
|
$self->{'_matrix'}->{$rowname} = []; |
859
|
39
|
|
|
|
|
53
|
push @{ $self->{'_matrixrowlabels'} }, $rowname; |
|
39
|
|
|
|
|
92
|
|
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
elsif ( defined $rowname and $token !~ $COMMENT ) { |
863
|
42
|
|
|
|
|
74
|
my $row = $self->{'_matrix'}->{$rowname}; |
864
|
42
|
100
|
|
|
|
239
|
if ( $self->{'_matrixtype'} =~ m/^continuous$/i ) { |
865
|
6
|
|
|
|
|
9
|
push @{$row}, split( /\s+/, $token ); |
|
6
|
|
|
|
|
22
|
|
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
else { |
868
|
36
|
|
|
|
|
46
|
push @{$row}, split( //, $token ); |
|
36
|
|
|
|
|
194
|
|
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
sub _find_last_seen_taxa_block { |
875
|
85
|
|
|
85
|
|
126
|
my $self = shift; |
876
|
85
|
|
|
|
|
121
|
my $name = shift; |
877
|
85
|
|
|
|
|
94
|
for ( my $i = $#{ $self->{'_context'} } ; $i >= 0 ; $i-- ) { |
|
85
|
|
|
|
|
251
|
|
878
|
180
|
100
|
|
|
|
468
|
if ( $self->{'_context'}->[$i]->_type == $TAXA ) { |
879
|
84
|
100
|
|
|
|
154
|
if ( $name ) { |
880
|
2
|
100
|
|
|
|
5
|
if ( $self->{'_context'}->[$i]->get_name eq $name ) { |
881
|
1
|
|
|
|
|
3
|
return $self->{'_context'}->[$i]; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
else { |
885
|
82
|
|
|
|
|
211
|
return $self->{'_context'}->[$i]; |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
} |
889
|
2
|
|
|
|
|
7
|
return; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub _find_last_seen_matrix { |
893
|
4
|
|
|
4
|
|
7
|
my $self = shift; |
894
|
4
|
|
|
|
|
8
|
my $name = shift; |
895
|
4
|
|
|
|
|
6
|
for ( my $i = $#{ $self->{'_context'} } ; $i >= 0 ; $i-- ) { |
|
4
|
|
|
|
|
18
|
|
896
|
5
|
50
|
|
|
|
19
|
if ( $self->{'_context'}->[$i]->_type == $MATRIX ) { |
897
|
5
|
100
|
|
|
|
14
|
if ( $name ) { |
898
|
2
|
100
|
|
|
|
6
|
if ( $self->{'_context'}->[$i]->get_name eq $name ) { |
899
|
1
|
|
|
|
|
3
|
return $self->{'_context'}->[$i]; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
else { |
903
|
3
|
|
|
|
|
9
|
return $self->{'_context'}->[$i]; |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
} |
906
|
|
|
|
|
|
|
} |
907
|
0
|
|
|
|
|
0
|
return; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
sub _set_taxon { |
911
|
74
|
|
|
74
|
|
134
|
my ( $self, $obj, $taxa ) = @_; |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
# first case: a taxon by $obj's name already exists |
914
|
74
|
50
|
|
|
|
150
|
if ( my $taxon = $taxa->get_by_name( $obj->get_name ) ) { |
915
|
74
|
|
|
|
|
189
|
$obj->set_taxon($taxon); |
916
|
74
|
|
|
|
|
145
|
return $self; |
917
|
|
|
|
|
|
|
} |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# second case: no taxon by $obj's name exists yet |
920
|
|
|
|
|
|
|
else { |
921
|
0
|
|
|
|
|
0
|
my $taxon = $self->_factory->create_taxon( '-name' => $obj->get_name ); |
922
|
0
|
|
|
|
|
0
|
$taxa->insert($taxon); |
923
|
0
|
|
|
|
|
0
|
$obj->set_taxon($taxon); |
924
|
0
|
|
|
|
|
0
|
return $self; |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
} |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
sub _resolve_taxon { |
929
|
132
|
|
|
132
|
|
199
|
my ( $self, $obj ) = @_; |
930
|
132
|
|
|
|
|
209
|
my $container = $self->_current; |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
# first case: the object is actually already |
933
|
|
|
|
|
|
|
# linked to a taxon |
934
|
132
|
100
|
|
|
|
284
|
if ( my $taxon = $obj->get_taxon ) { |
935
|
88
|
|
|
|
|
163
|
return $self; |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
# second case: the container is already linked |
939
|
|
|
|
|
|
|
# to a taxa block, but the object isn't |
940
|
44
|
100
|
|
|
|
141
|
if ( my $taxa = $container->get_taxa ) { |
941
|
30
|
|
|
|
|
67
|
$self->_set_taxon( $obj, $taxa ); |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
# third case: the container isn't explicitly linked, |
945
|
|
|
|
|
|
|
# but a taxa block has been seen |
946
|
44
|
100
|
|
|
|
112
|
if ( my $taxa = $self->_find_last_seen_taxa_block ) { |
947
|
42
|
|
|
|
|
159
|
$container->set_taxa($taxa); |
948
|
42
|
|
|
|
|
104
|
$self->_set_taxon( $obj, $taxa ); |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
# final case: no taxa block exists |
952
|
|
|
|
|
|
|
else { |
953
|
2
|
|
|
|
|
11
|
my $taxa = $container->make_taxa; |
954
|
2
|
|
|
|
|
5
|
pop @{ $self->{'_context'} }; |
|
2
|
|
|
|
|
6
|
|
955
|
2
|
|
|
|
|
4
|
push @{ $self->{'_context'} }, $taxa, $container; |
|
2
|
|
|
|
|
5
|
|
956
|
2
|
|
|
|
|
11
|
$self->_set_taxon( $obj, $taxa ); |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
} |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
sub _resolve_ambig { |
961
|
39
|
|
|
39
|
|
83
|
my ( $self, $datum, $chars ) = @_; |
962
|
39
|
|
|
|
|
112
|
my %brackets = ( |
963
|
|
|
|
|
|
|
'(' => ')', |
964
|
|
|
|
|
|
|
'{' => '}', |
965
|
|
|
|
|
|
|
); |
966
|
39
|
|
|
|
|
92
|
my $to = $datum->get_type_object; |
967
|
39
|
|
|
|
|
60
|
my @resolved; |
968
|
39
|
|
|
|
|
55
|
my $in_set = 0; |
969
|
39
|
|
|
|
|
56
|
my @set; |
970
|
|
|
|
|
|
|
my $close; |
971
|
39
|
|
|
|
|
52
|
for my $c ( @{$chars} ) { |
|
39
|
|
|
|
|
77
|
|
972
|
|
|
|
|
|
|
|
973
|
228
|
50
|
33
|
|
|
547
|
if ( not $in_set and not exists $brackets{$c} ) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
974
|
228
|
50
|
|
|
|
498
|
push @resolved, $c if defined $c; |
975
|
|
|
|
|
|
|
} |
976
|
|
|
|
|
|
|
elsif ( not $in_set and exists $brackets{$c} ) { |
977
|
0
|
|
|
|
|
0
|
$in_set++; |
978
|
0
|
|
|
|
|
0
|
$close = $brackets{$c}; |
979
|
|
|
|
|
|
|
} |
980
|
|
|
|
|
|
|
elsif ( $in_set and $c ne $close ) { |
981
|
0
|
|
|
|
|
0
|
push @set, $c; |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
elsif ( $in_set and $c eq $close ) { |
984
|
0
|
|
|
|
|
0
|
push @resolved, $to->get_symbol_for_states(@set); |
985
|
0
|
|
|
|
|
0
|
@set = (); |
986
|
0
|
|
|
|
|
0
|
$in_set = 0; |
987
|
0
|
|
|
|
|
0
|
$close = undef; |
988
|
|
|
|
|
|
|
} |
989
|
|
|
|
|
|
|
} |
990
|
39
|
|
|
|
|
104
|
return \@resolved; |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
sub _codons { |
994
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
995
|
0
|
|
|
|
|
0
|
$self->_logger->info(shift); |
996
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_begin'} ) { |
997
|
0
|
|
|
|
|
0
|
$self->{'_begin'} = 0; |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
sub _matrix { |
1002
|
57
|
|
|
57
|
|
86
|
my $self = shift; |
1003
|
57
|
|
|
|
|
78
|
my $token = shift; |
1004
|
57
|
|
|
|
|
132
|
$self->_add_matrix_metadata; |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# first token: 'MATRIX', i.e. we're just starting to parse |
1007
|
|
|
|
|
|
|
# the actual matrix. Here we need to switch to "linemode", |
1008
|
|
|
|
|
|
|
# so that subsequently tokens will be array references (all |
1009
|
|
|
|
|
|
|
# the tokens on a line). This is so that we can handle |
1010
|
|
|
|
|
|
|
# interleaved matrices, which unfortunately need line breaks |
1011
|
|
|
|
|
|
|
# in them. |
1012
|
57
|
100
|
66
|
|
|
125
|
if ( not looks_like_instance( $token, 'ARRAY' ) and uc($token) eq 'MATRIX' ) |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
1013
|
|
|
|
|
|
|
{ |
1014
|
9
|
|
|
|
|
23
|
$self->{'_linemode'} = 1; |
1015
|
9
|
|
|
|
|
25
|
$self->_logger->info("starting matrix"); |
1016
|
9
|
|
|
|
|
16
|
return; |
1017
|
|
|
|
|
|
|
} |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
# a row inside the matrix, after adding tokens to row, nothing |
1020
|
|
|
|
|
|
|
# else to do |
1021
|
|
|
|
|
|
|
elsif ( looks_like_instance( $token, 'ARRAY' ) |
1022
|
90
|
|
|
|
|
303
|
and not grep { /^;$/ } @{$token} ) |
|
48
|
|
|
|
|
93
|
|
1023
|
|
|
|
|
|
|
{ |
1024
|
39
|
|
|
|
|
107
|
$self->_add_tokens_to_row($token); |
1025
|
39
|
|
|
|
|
105
|
$self->_logger->info("adding tokens to row"); |
1026
|
39
|
|
|
|
|
149
|
return; |
1027
|
|
|
|
|
|
|
} |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
# the last row of the matrix, after adding tokens to row, |
1030
|
|
|
|
|
|
|
# instantiate & populate datum objects, link against taxa |
1031
|
|
|
|
|
|
|
# objects |
1032
|
|
|
|
|
|
|
elsif ( looks_like_instance( $token, 'ARRAY' ) |
1033
|
9
|
|
|
|
|
56
|
and grep { /^;$/ } @{$token} ) |
|
9
|
|
|
|
|
179
|
|
1034
|
|
|
|
|
|
|
{ |
1035
|
9
|
|
|
|
|
32
|
$self->_add_tokens_to_row($token); |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
# link to taxa |
1038
|
9
|
|
|
|
|
17
|
for my $row ( @{ $self->{'_matrixrowlabels'} } ) { |
|
9
|
|
|
|
|
115
|
|
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# create new datum |
1041
|
39
|
|
|
|
|
115
|
my $datum = $self->_factory->create_datum( |
1042
|
|
|
|
|
|
|
'-type_object' => $self->_current->get_type_object, |
1043
|
|
|
|
|
|
|
'-name' => $row, |
1044
|
|
|
|
|
|
|
); |
1045
|
|
|
|
|
|
|
my $char = |
1046
|
39
|
|
|
|
|
148
|
$self->_resolve_ambig( $datum, $self->{'_matrix'}->{$row} ); |
1047
|
39
|
|
|
|
|
150
|
$datum->set_char($char); |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
# insert new datum in matrix |
1050
|
39
|
|
|
|
|
93
|
$self->_current->insert($datum); |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
# link to taxon |
1053
|
39
|
|
|
|
|
138
|
$self->_resolve_taxon($datum); |
1054
|
39
|
|
|
|
|
119
|
my ( $length, $seq ) = ( $datum->get_length, $datum->get_char ); |
1055
|
39
|
|
|
|
|
118
|
$self->_logger->info("parsed $length characters for ${row}: $seq"); |
1056
|
|
|
|
|
|
|
} |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# Let's avoid these! |
1059
|
9
|
100
|
33
|
|
|
29
|
if ( $self->_current->get_nchar != $self->{'_nchar'} ) { |
|
|
50
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
my ( $obs, $exp ) = |
1061
|
1
|
|
|
|
|
5
|
( $self->_current->get_nchar, $self->{'_nchar'} ); |
1062
|
1
|
|
|
|
|
6
|
_bad_format("Observed and expected nchar mismatch: $obs vs. $exp"); |
1063
|
|
|
|
|
|
|
} |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
# ntax is only defined for "data" blocks (which have ntax token), |
1066
|
|
|
|
|
|
|
# not for "characters" blocks (which should match up with taxa block) |
1067
|
|
|
|
|
|
|
elsif ( defined $self->{'_ntax'} |
1068
|
|
|
|
|
|
|
and $self->_current->get_ntax != $self->{'_ntax'} ) |
1069
|
|
|
|
|
|
|
{ |
1070
|
0
|
|
|
|
|
0
|
my ( $obs, $exp ) = ( $self->_current->get_ntax, $self->{'_ntax'} ); |
1071
|
0
|
|
|
|
|
0
|
_bad_format("Observed and expected ntax mismatch: $obs vs. $exp"); |
1072
|
|
|
|
|
|
|
} |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
# XXX matrix clean up here |
1075
|
8
|
|
|
|
|
25
|
$self->{'_ntax'} = undef; |
1076
|
8
|
|
|
|
|
19
|
$self->{'_nchar'} = undef; |
1077
|
8
|
|
|
|
|
20
|
$self->{'_matrixtype'} = undef; |
1078
|
8
|
|
|
|
|
48
|
$self->{'_matrix'} = {}; |
1079
|
8
|
|
|
|
|
21
|
$self->{'_matrixrowlabels'} = []; |
1080
|
8
|
|
|
|
|
28
|
$self->{'_linemode'} = 0; |
1081
|
|
|
|
|
|
|
} |
1082
|
|
|
|
|
|
|
} |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
sub _bad_format { |
1085
|
2
|
|
|
2
|
|
13
|
throw 'BadFormat' => shift; |
1086
|
|
|
|
|
|
|
} |
1087
|
408
|
|
|
408
|
|
1337
|
sub _current { shift->{'_context'}->[-1] } |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
sub _trees { |
1090
|
5
|
|
|
5
|
|
10
|
my $self = shift; |
1091
|
5
|
50
|
|
|
|
16
|
if ( $self->{'_begin'} ) { |
1092
|
5
|
|
|
|
|
10
|
$self->{'_begin'} = 0; |
1093
|
5
|
|
|
|
|
9
|
$self->{'_trees'} = ''; |
1094
|
5
|
|
|
|
|
16
|
$self->{'_treenames'} = []; |
1095
|
5
|
|
|
|
|
7
|
push @{ $self->{'_context'} }, $self->_factory->create_forest; |
|
5
|
|
|
|
|
25
|
|
1096
|
5
|
|
|
|
|
26
|
$self->_logger->info("starting trees block"); |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
} |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
sub _translate { |
1101
|
171
|
|
|
171
|
|
203
|
my $self = shift; |
1102
|
171
|
|
|
|
|
221
|
my $i = $self->{'_i'}; |
1103
|
171
|
100
|
100
|
|
|
354
|
if ( $i && $i == 1 ) |
1104
|
|
|
|
|
|
|
{ # actually, $i can be 0 according to BayesPhylogenies translation table |
1105
|
5
|
|
|
|
|
19
|
$self->_logger->info("starting translation table"); |
1106
|
|
|
|
|
|
|
} |
1107
|
171
|
100
|
100
|
|
|
787
|
if ( !defined($i) && $_[0] =~ m/^\d+$/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1108
|
57
|
|
|
|
|
97
|
$self->{'_i'} = shift; |
1109
|
57
|
|
|
|
|
130
|
$self->{'_translate'}->[ $self->{'_i'} ] = undef; |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
elsif (defined($i) |
1112
|
|
|
|
|
|
|
&& exists $self->{'_translate'}->[$i] |
1113
|
|
|
|
|
|
|
&& !defined $self->{'_translate'}->[$i] |
1114
|
|
|
|
|
|
|
&& $_[0] ne ';' ) |
1115
|
|
|
|
|
|
|
{ |
1116
|
57
|
|
|
|
|
84
|
$self->{'_translate'}->[$i] = $_[0]; |
1117
|
57
|
|
|
|
|
128
|
$self->_logger->debug("Translation: $i => $_[0]"); |
1118
|
57
|
|
|
|
|
95
|
$self->{'_i'} = undef; |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub _tree { |
1123
|
212
|
|
|
212
|
|
245
|
my $self = shift; |
1124
|
212
|
100
|
100
|
|
|
402
|
if ( not $self->{'_treename'} and $_[0] !~ m/^(U?TREE|\*)$/i ) { |
1125
|
13
|
|
|
|
|
23
|
$self->{'_treename'} = $_[0]; |
1126
|
|
|
|
|
|
|
} |
1127
|
212
|
100
|
66
|
|
|
375
|
if ( $_[0] eq '=' and not $self->{'_treestart'} ) { |
1128
|
13
|
|
|
|
|
21
|
$self->{'_treestart'} = 1; |
1129
|
|
|
|
|
|
|
} |
1130
|
212
|
100
|
100
|
|
|
503
|
if ( $_[0] ne '=' and $self->{'_treestart'} ) { |
1131
|
173
|
|
|
|
|
236
|
$self->{'_tree'} .= $_[0]; |
1132
|
|
|
|
|
|
|
} |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
# tr/// returns # of replacements, hence can be used to check |
1135
|
|
|
|
|
|
|
# tree description is balanced |
1136
|
212
|
100
|
100
|
|
|
828
|
if ( $self->{'_treestart'} |
|
|
|
100
|
|
|
|
|
1137
|
|
|
|
|
|
|
and $self->{'_tree'} |
1138
|
|
|
|
|
|
|
and $self->{'_tree'} =~ tr/(/(/ == $self->{'_tree'} =~ tr/)/)/ ) |
1139
|
|
|
|
|
|
|
{ |
1140
|
13
|
|
|
|
|
22
|
my $translated = $self->{'_tree'}; |
1141
|
13
|
|
|
|
|
42
|
my $translate = $self->{'_translate'}; |
1142
|
13
|
100
|
|
|
|
28
|
my $start = |
1143
|
|
|
|
|
|
|
exists $translate->[0] |
1144
|
|
|
|
|
|
|
? 0 |
1145
|
|
|
|
|
|
|
: 1; # BayesPhylogenies starts translation table w. 0 |
1146
|
13
|
|
|
|
|
19
|
for my $i ( $start .. $#{$translate} ) { |
|
13
|
|
|
|
|
34
|
|
1147
|
93
|
|
|
|
|
2088
|
$translated =~ s/(\(|,)$i(,|\)|:)/$1$translate->[$i]$2/; |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
my ( $logtreename, $logtree ) = |
1150
|
13
|
|
|
|
|
45
|
( $self->{'_treename'}, $self->{'_tree'} ); |
1151
|
13
|
|
|
|
|
43
|
$self->_logger->info("tree: $logtreename string: $logtree"); |
1152
|
13
|
|
|
|
|
42
|
$self->{'_trees'} .= $translated . ';'; |
1153
|
13
|
|
|
|
|
18
|
push @{ $self->{'_treenames'} }, $self->{'_treename'}; |
|
13
|
|
|
|
|
32
|
|
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
# XXX tree cleanup here |
1156
|
13
|
|
|
|
|
21
|
$self->{'_treestart'} = 0; |
1157
|
13
|
|
|
|
|
21
|
$self->{'_tree'} = undef; |
1158
|
13
|
|
|
|
|
23
|
$self->{'_treename'} = undef; |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
sub _end { |
1163
|
33
|
|
|
33
|
|
57
|
my $self = shift; |
1164
|
33
|
|
|
|
|
86
|
$self->{'_translate'} = []; |
1165
|
33
|
100
|
100
|
|
|
176
|
if ( uc $self->{'_previous'} eq ';' and $self->{'_trees'} ) { |
1166
|
5
|
|
|
|
|
20
|
my $forest = $self->_current; |
1167
|
|
|
|
|
|
|
my $trees = parse( |
1168
|
|
|
|
|
|
|
'-format' => 'newick', |
1169
|
5
|
|
|
|
|
39
|
'-string' => $self->{'_trees'}, |
1170
|
|
|
|
|
|
|
'-as_project' => 0 |
1171
|
|
|
|
|
|
|
); |
1172
|
5
|
|
|
|
|
37
|
for my $tree ( @{ $trees->get_entities } ) { |
|
5
|
|
|
|
|
24
|
|
1173
|
13
|
|
|
|
|
34
|
$forest->insert($tree); |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
# set tree names |
1177
|
5
|
|
|
|
|
14
|
for my $i ( 0 .. $#{ $self->{'_treenames'} } ) { |
|
5
|
|
|
|
|
22
|
|
1178
|
13
|
|
|
|
|
43
|
$forest->get_by_index($i)->set_name( $self->{'_treenames'}->[$i] ); |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
# link tips to taxa |
1182
|
5
|
|
|
|
|
12
|
for my $tree ( @{ $forest->get_entities } ) { |
|
5
|
|
|
|
|
18
|
|
1183
|
13
|
|
|
|
|
22
|
for my $tip ( @{ $tree->get_terminals } ) { |
|
13
|
|
|
|
|
49
|
|
1184
|
93
|
|
|
|
|
169
|
$self->_resolve_taxon($tip); |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# XXX trees cleanup here |
1189
|
5
|
|
|
|
|
18
|
$self->{'_trees'} = ''; |
1190
|
5
|
|
|
|
|
47
|
$self->{'_treenames'} = []; |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
sub _semicolon { |
1195
|
189
|
|
|
189
|
|
264
|
my $self = shift; |
1196
|
189
|
50
|
|
|
|
1187
|
if ( uc $self->{'_previous'} eq 'MATRIX' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
0
|
$self->{'_matrixtype'} = undef; |
1198
|
0
|
|
|
|
|
0
|
$self->{'_matrix'} = {}; |
1199
|
0
|
|
|
|
|
0
|
$self->{'_charlabels'} = []; |
1200
|
0
|
|
|
|
|
0
|
$self->{'_statelabels'} = []; |
1201
|
0
|
|
|
|
|
0
|
$self->{'_linemode'} = 0; |
1202
|
0
|
0
|
|
|
|
0
|
if ( not $self->_current->get_ntax ) { |
1203
|
0
|
|
|
|
|
0
|
my $taxon = {}; |
1204
|
0
|
|
|
|
|
0
|
foreach my $row ( @{ $self->_current->get_entities } ) { |
|
0
|
|
|
|
|
0
|
|
1205
|
0
|
|
|
|
|
0
|
$taxon->{ $row->get_taxon }++; |
1206
|
|
|
|
|
|
|
} |
1207
|
0
|
|
|
|
|
0
|
my $ntax = scalar keys %{$taxon}; |
|
0
|
|
|
|
|
0
|
|
1208
|
|
|
|
|
|
|
} |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
# finalize character set |
1212
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARSET' ) { |
1213
|
2
|
|
|
|
|
8
|
my $matrix = $self->_find_last_seen_matrix( $self->{'_charset'}->{'matrix'} ); |
1214
|
2
|
|
|
|
|
7
|
my $characters = $matrix->get_characters; |
1215
|
2
|
|
|
|
|
6
|
my $set = $self->_factory->create_set( '-name' => $self->{'_charset'}->{'name'} ); |
1216
|
2
|
|
|
|
|
12
|
$characters->add_set($set); |
1217
|
2
|
|
|
|
|
4
|
my $range = $self->{'_charset'}->{'range'}; |
1218
|
2
|
|
|
|
|
4
|
my @range; |
1219
|
2
|
50
|
|
|
|
7
|
if ( ref($range) eq 'ARRAY' ) { |
1220
|
2
|
|
|
|
|
4
|
while ( @{ $range } ) { |
|
6
|
|
|
|
|
12
|
|
1221
|
4
|
|
|
|
|
6
|
my $index = shift @{ $range }; |
|
4
|
|
|
|
|
7
|
|
1222
|
4
|
100
|
66
|
|
|
14
|
if ( $range->[0] && $range->[0] eq '-' ) { |
1223
|
2
|
|
|
|
|
4
|
shift @{ $range }; |
|
2
|
|
|
|
|
24
|
|
1224
|
2
|
|
|
|
|
5
|
my $end = shift @{ $range }; |
|
2
|
|
|
|
|
4
|
|
1225
|
2
|
|
|
|
|
10
|
push @range, ( $index - 1 ) .. ( $end - 1 ); |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
else { |
1228
|
2
|
|
|
|
|
5
|
push @range, ( $index - 1 ); |
1229
|
|
|
|
|
|
|
} |
1230
|
|
|
|
|
|
|
} |
1231
|
2
|
|
|
|
|
4
|
for my $i ( @range ) { |
1232
|
6
|
|
|
|
|
18
|
my $character = $characters->get_by_index($i); |
1233
|
6
|
50
|
|
|
|
10
|
if ( $character ) { |
1234
|
6
|
|
|
|
|
17
|
$characters->add_to_set($character,$set); |
1235
|
|
|
|
|
|
|
} |
1236
|
|
|
|
|
|
|
else { |
1237
|
0
|
|
|
|
|
0
|
throw 'API' => "No character at index $i"; |
1238
|
|
|
|
|
|
|
} |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
} |
1241
|
2
|
|
|
|
|
8
|
$self->{'_charset'} = {}; |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
# finalize character state labels |
1245
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARSTATELABELS' ) { |
1246
|
2
|
|
|
|
|
10
|
my $matrix = $self->_find_last_seen_matrix; |
1247
|
2
|
|
|
|
|
3
|
my @labels = @{ $self->{'_charstatelabels'} }; |
|
2
|
|
|
|
|
7
|
|
1248
|
2
|
100
|
|
|
|
16
|
if ( $matrix->get_type =~ m/continuous/i ) { |
1249
|
1
|
|
|
|
|
3
|
my @charlabels; |
1250
|
1
|
|
|
|
|
2
|
my $charnum = 1; |
1251
|
1
|
|
|
|
|
4
|
while (@labels) { |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
# expecting an index at the beginning of the statement |
1254
|
2
|
|
|
|
|
3
|
my $index = shift @labels; |
1255
|
2
|
50
|
|
|
|
6
|
$index != $charnum && _bad_format( "Expecting character number $charnum, observed $index in CHARSTATELABELS" ); |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
# then the character label |
1258
|
2
|
|
|
|
|
3
|
push @charlabels, shift @labels; |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
# then a comma |
1261
|
2
|
100
|
|
|
|
5
|
if ( @labels ) { |
1262
|
1
|
50
|
|
|
|
5
|
$labels[0] eq ',' ? shift @labels : _bad_format( "Expecting , observed $labels[0] in CHARSTATELABELS" ); |
1263
|
|
|
|
|
|
|
} |
1264
|
2
|
|
|
|
|
5
|
$charnum++; |
1265
|
|
|
|
|
|
|
} |
1266
|
1
|
|
|
|
|
6
|
$matrix->set_charlabels(\@charlabels); |
1267
|
1
|
|
|
|
|
4
|
$self->{'_charstatelabels'} = []; |
1268
|
|
|
|
|
|
|
} |
1269
|
|
|
|
|
|
|
else { |
1270
|
1
|
|
|
|
|
3
|
my ( @charlabels, @statelabels ); |
1271
|
1
|
|
|
|
|
2
|
my $charnum = 1; |
1272
|
1
|
|
|
|
|
4
|
while (@labels) { |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
# expecting an index at the beginning of the statement |
1275
|
2
|
|
|
|
|
3
|
my $index = shift @labels; |
1276
|
2
|
50
|
|
|
|
6
|
$index != $charnum && _bad_format( "Expecting character number $charnum, observed $index in CHARSTATELABELS" ); |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
# then the character label |
1279
|
2
|
|
|
|
|
4
|
push @charlabels, shift @labels; |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
# then a forward slash |
1282
|
2
|
|
|
|
|
4
|
my $slash = shift @labels; |
1283
|
2
|
50
|
|
|
|
4
|
$slash ne '/' && _bad_format( "Expecting /, observed $slash in CHARSTATELABELS" ); |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
# then a list of state labels |
1286
|
2
|
|
|
|
|
3
|
my @stateset; |
1287
|
2
|
|
100
|
|
|
16
|
push @stateset, shift @labels while(@labels and $labels[0] ne ','); |
1288
|
2
|
|
|
|
|
5
|
push @statelabels, \@stateset; |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
# then a comma |
1291
|
2
|
100
|
|
|
|
5
|
if ( @labels ) { |
1292
|
1
|
50
|
|
|
|
3
|
$labels[0] eq ',' ? shift @labels : _bad_format( "Expecting , observed $labels[0] in CHARSTATELABELS" ); |
1293
|
|
|
|
|
|
|
} |
1294
|
2
|
|
|
|
|
4
|
$charnum++; |
1295
|
|
|
|
|
|
|
} |
1296
|
1
|
|
|
|
|
7
|
$matrix->set_charlabels(\@charlabels); |
1297
|
1
|
|
|
|
|
4
|
$matrix->set_statelabels(\@statelabels); |
1298
|
1
|
|
|
|
|
4
|
$self->{'_charstatelabels'} = []; |
1299
|
|
|
|
|
|
|
} |
1300
|
|
|
|
|
|
|
} |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
# finalize taxon set |
1303
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'TAXSET' ) { |
1304
|
2
|
|
|
|
|
7
|
my $taxa = $self->_find_last_seen_taxa_block( $self->{'_taxset'}->{'taxa'} ); |
1305
|
2
|
|
|
|
|
7
|
my $set = $self->_factory->create_set( '-name' => $self->{'_taxset'}->{'name'} ); |
1306
|
2
|
|
|
|
|
13
|
$taxa->add_set($set); |
1307
|
2
|
|
|
|
|
4
|
my $range = $self->{'_taxset'}->{'range'}; |
1308
|
2
|
|
|
|
|
4
|
my @range; |
1309
|
2
|
|
|
|
|
4
|
while ( @{ $range } ) { |
|
8
|
|
|
|
|
17
|
|
1310
|
6
|
|
|
|
|
7
|
my $index = shift @{ $range }; |
|
6
|
|
|
|
|
9
|
|
1311
|
6
|
100
|
100
|
|
|
18
|
if ( $range->[0] && $range->[0] eq '-' ) { |
1312
|
2
|
|
|
|
|
4
|
shift @{ $range }; |
|
2
|
|
|
|
|
3
|
|
1313
|
2
|
|
|
|
|
4
|
my $end = shift @{ $range }; |
|
2
|
|
|
|
|
5
|
|
1314
|
2
|
|
|
|
|
7
|
push @range, ( $index - 1 ) .. ( $end - 1 ); |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
else { |
1317
|
4
|
|
|
|
|
7
|
push @range, ( $index - 1 ); |
1318
|
|
|
|
|
|
|
} |
1319
|
|
|
|
|
|
|
} |
1320
|
2
|
|
|
|
|
5
|
for my $i ( @range ) { |
1321
|
8
|
|
|
|
|
22
|
my $taxon = $taxa->get_by_index($i); |
1322
|
8
|
50
|
|
|
|
12
|
if ( $taxon ) { |
1323
|
8
|
|
|
|
|
20
|
$taxa->add_to_set($taxon,$set); |
1324
|
|
|
|
|
|
|
} |
1325
|
|
|
|
|
|
|
else { |
1326
|
0
|
|
|
|
|
0
|
_bad_format( "No taxon at index $i" ); |
1327
|
|
|
|
|
|
|
} |
1328
|
|
|
|
|
|
|
} |
1329
|
2
|
|
|
|
|
10
|
$self->{'_taxset'} = {}; |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
# finalize taxa labels |
1333
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'TAXLABELS' ) { |
1334
|
13
|
|
|
|
|
28
|
foreach my $name ( @{ $self->{'_taxlabels'} } ) { |
|
13
|
|
|
|
|
33
|
|
1335
|
59
|
|
|
|
|
154
|
my $taxon = $self->_factory->create_taxon( '-name' => $name ); |
1336
|
59
|
|
|
|
|
172
|
$self->_current->insert($taxon); |
1337
|
|
|
|
|
|
|
} |
1338
|
13
|
100
|
|
|
|
42
|
if ( $self->_current->get_ntax != $self->{'_ntax'} ) { |
1339
|
|
|
|
|
|
|
_bad_format( |
1340
|
|
|
|
|
|
|
sprintf( |
1341
|
|
|
|
|
|
|
'Mismatch between observed and expected ntax: %d vs %d', |
1342
|
1
|
|
|
|
|
3
|
$self->_current->get_ntax, $self->{'_ntax'} |
1343
|
|
|
|
|
|
|
) |
1344
|
|
|
|
|
|
|
); |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
# XXX taxa cleanup here |
1348
|
12
|
|
|
|
|
28
|
$self->{'_ntax'} = undef; |
1349
|
12
|
|
|
|
|
32
|
$self->{'_taxlabels'} = []; |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
# finalize symbols list |
1353
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'SYMBOLS' ) { |
1354
|
5
|
|
|
|
|
9
|
my $logsymbols = join( ' ', @{ $self->{'_symbols'} } ); |
|
5
|
|
|
|
|
14
|
|
1355
|
5
|
|
|
|
|
18
|
$self->_logger->info("symbols: $logsymbols"); |
1356
|
5
|
|
|
|
|
14
|
$self->{'_symbols'} = []; |
1357
|
|
|
|
|
|
|
} |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
# finalize character labels |
1360
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARLABELS' ) { |
1361
|
0
|
0
|
|
|
|
|
if ( @{ $self->{'_charlabels'} } ) { |
|
0
|
|
|
|
|
|
|
1362
|
0
|
|
|
|
|
|
my $logcharlabels = join( ' ', @{ $self->{'_charlabels'} } ); |
|
0
|
|
|
|
|
|
|
1363
|
0
|
|
|
|
|
|
$self->_logger->info("charlabels: $logcharlabels"); |
1364
|
|
|
|
|
|
|
} |
1365
|
|
|
|
|
|
|
} |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
# finalize state labels |
1368
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'STATELABELS' ) { |
1369
|
0
|
0
|
|
|
|
|
if ( @{ $self->{'_statelabels'} } ) { |
|
0
|
|
|
|
|
|
|
1370
|
0
|
|
|
|
|
|
my $logstatelabels = join( ' ', @{ $self->{'_statelabels'} } ); |
|
0
|
|
|
|
|
|
|
1371
|
0
|
|
|
|
|
|
$self->_logger->info("statelabels: $logstatelabels"); |
1372
|
|
|
|
|
|
|
} |
1373
|
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
# podinherit_insert_token |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
=head1 SEE ALSO |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
1381
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
=over |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
The nexus parser is called by the L<Bio::Phylo::IO> object. Look there for |
1388
|
|
|
|
|
|
|
examples of file parsing and manipulation. |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
=back |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
=head1 CITATION |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
1401
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
1402
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
1403
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
=cut |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
1; |