line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Nexus; |
2
|
6
|
|
|
6
|
|
39
|
use strict; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
183
|
|
3
|
6
|
|
|
6
|
|
30
|
use base 'Bio::Phylo::Parsers::Abstract'; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
1757
|
|
4
|
6
|
|
|
6
|
|
54
|
use Bio::Phylo::Factory; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
26
|
|
5
|
6
|
|
|
6
|
|
36
|
use Bio::Phylo::IO 'parse'; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
369
|
|
6
|
6
|
|
|
6
|
|
40
|
use Bio::Phylo::Util::CONSTANT qw':objecttypes looks_like_instance'; |
|
6
|
|
|
|
|
15
|
|
|
6
|
|
|
|
|
1318
|
|
7
|
6
|
|
|
6
|
|
42
|
use Bio::Phylo::Util::Exceptions 'throw'; |
|
6
|
|
|
|
|
10
|
|
|
6
|
|
|
|
|
3241
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# TODO: handle mixed? distances, splits, bipartitions

# Object type constants; presumably supplied by the ':objecttypes' import
# from Bio::Phylo::Util::CONSTANT.
my $TAXA   = _TAXA_;
my $MATRIX = _MATRIX_;

# useful regular expressions

# crude, only checks first char, use after tokenizing!
my $COMMENT = qr|^\[|;

# catch all for opening/closing square brackets and quotes
my $QUOTES_OR_BRACKETS = qr/[\[\]'"]/mox;

# capturing regex for opening sq. br. & q.
my $OPENING_QUOTE_OR_BRACKET = qr/^(.*?)([\['"].*)$/mox;

# Default state of the parser object. The underscored (private) fields hold
# parsing context; the remaining keys form a dispatch table whose sub
# references are invoked during parsing, keyed on the (lower-cased) token
# that triggers them.
my %defaults = (

    # scalar context fields that start out undefined
    (
        map { $_ => undef }
          qw(
          _lines _current _previous _begin _ntax _nchar _gap _missing _i
          _tree _trees _treename _treestart _row _matrixtype
          )
    ),

    # numeric flags
    ( map { $_ => 0 } qw(_found _linemode) ),

    # list-valued context fields (only the reference type matters here;
    # _process_defaults hands every parser a fresh array)
    (
        map { $_ => [] }
          qw(
          _taxlabels _tokens _context _translate _symbols _charlabels
          _statelabels _charstatelabels _tmpstatelabels _comments
          _treenames _matrixrowlabels
          )
    ),

    # hash-valued context fields
    ( map { $_ => {} } qw(_matrix _charset _taxset) ),

    # token => handler dispatch table
    'begin'           => \&_begin,
    'taxa'            => \&_taxa,
    'title'           => \&_title,
    'dimensions'      => \&_dimensions,
    'ntax'            => \&_ntax,
    'taxlabels'       => \&_taxlabels,
    'blockid'         => \&_blockid,
    'data'            => \&_data,
    'characters'      => \&_characters,
    'codons'          => \&_codons,
    'nchar'           => \&_nchar,
    'format'          => \&_format,
    'datatype'        => \&_datatype,
    'matchchar'       => \&_matchchar,
    'gap'             => \&_gap,
    'missing'         => \&_missing,
    'charlabels'      => \&_charlabels,
    'statelabels'     => \&_statelabels,
    'charstatelabels' => \&_charstatelabels,
    'symbols'         => \&_symbols,
    'items'           => \&_items,
    'matrix'          => \&_matrix,
    'charset'         => \&_charset,
    'taxset'          => \&_taxset,
    'trees'           => \&_trees,
    'translate'       => \&_translate,
    'tree'            => \&_tree,
    'utree'           => \&_tree,
    'end'             => \&_end,
    'endblock'        => \&_end,
    '#nexus'          => \&_nexus,
    'link'            => \&_link,
    ';'               => \&_semicolon,
    'interleave'      => \&_interleave,
);
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 NAME |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Nexus - Parser used by Bio::Phylo::IO, no serviceable parts inside |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 DESCRIPTION |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
This module parses nexus files. It is called by the L<Bio::Phylo::IO> module, |
101
|
|
|
|
|
|
|
there is no direct usage. The parser can handle files and strings with multiple |
102
|
|
|
|
|
|
|
tree, taxon, and characters blocks whose links are defined using Mesquite's |
103
|
|
|
|
|
|
|
"TITLE = 'some_name'" and "LINK TAXA = 'some_name'" tokens. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The parser returns a reference to an array containing one or more taxa, trees |
106
|
|
|
|
|
|
|
and matrices objects. Nexus comments are stripped, private nexus blocks (and the |
107
|
|
|
|
|
|
|
'assumptions' block) are skipped. It currently doesn't handle 'mixed' data. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=begin comment |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Type : Constructor |
112
|
|
|
|
|
|
|
Title : _new |
113
|
|
|
|
|
|
|
Usage : my $nexus = Bio::Phylo::Parsers::Nexus->_new; |
114
|
|
|
|
|
|
|
Function: Initializes a Bio::Phylo::Parsers::Nexus object. |
115
|
|
|
|
|
|
|
Returns : A Bio::Phylo::Parsers::Nexus object. |
116
|
|
|
|
|
|
|
Args : none. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=end comment |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# (Re)initializes the parser object from %defaults. Array and hash defaults
# are replaced by fresh, empty references so that no state is shared with
# the template table; every other default (undef, numbers, handler code
# references) is copied as is. Returns the invocant.
sub _process_defaults {
    my ($self) = @_;
    foreach my $field ( keys %defaults ) {
        my $template = $defaults{$field};
        $self->{$field} =
            looks_like_instance( $template, 'ARRAY' ) ? []
          : looks_like_instance( $template, 'HASH' )  ? {}
          :                                             $template;
    }
    return $self;
}
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=begin comment |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Type : Wrapper |
141
|
|
|
|
|
|
|
Title : _from_handle(\*FH) |
142
|
|
|
|
|
|
|
Usage : $nexus->_from_handle(\*FH); |
143
|
|
|
|
|
|
|
Function: Does all the parser magic, from a file handle |
144
|
|
|
|
|
|
|
Returns : ARRAY |
145
|
|
|
|
|
|
|
Args : \*FH = file handle |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=end comment |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=cut |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Main entry point of the parser: resets the parser state, reads the input
# into lines, tokenizes them and then walks over the tokens, dispatching to
# the handler subs stored in the object (see %defaults).
#
# Args   : passed through unchanged to _stringify and _post_process.
# Returns: whatever _post_process returns (the list of parsed blocks).
#
# Dispatch rules, per token:
#   * tokens starting with '[' are nexus comments and are collected in
#     $self->{'_comments'};
#   * a token whose lower-cased form is a key of the object that holds a
#     code reference becomes the "current" handler and is invoked with the
#     raw token;
#   * any other token is handed to the current handler.
# When a handler has switched on $self->{'_linemode'}, whole token lines
# (array references) are passed to the current handler instead.
sub _parse {
    my $self = shift;
    $self->_process_defaults;
    $self->_logger->info("going to parse nexus data");
    $self->{'_lines'}  = $self->_stringify(@_);
    $self->{'_tokens'} = $self->_tokenize( $self->{'_lines'} );

    # iterate over tokens, dispatch methods from %{ $self } table
    # This is the meat of the parsing, from here everything else is called.
    $self->_logger->info("tokenized and split data, going to parse blocks");
    my $private_block;

    # sliding window over the three most recently seen tokens
    my $token_queue = [ undef, undef, undef ];

  TOKEN_LINE: for my $token_line ( @{ $self->{'_tokens'} } ) {

        # line mode: the current handler consumes the whole line at once
        if ( $self->{'_linemode'} ) {
            my $c = $self->{ $self->{'_current'} };
            push @{$token_queue}, $token_line;
            shift @{$token_queue};
            $self->$c($token_line);
            next TOKEN_LINE;
        }

      RAW_TOKEN: for my $raw_token ( @{$token_line} ) {

            # comments are set aside, handlers attach them where needed
            if ( $raw_token =~ /^\[/ ) {
                push @{ $self->{'_comments'} }, $raw_token;
                next RAW_TOKEN;
            }
            my $lower_case_token = lc($raw_token);
            push @{$token_queue}, $lower_case_token;
            shift @{$token_queue};

            if ( exists $self->{$lower_case_token} and not $private_block ) {
                if ( ref $self->{$lower_case_token} eq 'CODE' ) {
                    $self->{'_previous'} = $self->{'_current'};
                    $self->{'_current'}  = $lower_case_token;

                    # pull code ref from dispatch table
                    my $c = $self->{$lower_case_token};

                    # invoke as object method
                    $self->$c($raw_token);
                    next RAW_TOKEN;
                }
            }
            elsif ( $self->{'_current'} and not $private_block ) {
                my $c = $self->{ $self->{'_current'} };
                $self->$c($raw_token);
                next RAW_TOKEN;
            }

            # $self->{'_begin'} is switched 'on' by &_begin(), and 'off'
            # again by any one of the appropriate subsequent tokens, i.e.
            # taxa, data, characters and trees
            if (    $self->{'_begin'}
                and not exists $self->{$lower_case_token}
                and not $private_block )
            {
                $private_block = $raw_token;
                next RAW_TOKEN;
            }

            # jump over private block content
            if (    $private_block
                and $token_queue->[-2] eq 'end'
                and $token_queue->[-1] eq ';' )
            {

                # log first, then clear: the message needs the block name
                $self->_logger->info(
                    "Skipped private $private_block block");
                $private_block = 0;
                next RAW_TOKEN;
            }
            else {
                next RAW_TOKEN;
            }
        }
    }
    return $self->_post_process(@_);
}
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# makes array reference of strings, one string per line, from the input
# handle returned by $self->_handle.
#
# Args   : accepted for interface compatibility, but not used.
# Returns: reference to an array holding every line of the input that
#          contains at least one non-whitespace character, with line
#          terminators removed.
sub _stringify {
    my $self = shift;
    $self->_logger->info("going to split nexus data on lines");
    my @lines;
    my $handle = $self->_handle;

    # read into a lexical instead of the global $_, so that the caller's $_
    # is not clobbered by the read loop
    while ( defined( my $line = <$handle> ) ) {

        # a record may still contain bare \r line endings; empty pieces
        # produced by the split are dropped by the grep
        push @lines, grep { /\S/ } split( /\n|\r|\r\n/, $line );
        $self->_logger->debug("read line: $line");
    }
    return \@lines;
}
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=begin comment |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Type : Method |
251
|
|
|
|
|
|
|
Title : _tokenize() |
252
|
|
|
|
|
|
|
Usage : $nexus->_tokenize($lines); |
253
|
|
|
|
|
|
|
Function: Tokenizes lines in $lines array ref |
254
|
|
|
|
|
|
|
Returns : Two dimensional ARRAY |
255
|
|
|
|
|
|
|
Args : An array ref of lines (e.g. read from an input file); |
256
|
|
|
|
|
|
|
Comments: This method accepts an array ref holding lines that may contain |
257
|
|
|
|
|
|
|
single quotes, double quotes or square brackets. Line breaks and |
258
|
|
|
|
|
|
|
spaces inside these quoted/bracketed fragments are ignored, otherwise |
259
|
|
|
|
|
|
|
it is split, e.g.: |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
[ |
262
|
|
|
|
|
|
|
[ '#NEXUS' ], |
263
|
|
|
|
|
|
|
[ 'BEGIN TAXA; [taxablock comment]' ], |
264
|
|
|
|
|
|
|
[ 'DIMENSIONS NTAX=3;' ], |
265
|
|
|
|
|
|
|
[ 'TAXLABELS "Taxon \' A" \'Taxon B\' TAXON[comment]C' ], |
266
|
|
|
|
|
|
|
...etc... |
267
|
|
|
|
|
|
|
] |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
becomes: |
270
|
|
|
|
|
|
|
[ |
271
|
|
|
|
|
|
|
[ '#NEXUS' ], |
272
|
|
|
|
|
|
|
[ |
273
|
|
|
|
|
|
|
'BEGIN', |
274
|
|
|
|
|
|
|
'TAXA', |
275
|
|
|
|
|
|
|
';', |
276
|
|
|
|
|
|
|
'[taxablock comment]' |
277
|
|
|
|
|
|
|
], |
278
|
|
|
|
|
|
|
[ |
279
|
|
|
|
|
|
|
'DIMENSIONS', |
280
|
|
|
|
|
|
|
'NTAX', |
281
|
|
|
|
|
|
|
'=', |
282
|
|
|
|
|
|
|
'3', |
283
|
|
|
|
|
|
|
';' |
284
|
|
|
|
|
|
|
], |
285
|
|
|
|
|
|
|
[ |
286
|
|
|
|
|
|
|
'TAXLABELS', |
287
|
|
|
|
|
|
|
'"Taxon \' A"', |
288
|
|
|
|
|
|
|
'\'Taxon B\'', |
289
|
|
|
|
|
|
|
'TAXON', |
290
|
|
|
|
|
|
|
'[comment]', |
291
|
|
|
|
|
|
|
'C' |
292
|
|
|
|
|
|
|
], |
293
|
|
|
|
|
|
|
...etc... |
294
|
|
|
|
|
|
|
] |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=end comment |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=cut |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# Splits the lines in $lines (array reference) into tokens.
#
# Returns: reference to a two dimensional array: one array reference per
#          logical line, holding that line's tokens.
# Throws : 'BadArgs' when a quote or comment is still open at the end of the
#          input, or when a closing square bracket is found outside of any
#          quote or comment.
#
# The work is done in two passes. The first pass cuts the input into
# fragments that are either plain text or a complete quoted string
# ('...', "...") or bracketed comment ([...]); quoted or bracketed fragments
# may span several input lines. The second pass splits the plain fragments
# on whitespace (treating '=', ';' and ',' as separate tokens) and keeps
# fragments that contain quotes or brackets verbatim.
sub _tokenize {
    my ( $self, $lines ) = @_;
    $self->_logger->info("going to split lines on tokens");

    # $extract     : text of the quoted/bracketed fragment collected so far
    # $INSIDE_QUOTE: nesting depth of the currently open quote/comment
    # $continue    : true if the next fragment extends the last token line
    my ( $extract, $INSIDE_QUOTE, $continue ) = ( '', 0, 0 );
    my ( @tokens, @split );
    my $CONTEXT_QB_AT_START = qr/^([\['"])(.*)$/mox;
    my $CONTEXT_CLOSER;
    my $QuoteContext;    # either " ' or [
    my $QuoteStartLine;
    my $LineCount = 0;
    my %CLOSE_CHAR = (
        '"' => '"',
        "'" => "'",
        '[' => ']',
    );
    my %INVERSE_CLOSE_CHAR = (
        '"' => '"',
        "'" => "'",
        ']' => '[',
        ')' => '(',
    );

    # tokenize
  LINE: for my $line ( @{$lines} ) {
        $LineCount++;
      TOKEN: while ( $line =~ /\S/ ) {

            # line in file has no quoting/bracketing characters, and
            # is no extension of a quoted/bracketed fragment starting
            # on a previous line
            if ( $line !~ $QUOTES_OR_BRACKETS && !$INSIDE_QUOTE ) {
                if ($continue) {
                    push @{ $tokens[-1] }, $line;
                    $continue = 0;
                }
                else {
                    push @tokens, [$line];
                }
                my $logline = join( ' ', @{ $tokens[-1] } );
                chomp($logline);
                $self->_logger->debug("Tokenized line $LineCount: $logline");
                next LINE;
            }

            # line in file has opening quoting/bracketing characters, and
            # is no extension of a quoted/bracketed fragment starting
            # on a previous line
            elsif ( $line =~ $OPENING_QUOTE_OR_BRACKET && !$INSIDE_QUOTE ) {
                my ( $start, $quoted ) = ( $1, $2 );
                push @tokens, [$start];
                $line    = $quoted;
                $extract = $quoted;
                $INSIDE_QUOTE++;
                $continue     = 1;
                $QuoteContext = substr( $quoted, 0, 1 );
                $self->_logger->debug("Line $LineCount contains $QuoteContext");
                $QuoteStartLine = $LineCount;

                # from here on only the matching opener/closer is relevant
                $CONTEXT_QB_AT_START = qr/^(\Q$QuoteContext\E)(.*)$/;
                my $context_closer = $CLOSE_CHAR{$QuoteContext};
                $CONTEXT_CLOSER = qr/^(.*?)(\Q$context_closer\E)(.*)$/;
                next TOKEN;
            }

            # line in file has no quoting/bracketing characters, and
            # is an extension of a quoted/bracketed fragment starting
            # on a previous line
            elsif ( $line !~ $CONTEXT_CLOSER && $INSIDE_QUOTE ) {
                $self->_logger->debug(
                    "Line $LineCount extends quote or comment");
                $extract .= $line;
                next LINE;
            }

            # fragment starts with the opening character of the current
            # quote/comment context
            elsif ( $line =~ $CONTEXT_QB_AT_START && $INSIDE_QUOTE ) {
                my ( $q, $remainder ) = ( $1, $1 . $2 );
                if ( $q eq '"' || $q eq "'" ) {
                    if ( $remainder =~ m/^($q[^$q]*?$q)(.*)$/ ) {
                        $self->_logger->debug(
                            "Line $LineCount closes $INVERSE_CLOSE_CHAR{$q} with $q"
                        );
                        push @{ $tokens[-1] }, ($1);
                        $line = $2;
                        $INSIDE_QUOTE--;
                        next TOKEN;
                    }
                    elsif ( $remainder =~ m/^$q[^$q]*$/ ) {
                        $extract .= $line;
                        $continue = 1;
                        next LINE;
                    }
                }
                elsif ( $q eq '[' ) {

                    # scan for the bracket that balances the opening one,
                    # taking nested comments into account
                    for my $i ( 1 .. length($line) ) {
                        $INSIDE_QUOTE++ if substr( $line, $i, 1 ) eq '[';
                        if ( $i and !$INSIDE_QUOTE ) {
                            push @{ $tokens[-1] }, substr( $line, 0, $i );
                            my $logqc = substr( $line, ( $i - 1 ), 1 );
                            $self->_logger->debug(
                                "Line $LineCount closes $INVERSE_CLOSE_CHAR{$logqc} with $logqc"
                            );
                            $line = substr( $line, $i );
                            next TOKEN;
                        }
                        $INSIDE_QUOTE-- if substr( $line, $i, 1 ) eq ']';
                    }
                    $extract  = $line;
                    $continue = 1;
                    next LINE;
                }
            }

            # fragment continues an open quote/comment and contains the
            # closing character somewhere further on
            elsif ( $line =~ $CONTEXT_CLOSER && $INSIDE_QUOTE ) {
                my ( $start, $q, $remainder ) = ( $1, $2, $3 );
                $self->_logger->debug(
                    "Line $LineCount closes $INVERSE_CLOSE_CHAR{$q} with $q");
                $start = $extract . $start if $continue;
                if ( $q eq '"' or $q eq "'" ) {

                    # NOTE(review): unlike the other closing branches this
                    # one does not decrement $INSIDE_QUOTE and drops the
                    # closing quote character; left as is, confirm intent.
                    push @{ $tokens[-1] }, $start;
                    $line = $remainder;
                    next TOKEN;
                }
                elsif ( $q eq ']' ) {
                    for my $i ( 0 .. length($line) ) {
                        $INSIDE_QUOTE++ if substr( $line, $i, 1 ) eq '[';
                        if ( $i and !$INSIDE_QUOTE ) {
                            my $segment = substr( $line, 0, $i );
                            if ($continue) {
                                push @{ $tokens[-1] }, $extract . $segment;
                            }
                            else {
                                push @{ $tokens[-1] }, $segment;
                            }
                            $line = substr( $line, $i );
                            next TOKEN;
                        }
                        $INSIDE_QUOTE-- if substr( $line, $i, 1 ) eq ']';
                    }
                    if ($continue) {
                        $extract .= $line;
                    }
                    else {
                        $extract = $line;
                    }
                    $continue = 1;
                    next LINE;
                }
            }

            # none of the above: the fragment holds a closing square bracket
            # but no opening quote or bracket, and we are not inside a quote
            # or comment. Nothing would consume $line, so bail out instead
            # of looping forever.
            else {
                throw 'BadArgs' =>
                  "Unbalanced closing bracket at line $LineCount";
            }
        }
    }

    # an exception here means that an opening quote symbol " ' [
    # ($QuoteContext) was encountered at input file/string line $QuoteStartLine.
    # This can happen if any of these symbols is used in an illegal
    # way, e.g. by using double quotes as gap symbols in matrices.
    if ($INSIDE_QUOTE) {
        throw 'BadArgs' =>
          "Unbalanced $QuoteContext starting at line $QuoteStartLine";
    }

    # final split: non-quoted/bracketed fragments are split on whitespace,
    # others are preserved verbatim
    $self->_logger->info(
        "going to split non-quoted/commented fragments on whitespace");
    foreach my $line (@tokens) {
        my @line;
        foreach my $word ( @{$line} ) {
            if ( $word !~ $QUOTES_OR_BRACKETS ) {

                # make '=', ';' and ',' tokens in their own right
                $word =~ s/(=|;|,)/ $1 /g;
                push @line, grep { /\S/ } split /\s+/, $word;
            }
            else {
                push @line, $word;
            }
        }
        push @split, \@line;
    }
    return \@split;
}
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# link matrices and forests to taxa
#
# Walks over the parsed blocks in $self->{'_context'} in order. Any block
# that can 'set_taxa' and is not linked to a taxa block yet is linked to the
# most recently seen taxa block, if there is one. Afterwards the parser
# state is reset to its defaults.
#
# Returns: the list of parsed blocks.
sub _post_process {
    my $self = shift;

    # taxa blocks seen so far, most recent last
    my $taxa = [];
    foreach my $block ( @{ $self->{'_context'} } ) {
        if ( $block->_type == $TAXA ) {
            push @{$taxa}, $block;
        }
        elsif ( $block->can('set_taxa') ) {

            # only objects whose _type is $TAXA are ever pushed onto $taxa,
            # so the last element, if any, is a taxa block
            if ( $taxa->[-1] and not $block->get_taxa ) {
                $block->set_taxa( $taxa->[-1] );    # XXX exception here?
            }
        }
    }

    # hold on to the result before the reset below replaces '_context'
    my $blocks = $self->{'_context'};

    # initialize object, note we have to
    # force data type references to be empty
    $self->_process_defaults;
    return @{$blocks};
}
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=begin comment |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
The following subs are called by the dispatch table stored in the object when |
518
|
|
|
|
|
|
|
their respective tokens are encountered. |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=end comment |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=cut |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# Handler for the '#nexus' token: logs that the file header was found.
sub _nexus {
    my ( $self, $token ) = @_;
    $self->_logger->info("found nexus token") if uc($token) eq '#NEXUS';
}
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# Handler for the 'begin' token: flags that a block is about to start. The
# flag is read (and switched off again) by the handlers for block names.
sub _begin {
    my ($self) = @_;
    return $self->{'_begin'} = 1;
}
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# Handler for the 'taxa' token. Directly after 'begin' it starts a new taxa
# block: a taxa object is created through the factory and appended to the
# list of parsed blocks. Anywhere else the token is taken to be part of a
# 'link taxa = ...' statement and the link handler is made current.
sub _taxa {
    my ($self) = @_;
    if ( not $self->{'_begin'} ) {

        # because of 'link taxa = blah' construct
        $self->{'_current'} = 'link';
    }
    else {
        my $block = $self->_factory->create_taxa;
        push @{ $self->{'_context'} }, $block;
        $self->_logger->info("starting taxa block");
        $self->{'_begin'} = 0;
    }
}
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# Handler for the tokens of a CHARSET statement, e.g.
#
#   CHARSET name (CHARACTERS = matrix_name) = 1-10 20;
#
# The pieces are collected in $self->{'_charset'}: 'name' (the set name),
# 'matrix' (the optional mesquite-style matrix reference) and 'range' (array
# reference holding the index tokens).
#
# The keyword tests are anchored, so that set or matrix names that merely
# contain "charset" or "characters" are not mistaken for the keywords.
sub _charset {
    my ( $self, $token ) = @_;
    my $set = $self->{'_charset'} ||= {};

    # first thing after the CHARSET token is the set name
    if ( $token !~ /^CHARSET$/i && !$set->{'name'} ) {
        $set->{'name'}  = $token;
        $set->{'range'} = [];
    }

    # then there might be a mesquite-style matrix reference, e.g. (CHARACTERS = matrix_name)
    elsif ( $token =~ m/^\(/ ) {

        # defined but empty: the matrix name is still to come
        $set->{'matrix'} = '';
    }
    elsif (defined $set->{'matrix'}
        && !$set->{'matrix'}
        && $token !~ /^(?:\(?CHARACTERS|=)$/i )
    {
        $token =~ s/\)$//;
        $set->{'matrix'} = $token;
    }

    # then come the indices
    elsif ( $token =~ /(?:\d+|-)/ ) {
        push @{ $set->{'range'} }, $token;
    }
}
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# Handler for the tokens of a TAXSET statement, e.g.
#
#   TAXSET name (TAXA = taxa_name) = 1-3 5;
#
# The pieces are collected in $self->{'_taxset'}: 'name' (the set name),
# 'taxa' (the optional mesquite-style taxa block reference) and 'range'
# (array reference holding the index tokens).
#
# The keyword tests are anchored and case insensitive, so that set or taxa
# block names that merely contain "taxset" or "taxa" are not mistaken for
# the keywords.
sub _taxset {
    my ( $self, $token ) = @_;
    my $set = $self->{'_taxset'} ||= {};

    # first thing after the TAXSET token is the set name
    if ( $token !~ /^TAXSET$/i && !$set->{'name'} ) {
        $set->{'name'}  = $token;
        $set->{'range'} = [];
    }

    # then there might be a mesquite-style taxa reference, e.g. (TAXA = matrix_name)
    elsif ( $token =~ m/^\(/ ) {

        # defined but empty: the taxa block name is still to come
        $set->{'taxa'} = '';
    }
    elsif (defined $set->{'taxa'}
        && !$set->{'taxa'}
        && $token !~ /^(?:\(?TAXA|=)$/i )
    {
        $token =~ s/\)$//;
        $set->{'taxa'} = $token;
    }

    # then come the indices
    elsif ( $token =~ /(?:\d+|-)/ ) {
        push @{ $set->{'range'} }, $token;
    }
}
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# Handler for the 'interleave' token and the tokens that follow it. Only
# logs what was seen; no parser state is changed here.
sub _interleave {
    my ( $self, $token ) = @_;
    $self->_logger->info("perhaps we'll need to parse interleaved");
    $self->_logger->info("no, we don't need to parse interleaved")
      if defined $token and uc($token) eq 'NO';
}
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
# Handler for the 'title' token and the token that follows it. The first
# token other than the TITLE keyword itself is used as the name of the
# object returned by $self->_current, unless that already has a name.
sub _title {
    my ( $self, $token ) = @_;
    if (    defined $token
        and uc($token) ne 'TITLE'
        and not $self->_current->get_name )
    {
        my $name = $token;
        $self->_current->set_name($name);
        $self->_logger->info("block has title '$name'");
    }
}
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# Handler for the tokens of a 'LINK TAXA = name' statement. The first token
# that is not one of LINK, TAXA or '=' is taken to be the name of the block
# to link to: if the object returned by $self->_current has no taxa yet, it
# is linked to the first parsed block carrying that name.
sub _link {
    my ( $self, $token ) = @_;
    if ( defined $token and $token !~ m/^(?:LINK|TAXA|=)$/i ) {
        my $target = $token;
        if ( not $self->_current->get_taxa ) {
            for my $candidate ( @{ $self->{'_context'} } ) {
                next
                  unless $candidate->get_name
                  and $candidate->get_name eq $target;
                $self->_current->set_taxa($candidate);
                last;
            }
            $self->_logger->info(
                "block links to taxa block with title '$target'");
        }
    }
}
637
|
|
|
|
|
|
|
|
638
|
|
|
|
22
|
|
|
# Handler for the 'dimensions' token. Intentionally does nothing: the
# values that follow (e.g. ntax) have handlers of their own.
sub _dimensions {
    return;
}
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
sub _ntax { |
644
|
39
|
|
|
39
|
|
60
|
my $self = shift; |
645
|
39
|
100
|
66
|
|
|
213
|
if ( defined $_[0] and $_[0] =~ m/^\d+$/ ) { |
646
|
13
|
|
|
|
|
39
|
$self->{'_ntax'} = shift; |
647
|
13
|
|
|
|
|
34
|
my $ntax = $self->{'_ntax'}; |
648
|
13
|
|
|
|
|
49
|
$self->_logger->info("number of taxa: $ntax"); |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub _taxlabels { |
653
|
72
|
|
|
72
|
|
97
|
my $self = shift; |
654
|
72
|
100
|
66
|
|
|
284
|
if ( defined $_[0] and uc( $_[0] ) ne 'TAXLABELS' ) { |
|
|
50
|
33
|
|
|
|
|
655
|
59
|
|
|
|
|
88
|
my $taxon = shift; |
656
|
59
|
|
|
|
|
127
|
$self->_logger->debug("taxon: $taxon"); |
657
|
59
|
|
|
|
|
87
|
push @{ $self->{'_taxlabels'} }, $taxon; |
|
59
|
|
|
|
|
120
|
|
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
elsif ( defined $_[0] and uc( $_[0] ) eq 'TAXLABELS' ) { |
660
|
|
|
|
|
|
|
$self->_current->set_generic( |
661
|
13
|
|
|
|
|
56
|
'nexus_comments' => $self->{'_comments'} ); |
662
|
13
|
|
|
|
|
34
|
$self->{'_comments'} = []; |
663
|
13
|
|
|
|
|
44
|
$self->_logger->info("starting taxlabels"); |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub _blockid { |
668
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
669
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and uc( $_[0] ) ne 'BLOCKID' ) { |
670
|
0
|
|
|
|
|
0
|
my $blockid = shift; |
671
|
0
|
|
|
|
|
0
|
$self->_logger->debug("blockid: $blockid"); |
672
|
0
|
|
|
|
|
0
|
$self->_current->set_generic( 'blockid' => $blockid ); |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub _data { |
677
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
678
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_begin'} ) { |
679
|
0
|
|
|
|
|
0
|
$self->{'_begin'} = 0; |
680
|
0
|
|
|
|
|
0
|
push @{ $self->{'_context'} }, $self->_factory->create_matrix; |
|
0
|
|
|
|
|
0
|
|
681
|
0
|
|
|
|
|
0
|
$self->_logger->info("starting data block"); |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
sub _characters { |
686
|
9
|
|
|
9
|
|
22
|
my $self = shift; |
687
|
9
|
50
|
|
|
|
39
|
if ( $self->{'_begin'} ) { |
688
|
9
|
|
|
|
|
23
|
$self->{'_begin'} = 0; |
689
|
9
|
|
|
|
|
20
|
push @{ $self->{'_context'} }, $self->_factory->create_matrix; |
|
9
|
|
|
|
|
44
|
|
690
|
9
|
|
|
|
|
53
|
$self->_logger->info("starting characters block"); |
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub _nchar { |
695
|
27
|
|
|
27
|
|
43
|
my $self = shift; |
696
|
27
|
100
|
66
|
|
|
160
|
if ( defined $_[0] and $_[0] =~ m/^\d+$/ ) { |
697
|
9
|
|
|
|
|
26
|
$self->{'_nchar'} = shift; |
698
|
9
|
|
|
|
|
23
|
my $nchar = $self->{'_nchar'}; |
699
|
9
|
|
|
|
|
34
|
$self->_logger->info("number of characters: $nchar"); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
9
|
|
|
sub _format { |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
#my $self = shift; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
sub _datatype { |
709
|
27
|
|
|
27
|
|
41
|
my $self = shift; |
710
|
27
|
100
|
66
|
|
|
141
|
if ( defined $_[0] and $_[0] !~ m/^(?:DATATYPE|=)/i ) { |
711
|
9
|
|
|
|
|
27
|
my $datatype = shift; |
712
|
9
|
|
|
|
|
34
|
$self->_current->set_type($datatype); |
713
|
9
|
|
|
|
|
38
|
$self->_logger->info("datatype: $datatype"); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
} |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
sub _matchchar { |
718
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
719
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and $_[0] !~ m/^(?:MATCHCHAR|=)/i ) { |
720
|
0
|
|
|
|
|
0
|
my $matchchar = shift; |
721
|
0
|
|
|
|
|
0
|
$self->_current->set_matchchar($matchchar); |
722
|
0
|
|
|
|
|
0
|
$self->_logger->info("matchchar: $matchchar"); |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
0
|
|
|
sub _items { |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
#my $self = shift; |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
sub _gap { |
732
|
27
|
|
|
27
|
|
48
|
my $self = shift; |
733
|
27
|
100
|
66
|
|
|
138
|
if ( $_[0] !~ m/^(?:GAP|=)/i and !$self->{'_gap'} ) { |
734
|
9
|
|
|
|
|
24
|
$self->{'_gap'} = shift; |
735
|
9
|
|
|
|
|
25
|
my $gap = $self->{'_gap'}; |
736
|
9
|
|
|
|
|
31
|
$self->_current->set_gap($gap); |
737
|
9
|
|
|
|
|
30
|
$self->_logger->info("gap character: $gap"); |
738
|
9
|
|
|
|
|
33
|
undef $self->{'_gap'}; |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub _missing { |
743
|
27
|
|
|
27
|
|
47
|
my $self = shift; |
744
|
27
|
100
|
66
|
|
|
146
|
if ( $_[0] !~ m/^(?:MISSING|=)/i and !$self->{'_missing'} ) { |
745
|
9
|
|
|
|
|
23
|
$self->{'_missing'} = shift; |
746
|
9
|
|
|
|
|
22
|
my $missing = $self->{'_missing'}; |
747
|
9
|
|
|
|
|
31
|
$self->_current->set_missing($missing); |
748
|
9
|
|
|
|
|
32
|
$self->_logger->info("missing character: $missing"); |
749
|
9
|
|
|
|
|
25
|
undef $self->{'_missing'}; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
sub _symbols { |
754
|
15
|
|
|
15
|
|
25
|
my $self = shift; |
755
|
15
|
100
|
66
|
|
|
96
|
if ( $_[0] !~ m/^(?:SYMBOLS|=)$/i and $_[0] =~ m/^"?(.+)"?$/ ) { |
756
|
5
|
|
|
|
|
31
|
my $sym = $1; |
757
|
5
|
|
|
|
|
23
|
$sym =~ s/"//g; |
758
|
5
|
|
|
|
|
48
|
my @syms = grep { /\S+/ } split /\s+/, $sym; |
|
17
|
|
|
|
|
60
|
|
759
|
5
|
|
|
|
|
14
|
push @{ $self->{'_symbols'} }, @syms; |
|
5
|
|
|
|
|
22
|
|
760
|
5
|
|
|
|
|
22
|
$self->_logger->debug("recorded character state symbols '@syms'"); |
761
|
|
|
|
|
|
|
} |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
sub _charlabels { |
765
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
766
|
0
|
0
|
0
|
|
|
0
|
if ( defined $_[0] and uc $_[0] ne 'CHARLABELS' ) { |
767
|
0
|
|
|
|
|
0
|
push @{ $self->{'_charlabels'} }, shift; |
|
0
|
|
|
|
|
0
|
|
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
sub _charstatelabels { |
772
|
18
|
|
|
18
|
|
30
|
my $self = shift; |
773
|
18
|
|
|
|
|
31
|
my $token = shift; |
774
|
18
|
|
|
|
|
48
|
$self->_logger->debug($token); |
775
|
18
|
100
|
66
|
|
|
77
|
if ( defined $token and uc $token ne 'CHARSTATELABELS' ) { |
776
|
16
|
|
|
|
|
29
|
push @{ $self->{'_charstatelabels'} }, $token; |
|
16
|
|
|
|
|
54
|
|
777
|
|
|
|
|
|
|
} |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
sub _statelabels { |
781
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
782
|
0
|
|
|
|
|
0
|
my $token = shift; |
783
|
0
|
0
|
0
|
|
|
0
|
if ( defined $token and uc $token ne 'STATELABELS' ) { |
784
|
0
|
0
|
|
|
|
0
|
if ( $token eq ',' ) { |
785
|
0
|
|
|
|
|
0
|
my $tmpstatelabels = $self->{'_tmpstatelabels'}; |
786
|
0
|
|
|
|
|
0
|
my $index = shift @{$tmpstatelabels}; |
|
0
|
|
|
|
|
0
|
|
787
|
0
|
|
|
|
|
0
|
$self->{'_statelabels'}->[ $index - 1 ] = $tmpstatelabels; |
788
|
0
|
|
|
|
|
0
|
$self->{'_tmpstatelabels'} = []; |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
else { |
791
|
0
|
|
|
|
|
0
|
push @{ $self->{'_tmpstatelabels'} }, $token; |
|
0
|
|
|
|
|
0
|
|
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
# for data type, character labels, state labels |
797
|
|
|
|
|
|
|
sub _add_matrix_metadata { |
798
|
57
|
|
|
57
|
|
75
|
my $self = shift; |
799
|
57
|
|
|
|
|
127
|
$self->_logger->info("adding matrix metadata"); |
800
|
57
|
100
|
|
|
|
133
|
if ( not defined $self->{'_matrixtype'} ) { |
801
|
9
|
|
|
|
|
103
|
$self->{'_matrixtype'} = $self->_current->get_type; |
802
|
9
|
50
|
|
|
|
19
|
if ( @{ $self->{'_charlabels'} } ) { |
|
9
|
|
|
|
|
35
|
|
803
|
0
|
|
|
|
|
0
|
$self->_current->set_charlabels( $self->{'_charlabels'} ); |
804
|
0
|
|
|
|
|
0
|
$self->_logger->debug("adding character labels"); |
805
|
|
|
|
|
|
|
} |
806
|
9
|
50
|
|
|
|
17
|
if ( @{ $self->{'_statelabels'} } ) { |
|
9
|
|
|
|
|
34
|
|
807
|
0
|
|
|
|
|
0
|
$self->_current->set_statelabels( $self->{'_statelabels'} ); |
808
|
0
|
|
|
|
|
0
|
$self->_logger->debug("adding state labels"); |
809
|
|
|
|
|
|
|
} |
810
|
9
|
50
|
|
|
|
17
|
if ( my @symbols = @{ $self->{'_symbols'} } ) { |
|
9
|
|
|
|
|
42
|
|
811
|
0
|
|
|
|
|
0
|
$self->_logger->debug("updating state lookup table"); |
812
|
0
|
|
|
|
|
0
|
my $to = $self->_current->get_type_object; |
813
|
0
|
|
|
|
|
0
|
my $lookup = $to->get_lookup; |
814
|
0
|
0
|
|
|
|
0
|
if ($lookup) { |
815
|
0
|
|
|
|
|
0
|
for my $sym (@symbols) { |
816
|
0
|
0
|
|
|
|
0
|
if ( not exists $lookup->{$sym} ) { |
817
|
0
|
|
|
|
|
0
|
$lookup->{$sym} = [$sym]; |
818
|
|
|
|
|
|
|
} |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
} |
823
|
57
|
|
|
|
|
83
|
return $self; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
sub _add_tokens_to_row { |
827
|
48
|
|
|
48
|
|
85
|
my ( $self, $tokens ) = @_; |
828
|
48
|
|
|
|
|
68
|
my $rowname; |
829
|
48
|
|
|
|
|
64
|
for my $token ( @{$tokens} ) { |
|
48
|
|
|
|
|
100
|
|
830
|
90
|
|
|
|
|
200
|
$self->_logger->debug("token: $token"); |
831
|
90
|
100
|
|
|
|
290
|
last if $token eq ';'; |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
# mesquite sometimes writes multiline (but not interleaved) |
834
|
|
|
|
|
|
|
# matrix rows (harrumph). |
835
|
81
|
100
|
66
|
|
|
533
|
if ( not defined $rowname and $token !~ $COMMENT ) { |
|
|
50
|
33
|
|
|
|
|
836
|
39
|
|
|
|
|
54
|
my $taxa; |
837
|
39
|
50
|
|
|
|
81
|
if ( $taxa = $self->_current->get_taxa ) { |
|
|
50
|
|
|
|
|
|
838
|
0
|
0
|
|
|
|
0
|
if ( my $taxon = $taxa->get_by_name($token) ) { |
839
|
0
|
|
|
|
|
0
|
$rowname = $token; |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
else { |
842
|
0
|
|
|
|
|
0
|
$rowname = $self->{'_matrixrowlabels'}->[-1]; |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
elsif ( $taxa = $self->_find_last_seen_taxa_block ) { |
846
|
39
|
50
|
|
|
|
144
|
if ( my $taxon = $taxa->get_by_name($token) ) { |
847
|
39
|
|
|
|
|
70
|
$rowname = $token; |
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
else { |
850
|
0
|
|
|
|
|
0
|
$rowname = $self->{'_matrixrowlabels'}->[-1]; |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
else { |
854
|
0
|
|
|
|
|
0
|
$rowname = $token; |
855
|
|
|
|
|
|
|
} |
856
|
39
|
50
|
|
|
|
99
|
if ( not exists $self->{'_matrix'}->{$rowname} ) { |
857
|
39
|
|
|
|
|
103
|
$self->{'_matrix'}->{$rowname} = []; |
858
|
39
|
|
|
|
|
57
|
push @{ $self->{'_matrixrowlabels'} }, $rowname; |
|
39
|
|
|
|
|
104
|
|
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
elsif ( defined $rowname and $token !~ $COMMENT ) { |
862
|
42
|
|
|
|
|
98
|
my $row = $self->{'_matrix'}->{$rowname}; |
863
|
42
|
100
|
|
|
|
84
|
if ( $self->{'_matrixtype'} =~ m/^continuous$/i ) { |
864
|
6
|
|
|
|
|
8
|
push @{$row}, split( /\s+/, $token ); |
|
6
|
|
|
|
|
21
|
|
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
else { |
867
|
36
|
|
|
|
|
54
|
push @{$row}, split( //, $token ); |
|
36
|
|
|
|
|
213
|
|
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
sub _find_last_seen_taxa_block { |
874
|
85
|
|
|
85
|
|
152
|
my $self = shift; |
875
|
85
|
|
|
|
|
135
|
my $name = shift; |
876
|
85
|
|
|
|
|
118
|
for ( my $i = $#{ $self->{'_context'} } ; $i >= 0 ; $i-- ) { |
|
85
|
|
|
|
|
313
|
|
877
|
180
|
100
|
|
|
|
556
|
if ( $self->{'_context'}->[$i]->_type == $TAXA ) { |
878
|
84
|
100
|
|
|
|
171
|
if ( $name ) { |
879
|
2
|
100
|
|
|
|
7
|
if ( $self->{'_context'}->[$i]->get_name eq $name ) { |
880
|
1
|
|
|
|
|
3
|
return $self->{'_context'}->[$i]; |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
else { |
884
|
82
|
|
|
|
|
279
|
return $self->{'_context'}->[$i]; |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
} |
888
|
2
|
|
|
|
|
7
|
return; |
889
|
|
|
|
|
|
|
} |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
sub _find_last_seen_matrix { |
892
|
4
|
|
|
4
|
|
10
|
my $self = shift; |
893
|
4
|
|
|
|
|
12
|
my $name = shift; |
894
|
4
|
|
|
|
|
10
|
for ( my $i = $#{ $self->{'_context'} } ; $i >= 0 ; $i-- ) { |
|
4
|
|
|
|
|
23
|
|
895
|
5
|
50
|
|
|
|
28
|
if ( $self->{'_context'}->[$i]->_type == $MATRIX ) { |
896
|
5
|
100
|
|
|
|
16
|
if ( $name ) { |
897
|
2
|
100
|
|
|
|
7
|
if ( $self->{'_context'}->[$i]->get_name eq $name ) { |
898
|
1
|
|
|
|
|
5
|
return $self->{'_context'}->[$i]; |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
else { |
902
|
3
|
|
|
|
|
18
|
return $self->{'_context'}->[$i]; |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
} |
906
|
0
|
|
|
|
|
0
|
return; |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
sub _set_taxon { |
910
|
74
|
|
|
74
|
|
154
|
my ( $self, $obj, $taxa ) = @_; |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
# first case: a taxon by $obj's name already exists |
913
|
74
|
50
|
|
|
|
199
|
if ( my $taxon = $taxa->get_by_name( $obj->get_name ) ) { |
914
|
74
|
|
|
|
|
248
|
$obj->set_taxon($taxon); |
915
|
74
|
|
|
|
|
163
|
return $self; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
# second case: no taxon by $obj's name exists yet |
919
|
|
|
|
|
|
|
else { |
920
|
0
|
|
|
|
|
0
|
my $taxon = $self->_factory->create_taxon( '-name' => $obj->get_name ); |
921
|
0
|
|
|
|
|
0
|
$taxa->insert($taxon); |
922
|
0
|
|
|
|
|
0
|
$obj->set_taxon($taxon); |
923
|
0
|
|
|
|
|
0
|
return $self; |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
sub _resolve_taxon { |
928
|
132
|
|
|
132
|
|
219
|
my ( $self, $obj ) = @_; |
929
|
132
|
|
|
|
|
263
|
my $container = $self->_current; |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
# first case: the object is actually already |
932
|
|
|
|
|
|
|
# linked to a taxon |
933
|
132
|
100
|
|
|
|
321
|
if ( my $taxon = $obj->get_taxon ) { |
934
|
88
|
|
|
|
|
330
|
return $self; |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
# second case: the container is already linked |
938
|
|
|
|
|
|
|
# to a taxa block, but the object isn't |
939
|
44
|
100
|
|
|
|
198
|
if ( my $taxa = $container->get_taxa ) { |
940
|
30
|
|
|
|
|
81
|
$self->_set_taxon( $obj, $taxa ); |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# third case: the container isn't explicitly linked, |
944
|
|
|
|
|
|
|
# but a taxa block has been seen |
945
|
44
|
100
|
|
|
|
155
|
if ( my $taxa = $self->_find_last_seen_taxa_block ) { |
946
|
42
|
|
|
|
|
213
|
$container->set_taxa($taxa); |
947
|
42
|
|
|
|
|
127
|
$self->_set_taxon( $obj, $taxa ); |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
# final case: no taxa block exists |
951
|
|
|
|
|
|
|
else { |
952
|
2
|
|
|
|
|
10
|
my $taxa = $container->make_taxa; |
953
|
2
|
|
|
|
|
5
|
pop @{ $self->{'_context'} }; |
|
2
|
|
|
|
|
9
|
|
954
|
2
|
|
|
|
|
6
|
push @{ $self->{'_context'} }, $taxa, $container; |
|
2
|
|
|
|
|
6
|
|
955
|
2
|
|
|
|
|
11
|
$self->_set_taxon( $obj, $taxa ); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
sub _resolve_ambig { |
960
|
39
|
|
|
39
|
|
92
|
my ( $self, $datum, $chars ) = @_; |
961
|
39
|
|
|
|
|
149
|
my %brackets = ( |
962
|
|
|
|
|
|
|
'(' => ')', |
963
|
|
|
|
|
|
|
'{' => '}', |
964
|
|
|
|
|
|
|
); |
965
|
39
|
|
|
|
|
111
|
my $to = $datum->get_type_object; |
966
|
39
|
|
|
|
|
65
|
my @resolved; |
967
|
39
|
|
|
|
|
64
|
my $in_set = 0; |
968
|
39
|
|
|
|
|
73
|
my @set; |
969
|
|
|
|
|
|
|
my $close; |
970
|
39
|
|
|
|
|
57
|
for my $c ( @{$chars} ) { |
|
39
|
|
|
|
|
95
|
|
971
|
|
|
|
|
|
|
|
972
|
228
|
50
|
33
|
|
|
576
|
if ( not $in_set and not exists $brackets{$c} ) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
973
|
228
|
50
|
|
|
|
487
|
push @resolved, $c if defined $c; |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
elsif ( not $in_set and exists $brackets{$c} ) { |
976
|
0
|
|
|
|
|
0
|
$in_set++; |
977
|
0
|
|
|
|
|
0
|
$close = $brackets{$c}; |
978
|
|
|
|
|
|
|
} |
979
|
|
|
|
|
|
|
elsif ( $in_set and $c ne $close ) { |
980
|
0
|
|
|
|
|
0
|
push @set, $c; |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
elsif ( $in_set and $c eq $close ) { |
983
|
0
|
|
|
|
|
0
|
push @resolved, $to->get_symbol_for_states(@set); |
984
|
0
|
|
|
|
|
0
|
@set = (); |
985
|
0
|
|
|
|
|
0
|
$in_set = 0; |
986
|
0
|
|
|
|
|
0
|
$close = undef; |
987
|
|
|
|
|
|
|
} |
988
|
|
|
|
|
|
|
} |
989
|
39
|
|
|
|
|
120
|
return \@resolved; |
990
|
|
|
|
|
|
|
} |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
sub _codons { |
993
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
994
|
0
|
|
|
|
|
0
|
$self->_logger->info(shift); |
995
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_begin'} ) { |
996
|
0
|
|
|
|
|
0
|
$self->{'_begin'} = 0; |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
sub _matrix { |
1001
|
57
|
|
|
57
|
|
88
|
my $self = shift; |
1002
|
57
|
|
|
|
|
83
|
my $token = shift; |
1003
|
57
|
|
|
|
|
151
|
$self->_add_matrix_metadata; |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
# first token: 'MATRIX', i.e. we're just starting to parse |
1006
|
|
|
|
|
|
|
# the actual matrix. Here we need to switch to "linemode", |
1007
|
|
|
|
|
|
|
# so that subsequently tokens will be array references (all |
1008
|
|
|
|
|
|
|
# the tokens on a line). This is so that we can handle |
1009
|
|
|
|
|
|
|
# interleaved matrices, which unfortunately need line breaks |
1010
|
|
|
|
|
|
|
# in them. |
1011
|
57
|
100
|
66
|
|
|
144
|
if ( not looks_like_instance( $token, 'ARRAY' ) and uc($token) eq 'MATRIX' ) |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
1012
|
|
|
|
|
|
|
{ |
1013
|
9
|
|
|
|
|
30
|
$self->{'_linemode'} = 1; |
1014
|
9
|
|
|
|
|
33
|
$self->_logger->info("starting matrix"); |
1015
|
9
|
|
|
|
|
19
|
return; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
# a row inside the matrix, after adding tokens to row, nothing |
1019
|
|
|
|
|
|
|
# else to do |
1020
|
|
|
|
|
|
|
elsif ( looks_like_instance( $token, 'ARRAY' ) |
1021
|
90
|
|
|
|
|
309
|
and not grep { /^;$/ } @{$token} ) |
|
48
|
|
|
|
|
100
|
|
1022
|
|
|
|
|
|
|
{ |
1023
|
39
|
|
|
|
|
125
|
$self->_add_tokens_to_row($token); |
1024
|
39
|
|
|
|
|
103
|
$self->_logger->info("adding tokens to row"); |
1025
|
39
|
|
|
|
|
71
|
return; |
1026
|
|
|
|
|
|
|
} |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
# the last row of the matrix, after adding tokens to row, |
1029
|
|
|
|
|
|
|
# instantiate & populate datum objects, link against taxa |
1030
|
|
|
|
|
|
|
# objects |
1031
|
|
|
|
|
|
|
elsif ( looks_like_instance( $token, 'ARRAY' ) |
1032
|
9
|
|
|
|
|
54
|
and grep { /^;$/ } @{$token} ) |
|
9
|
|
|
|
|
32
|
|
1033
|
|
|
|
|
|
|
{ |
1034
|
9
|
|
|
|
|
37
|
$self->_add_tokens_to_row($token); |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
# link to taxa |
1037
|
9
|
|
|
|
|
28
|
for my $row ( @{ $self->{'_matrixrowlabels'} } ) { |
|
9
|
|
|
|
|
35
|
|
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
# create new datum |
1040
|
39
|
|
|
|
|
125
|
my $datum = $self->_factory->create_datum( |
1041
|
|
|
|
|
|
|
'-type_object' => $self->_current->get_type_object, |
1042
|
|
|
|
|
|
|
'-name' => $row, |
1043
|
|
|
|
|
|
|
); |
1044
|
|
|
|
|
|
|
my $char = |
1045
|
39
|
|
|
|
|
200
|
$self->_resolve_ambig( $datum, $self->{'_matrix'}->{$row} ); |
1046
|
39
|
|
|
|
|
162
|
$datum->set_char($char); |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# insert new datum in matrix |
1049
|
39
|
|
|
|
|
107
|
$self->_current->insert($datum); |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
# link to taxon |
1052
|
39
|
|
|
|
|
144
|
$self->_resolve_taxon($datum); |
1053
|
39
|
|
|
|
|
168
|
my ( $length, $seq ) = ( $datum->get_length, $datum->get_char ); |
1054
|
39
|
|
|
|
|
149
|
$self->_logger->info("parsed $length characters for ${row}: $seq"); |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
# Let's avoid these! |
1058
|
9
|
100
|
33
|
|
|
38
|
if ( $self->_current->get_nchar != $self->{'_nchar'} ) { |
|
|
50
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
my ( $obs, $exp ) = |
1060
|
1
|
|
|
|
|
4
|
( $self->_current->get_nchar, $self->{'_nchar'} ); |
1061
|
1
|
|
|
|
|
7
|
_bad_format("Observed and expected nchar mismatch: $obs vs. $exp"); |
1062
|
|
|
|
|
|
|
} |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
# ntax is only defined for "data" blocks (which have ntax token), |
1065
|
|
|
|
|
|
|
# not for "characters" blocks (which should match up with taxa block) |
1066
|
|
|
|
|
|
|
elsif ( defined $self->{'_ntax'} |
1067
|
|
|
|
|
|
|
and $self->_current->get_ntax != $self->{'_ntax'} ) |
1068
|
|
|
|
|
|
|
{ |
1069
|
0
|
|
|
|
|
0
|
my ( $obs, $exp ) = ( $self->_current->get_ntax, $self->{'_ntax'} ); |
1070
|
0
|
|
|
|
|
0
|
_bad_format("Observed and expected ntax mismatch: $obs vs. $exp"); |
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
# XXX matrix clean up here |
1074
|
8
|
|
|
|
|
24
|
$self->{'_ntax'} = undef; |
1075
|
8
|
|
|
|
|
19
|
$self->{'_nchar'} = undef; |
1076
|
8
|
|
|
|
|
24
|
$self->{'_matrixtype'} = undef; |
1077
|
8
|
|
|
|
|
57
|
$self->{'_matrix'} = {}; |
1078
|
8
|
|
|
|
|
28
|
$self->{'_matrixrowlabels'} = []; |
1079
|
8
|
|
|
|
|
26
|
$self->{'_linemode'} = 0; |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
} |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
sub _bad_format { |
1084
|
2
|
|
|
2
|
|
15
|
throw 'BadFormat' => shift; |
1085
|
|
|
|
|
|
|
} |
1086
|
408
|
|
|
408
|
|
1569
|
sub _current { shift->{'_context'}->[-1] } |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
sub _trees { |
1089
|
5
|
|
|
5
|
|
14
|
my $self = shift; |
1090
|
5
|
50
|
|
|
|
19
|
if ( $self->{'_begin'} ) { |
1091
|
5
|
|
|
|
|
13
|
$self->{'_begin'} = 0; |
1092
|
5
|
|
|
|
|
14
|
$self->{'_trees'} = ''; |
1093
|
5
|
|
|
|
|
14
|
$self->{'_treenames'} = []; |
1094
|
5
|
|
|
|
|
9
|
push @{ $self->{'_context'} }, $self->_factory->create_forest; |
|
5
|
|
|
|
|
27
|
|
1095
|
5
|
|
|
|
|
28
|
$self->_logger->info("starting trees block"); |
1096
|
|
|
|
|
|
|
} |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
sub _translate { |
1100
|
171
|
|
|
171
|
|
240
|
my $self = shift; |
1101
|
171
|
|
|
|
|
234
|
my $i = $self->{'_i'}; |
1102
|
171
|
100
|
100
|
|
|
404
|
if ( $i && $i == 1 ) |
1103
|
|
|
|
|
|
|
{ # actually, $i can be 0 according to BayesPhylogenies translation table |
1104
|
5
|
|
|
|
|
20
|
$self->_logger->info("starting translation table"); |
1105
|
|
|
|
|
|
|
} |
1106
|
171
|
100
|
100
|
|
|
971
|
if ( !defined($i) && $_[0] =~ m/^\d+$/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1107
|
57
|
|
|
|
|
95
|
$self->{'_i'} = shift; |
1108
|
57
|
|
|
|
|
141
|
$self->{'_translate'}->[ $self->{'_i'} ] = undef; |
1109
|
|
|
|
|
|
|
} |
1110
|
|
|
|
|
|
|
elsif (defined($i) |
1111
|
|
|
|
|
|
|
&& exists $self->{'_translate'}->[$i] |
1112
|
|
|
|
|
|
|
&& !defined $self->{'_translate'}->[$i] |
1113
|
|
|
|
|
|
|
&& $_[0] ne ';' ) |
1114
|
|
|
|
|
|
|
{ |
1115
|
57
|
|
|
|
|
96
|
$self->{'_translate'}->[$i] = $_[0]; |
1116
|
57
|
|
|
|
|
144
|
$self->_logger->debug("Translation: $i => $_[0]"); |
1117
|
57
|
|
|
|
|
102
|
$self->{'_i'} = undef; |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
sub _tree { |
1122
|
212
|
|
|
212
|
|
299
|
my $self = shift; |
1123
|
212
|
100
|
100
|
|
|
470
|
if ( not $self->{'_treename'} and $_[0] !~ m/^(U?TREE|\*)$/i ) { |
1124
|
13
|
|
|
|
|
28
|
$self->{'_treename'} = $_[0]; |
1125
|
|
|
|
|
|
|
} |
1126
|
212
|
100
|
66
|
|
|
424
|
if ( $_[0] eq '=' and not $self->{'_treestart'} ) { |
1127
|
13
|
|
|
|
|
164
|
$self->{'_treestart'} = 1; |
1128
|
|
|
|
|
|
|
} |
1129
|
212
|
100
|
100
|
|
|
578
|
if ( $_[0] ne '=' and $self->{'_treestart'} ) { |
1130
|
173
|
|
|
|
|
260
|
$self->{'_tree'} .= $_[0]; |
1131
|
|
|
|
|
|
|
} |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
# tr/// returns # of replacements, hence can be used to check |
1134
|
|
|
|
|
|
|
# tree description is balanced |
1135
|
212
|
100
|
100
|
|
|
847
|
if ( $self->{'_treestart'} |
|
|
|
100
|
|
|
|
|
1136
|
|
|
|
|
|
|
and $self->{'_tree'} |
1137
|
|
|
|
|
|
|
and $self->{'_tree'} =~ tr/(/(/ == $self->{'_tree'} =~ tr/)/)/ ) |
1138
|
|
|
|
|
|
|
{ |
1139
|
13
|
|
|
|
|
25
|
my $translated = $self->{'_tree'}; |
1140
|
13
|
|
|
|
|
25
|
my $translate = $self->{'_translate'}; |
1141
|
13
|
100
|
|
|
|
33
|
my $start = |
1142
|
|
|
|
|
|
|
exists $translate->[0] |
1143
|
|
|
|
|
|
|
? 0 |
1144
|
|
|
|
|
|
|
: 1; # BayesPhylogenies starts translation table w. 0 |
1145
|
13
|
|
|
|
|
26
|
for my $i ( $start .. $#{$translate} ) { |
|
13
|
|
|
|
|
45
|
|
1146
|
93
|
|
|
|
|
2415
|
$translated =~ s/(\(|,)$i(,|\)|:)/$1$translate->[$i]$2/; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
my ( $logtreename, $logtree ) = |
1149
|
13
|
|
|
|
|
60
|
( $self->{'_treename'}, $self->{'_tree'} ); |
1150
|
13
|
|
|
|
|
57
|
$self->_logger->info("tree: $logtreename string: $logtree"); |
1151
|
13
|
|
|
|
|
55
|
$self->{'_trees'} .= $translated . ';'; |
1152
|
13
|
|
|
|
|
28
|
push @{ $self->{'_treenames'} }, $self->{'_treename'}; |
|
13
|
|
|
|
|
36
|
|
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
# XXX tree cleanup here |
1155
|
13
|
|
|
|
|
28
|
$self->{'_treestart'} = 0; |
1156
|
13
|
|
|
|
|
24
|
$self->{'_tree'} = undef; |
1157
|
13
|
|
|
|
|
36
|
$self->{'_treename'} = undef; |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
sub _end { |
1162
|
33
|
|
|
33
|
|
73
|
my $self = shift; |
1163
|
33
|
|
|
|
|
89
|
$self->{'_translate'} = []; |
1164
|
33
|
100
|
100
|
|
|
190
|
if ( uc $self->{'_previous'} eq ';' and $self->{'_trees'} ) { |
1165
|
5
|
|
|
|
|
39
|
my $forest = $self->_current; |
1166
|
|
|
|
|
|
|
my $trees = parse( |
1167
|
|
|
|
|
|
|
'-format' => 'newick', |
1168
|
5
|
|
|
|
|
51
|
'-string' => $self->{'_trees'}, |
1169
|
|
|
|
|
|
|
'-as_project' => 0 |
1170
|
|
|
|
|
|
|
); |
1171
|
5
|
|
|
|
|
52
|
for my $tree ( @{ $trees->get_entities } ) { |
|
5
|
|
|
|
|
28
|
|
1172
|
13
|
|
|
|
|
44
|
$forest->insert($tree); |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
# set tree names |
1176
|
5
|
|
|
|
|
78
|
for my $i ( 0 .. $#{ $self->{'_treenames'} } ) { |
|
5
|
|
|
|
|
32
|
|
1177
|
13
|
|
|
|
|
70
|
$forest->get_by_index($i)->set_name( $self->{'_treenames'}->[$i] ); |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
# link tips to taxa |
1181
|
5
|
|
|
|
|
13
|
for my $tree ( @{ $forest->get_entities } ) { |
|
5
|
|
|
|
|
20
|
|
1182
|
13
|
|
|
|
|
28
|
for my $tip ( @{ $tree->get_terminals } ) { |
|
13
|
|
|
|
|
67
|
|
1183
|
93
|
|
|
|
|
185
|
$self->_resolve_taxon($tip); |
1184
|
|
|
|
|
|
|
} |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
# XXX trees cleanup here |
1188
|
5
|
|
|
|
|
32
|
$self->{'_trees'} = ''; |
1189
|
5
|
|
|
|
|
83
|
$self->{'_treenames'} = []; |
1190
|
|
|
|
|
|
|
} |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
sub _semicolon { |
1194
|
189
|
|
|
189
|
|
290
|
my $self = shift; |
1195
|
189
|
50
|
|
|
|
1392
|
if ( uc $self->{'_previous'} eq 'MATRIX' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1196
|
0
|
|
|
|
|
0
|
$self->{'_matrixtype'} = undef; |
1197
|
0
|
|
|
|
|
0
|
$self->{'_matrix'} = {}; |
1198
|
0
|
|
|
|
|
0
|
$self->{'_charlabels'} = []; |
1199
|
0
|
|
|
|
|
0
|
$self->{'_statelabels'} = []; |
1200
|
0
|
|
|
|
|
0
|
$self->{'_linemode'} = 0; |
1201
|
0
|
0
|
|
|
|
0
|
if ( not $self->_current->get_ntax ) { |
1202
|
0
|
|
|
|
|
0
|
my $taxon = {}; |
1203
|
0
|
|
|
|
|
0
|
foreach my $row ( @{ $self->_current->get_entities } ) { |
|
0
|
|
|
|
|
0
|
|
1204
|
0
|
|
|
|
|
0
|
$taxon->{ $row->get_taxon }++; |
1205
|
|
|
|
|
|
|
} |
1206
|
0
|
|
|
|
|
0
|
my $ntax = scalar keys %{$taxon}; |
|
0
|
|
|
|
|
0
|
|
1207
|
|
|
|
|
|
|
} |
1208
|
|
|
|
|
|
|
} |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
# finalize character set |
1211
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARSET' ) { |
1212
|
2
|
|
|
|
|
13
|
my $matrix = $self->_find_last_seen_matrix( $self->{'_charset'}->{'matrix'} ); |
1213
|
2
|
|
|
|
|
7
|
my $characters = $matrix->get_characters; |
1214
|
2
|
|
|
|
|
7
|
my $set = $self->_factory->create_set( '-name' => $self->{'_charset'}->{'name'} ); |
1215
|
2
|
|
|
|
|
20
|
$characters->add_set($set); |
1216
|
2
|
|
|
|
|
5
|
my $range = $self->{'_charset'}->{'range'}; |
1217
|
2
|
|
|
|
|
4
|
my @range; |
1218
|
2
|
50
|
|
|
|
9
|
if ( ref($range) eq 'ARRAY' ) { |
1219
|
2
|
|
|
|
|
5
|
while ( @{ $range } ) { |
|
6
|
|
|
|
|
12
|
|
1220
|
4
|
|
|
|
|
6
|
my $index = shift @{ $range }; |
|
4
|
|
|
|
|
7
|
|
1221
|
4
|
100
|
66
|
|
|
17
|
if ( $range->[0] && $range->[0] eq '-' ) { |
1222
|
2
|
|
|
|
|
4
|
shift @{ $range }; |
|
2
|
|
|
|
|
4
|
|
1223
|
2
|
|
|
|
|
3
|
my $end = shift @{ $range }; |
|
2
|
|
|
|
|
5
|
|
1224
|
2
|
|
|
|
|
10
|
push @range, ( $index - 1 ) .. ( $end - 1 ); |
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
else { |
1227
|
2
|
|
|
|
|
6
|
push @range, ( $index - 1 ); |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
} |
1230
|
2
|
|
|
|
|
6
|
for my $i ( @range ) { |
1231
|
6
|
|
|
|
|
19
|
my $character = $characters->get_by_index($i); |
1232
|
6
|
50
|
|
|
|
11
|
if ( $character ) { |
1233
|
6
|
|
|
|
|
20
|
$characters->add_to_set($character,$set); |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
else { |
1236
|
0
|
|
|
|
|
0
|
throw 'API' => "No character at index $i"; |
1237
|
|
|
|
|
|
|
} |
1238
|
|
|
|
|
|
|
} |
1239
|
|
|
|
|
|
|
} |
1240
|
2
|
|
|
|
|
8
|
$self->{'_charset'} = {}; |
1241
|
|
|
|
|
|
|
} |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
# finalize character state labels |
1244
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARSTATELABELS' ) { |
1245
|
2
|
|
|
|
|
21
|
my $matrix = $self->_find_last_seen_matrix; |
1246
|
2
|
|
|
|
|
6
|
my @labels = @{ $self->{'_charstatelabels'} }; |
|
2
|
|
|
|
|
12
|
|
1247
|
2
|
100
|
|
|
|
19
|
if ( $matrix->get_type =~ m/continuous/i ) { |
1248
|
1
|
|
|
|
|
4
|
my @charlabels; |
1249
|
1
|
|
|
|
|
5
|
my $charnum = 1; |
1250
|
1
|
|
|
|
|
5
|
while (@labels) { |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
# expecting an index at the beginning of the statement |
1253
|
2
|
|
|
|
|
6
|
my $index = shift @labels; |
1254
|
2
|
50
|
|
|
|
10
|
$index != $charnum && _bad_format( "Expecting character number $charnum, observed $index in CHARSTATELABELS" ); |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
# then the character label |
1257
|
2
|
|
|
|
|
6
|
push @charlabels, shift @labels; |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# then a comma |
1260
|
2
|
100
|
|
|
|
6
|
if ( @labels ) { |
1261
|
1
|
50
|
|
|
|
6
|
$labels[0] eq ',' ? shift @labels : _bad_format( "Expecting , observed $labels[0] in CHARSTATELABELS" ); |
1262
|
|
|
|
|
|
|
} |
1263
|
2
|
|
|
|
|
8
|
$charnum++; |
1264
|
|
|
|
|
|
|
} |
1265
|
1
|
|
|
|
|
10
|
$matrix->set_charlabels(\@charlabels); |
1266
|
1
|
|
|
|
|
5
|
$self->{'_charstatelabels'} = []; |
1267
|
|
|
|
|
|
|
} |
1268
|
|
|
|
|
|
|
else { |
1269
|
1
|
|
|
|
|
5
|
my ( @charlabels, @statelabels ); |
1270
|
1
|
|
|
|
|
3
|
my $charnum = 1; |
1271
|
1
|
|
|
|
|
5
|
while (@labels) { |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
# expecting an index at the beginning of the statement |
1274
|
2
|
|
|
|
|
6
|
my $index = shift @labels; |
1275
|
2
|
50
|
|
|
|
10
|
$index != $charnum && _bad_format( "Expecting character number $charnum, observed $index in CHARSTATELABELS" ); |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
# then the character label |
1278
|
2
|
|
|
|
|
7
|
push @charlabels, shift @labels; |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
# then a forward slash |
1281
|
2
|
|
|
|
|
5
|
my $slash = shift @labels; |
1282
|
2
|
50
|
|
|
|
8
|
$slash ne '/' && _bad_format( "Expecting /, observed $slash in CHARSTATELABELS" ); |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
# then a list of state labels |
1285
|
2
|
|
|
|
|
6
|
my @stateset; |
1286
|
2
|
|
100
|
|
|
27
|
push @stateset, shift @labels while(@labels and $labels[0] ne ','); |
1287
|
2
|
|
|
|
|
7
|
push @statelabels, \@stateset; |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
# then a comma |
1290
|
2
|
100
|
|
|
|
8
|
if ( @labels ) { |
1291
|
1
|
50
|
|
|
|
6
|
$labels[0] eq ',' ? shift @labels : _bad_format( "Expecting , observed $labels[0] in CHARSTATELABELS" ); |
1292
|
|
|
|
|
|
|
} |
1293
|
2
|
|
|
|
|
8
|
$charnum++; |
1294
|
|
|
|
|
|
|
} |
1295
|
1
|
|
|
|
|
15
|
$matrix->set_charlabels(\@charlabels); |
1296
|
1
|
|
|
|
|
4
|
$matrix->set_statelabels(\@statelabels); |
1297
|
1
|
|
|
|
|
5
|
$self->{'_charstatelabels'} = []; |
1298
|
|
|
|
|
|
|
} |
1299
|
|
|
|
|
|
|
} |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
# finalize taxon set |
1302
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'TAXSET' ) { |
1303
|
2
|
|
|
|
|
12
|
my $taxa = $self->_find_last_seen_taxa_block( $self->{'_taxset'}->{'taxa'} ); |
1304
|
2
|
|
|
|
|
8
|
my $set = $self->_factory->create_set( '-name' => $self->{'_taxset'}->{'name'} ); |
1305
|
2
|
|
|
|
|
17
|
$taxa->add_set($set); |
1306
|
2
|
|
|
|
|
6
|
my $range = $self->{'_taxset'}->{'range'}; |
1307
|
2
|
|
|
|
|
8
|
my @range; |
1308
|
2
|
|
|
|
|
5
|
while ( @{ $range } ) { |
|
8
|
|
|
|
|
18
|
|
1309
|
6
|
|
|
|
|
11
|
my $index = shift @{ $range }; |
|
6
|
|
|
|
|
11
|
|
1310
|
6
|
100
|
100
|
|
|
23
|
if ( $range->[0] && $range->[0] eq '-' ) { |
1311
|
2
|
|
|
|
|
4
|
shift @{ $range }; |
|
2
|
|
|
|
|
5
|
|
1312
|
2
|
|
|
|
|
4
|
my $end = shift @{ $range }; |
|
2
|
|
|
|
|
6
|
|
1313
|
2
|
|
|
|
|
13
|
push @range, ( $index - 1 ) .. ( $end - 1 ); |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
else { |
1316
|
4
|
|
|
|
|
8
|
push @range, ( $index - 1 ); |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
} |
1319
|
2
|
|
|
|
|
7
|
for my $i ( @range ) { |
1320
|
8
|
|
|
|
|
29
|
my $taxon = $taxa->get_by_index($i); |
1321
|
8
|
50
|
|
|
|
18
|
if ( $taxon ) { |
1322
|
8
|
|
|
|
|
25
|
$taxa->add_to_set($taxon,$set); |
1323
|
|
|
|
|
|
|
} |
1324
|
|
|
|
|
|
|
else { |
1325
|
0
|
|
|
|
|
0
|
_bad_format( "No taxon at index $i" ); |
1326
|
|
|
|
|
|
|
} |
1327
|
|
|
|
|
|
|
} |
1328
|
2
|
|
|
|
|
9
|
$self->{'_taxset'} = {}; |
1329
|
|
|
|
|
|
|
} |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
# finalize taxa labels |
1332
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'TAXLABELS' ) { |
1333
|
13
|
|
|
|
|
26
|
foreach my $name ( @{ $self->{'_taxlabels'} } ) { |
|
13
|
|
|
|
|
38
|
|
1334
|
59
|
|
|
|
|
154
|
my $taxon = $self->_factory->create_taxon( '-name' => $name ); |
1335
|
59
|
|
|
|
|
156
|
$self->_current->insert($taxon); |
1336
|
|
|
|
|
|
|
} |
1337
|
13
|
100
|
|
|
|
48
|
if ( $self->_current->get_ntax != $self->{'_ntax'} ) { |
1338
|
|
|
|
|
|
|
_bad_format( |
1339
|
|
|
|
|
|
|
sprintf( |
1340
|
|
|
|
|
|
|
'Mismatch between observed and expected ntax: %d vs %d', |
1341
|
1
|
|
|
|
|
6
|
$self->_current->get_ntax, $self->{'_ntax'} |
1342
|
|
|
|
|
|
|
) |
1343
|
|
|
|
|
|
|
); |
1344
|
|
|
|
|
|
|
} |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
# XXX taxa cleanup here |
1347
|
12
|
|
|
|
|
36
|
$self->{'_ntax'} = undef; |
1348
|
12
|
|
|
|
|
43
|
$self->{'_taxlabels'} = []; |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
# finalize symbols list |
1352
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'SYMBOLS' ) { |
1353
|
5
|
|
|
|
|
14
|
my $logsymbols = join( ' ', @{ $self->{'_symbols'} } ); |
|
5
|
|
|
|
|
19
|
|
1354
|
5
|
|
|
|
|
20
|
$self->_logger->info("symbols: $logsymbols"); |
1355
|
5
|
|
|
|
|
17
|
$self->{'_symbols'} = []; |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
# finalize character labels |
1359
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'CHARLABELS' ) { |
1360
|
0
|
0
|
|
|
|
|
if ( @{ $self->{'_charlabels'} } ) { |
|
0
|
|
|
|
|
|
|
1361
|
0
|
|
|
|
|
|
my $logcharlabels = join( ' ', @{ $self->{'_charlabels'} } ); |
|
0
|
|
|
|
|
|
|
1362
|
0
|
|
|
|
|
|
$self->_logger->info("charlabels: $logcharlabels"); |
1363
|
|
|
|
|
|
|
} |
1364
|
|
|
|
|
|
|
} |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
# finalize state labels |
1367
|
|
|
|
|
|
|
elsif ( uc $self->{'_previous'} eq 'STATELABELS' ) { |
1368
|
0
|
0
|
|
|
|
|
if ( @{ $self->{'_statelabels'} } ) { |
|
0
|
|
|
|
|
|
|
1369
|
0
|
|
|
|
|
|
my $logstatelabels = join( ' ', @{ $self->{'_statelabels'} } ); |
|
0
|
|
|
|
|
|
|
1370
|
0
|
|
|
|
|
|
$self->_logger->info("statelabels: $logstatelabels"); |
1371
|
|
|
|
|
|
|
} |
1372
|
|
|
|
|
|
|
} |
1373
|
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
# podinherit_insert_token |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=head1 SEE ALSO |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
1380
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=over |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
The nexus parser is called by the L<Bio::Phylo::IO> object. Look there for |
1387
|
|
|
|
|
|
|
examples of file parsing and manipulation. |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
=back |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=head1 CITATION |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
1400
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
1401
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
1402
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
=cut |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
1; |