line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Figtree; |
2
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
49
|
|
3
|
2
|
|
|
2
|
|
7
|
use base 'Bio::Phylo::Parsers::Abstract'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
578
|
|
4
|
2
|
|
|
2
|
|
12
|
use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
502
|
|
5
|
2
|
|
|
2
|
|
12
|
use Bio::Phylo::Factory; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
7
|
|
6
|
2
|
|
|
2
|
|
8
|
use Bio::Phylo::IO 'parse_tree'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
79
|
|
7
|
2
|
|
|
2
|
|
11
|
use Bio::Phylo::Util::Logger ':levels'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1742
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $fac = Bio::Phylo::Factory->new; |
10
|
|
|
|
|
|
|
my $log = Bio::Phylo::Util::Logger->new; |
11
|
|
|
|
|
|
|
my $ns = _NS_FIGTREE_; |
12
|
|
|
|
|
|
|
my $pre = 'fig'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This module parses annotated trees in NEXUS format as interpreted by FigTree |
21
|
|
|
|
|
|
|
(L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have |
22
|
|
|
|
|
|
|
additional 'hot comments' attached to them in the tree description. The |
23
|
|
|
|
|
|
|
implementation assumes syntax as follows: |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
[&minmax={0.1231,0.3254},rate=0.0075583392800736] |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
I.e. the first token inside the comments is an ampersand, the annotations are |
28
|
|
|
|
|
|
|
comma-separated key/value pairs, where ranges are enclosed in curly braces. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
The annotations are stored as meta objects, e.g.: |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$node->get_meta_object('fig:rate'); # 0.0075583392800736 |
33
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_min'); # 0.1231 |
34
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_max'); # 0.3254 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Annotations that have non-alphanumerical symbols in them will have these removed |
37
|
|
|
|
|
|
|
from them. For example, C<rate_95%_HPD={}> becomes two annotations: |
38
|
|
|
|
|
|
|
C<rate_95_HPD_min> and C<rate_95_HPD_max>. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=cut |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Reads a FigTree-style NEXUS stream from the parser's handle and returns a
# forest containing one tree per TREE statement.
#
# Lines are handled as follows:
#  - 'BEGIN TREES;' marks the start of the trees block;
#  - inside the trees block, and before the first TREE statement, lines of
#    the form '<number> <label>' are collected as TRANSLATE table entries;
#  - 'TREE <name> = [&R|&U] <newick>' statements are parsed as newick (with
#    comments retained in the node names), post-processed to turn the
#    FigTree "hot comments" into meta annotations, and have their tip labels
#    mapped through the TRANSLATE table.
#
# Returns: the forest object created by the factory.
sub _parse {
    my $self   = shift;
    my $fh     = $self->_handle;
    my $forest = $fac->create_forest;
    $forest->set_namespaces( $pre => $ns );

    my $in_tree_block;    # true once 'BEGIN TREES;' has been seen
    my $seen_tree;        # true once the first TREE statement has been seen
    my %translate;        # numeric id => label, from the TRANSLATE table

    # a lexical line variable is used instead of the global $_ because
    # other parsers are invoked from within the loop body
    while ( my $line = <$fh> ) {
        $in_tree_block++ if $line =~ /BEGIN TREES;/i;

        if ( $line =~ /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) {
            my ( $tree_name, $rooted, $newick ) = ( $1, $2, $3 );
            $seen_tree++;
            my $tree = parse_tree(
                '-format'          => 'newick',
                '-string'          => $newick,
                '-ignore_comments' => 1,
            );

            # the statement is matched case-insensitively, so the rooting
            # flag has to be compared case-insensitively as well
            $tree->set_as_unrooted if uc($rooted) eq 'U';
            $tree->set_name($tree_name);
            $self->_post_process($tree);

            # only rename tips that actually occur in the TRANSLATE table;
            # otherwise (e.g. no table at all) keep the label from the newick
            # string instead of overwriting it with undef
            for my $tip ( @{ $tree->get_terminals } ) {
                my $label = $tip->get_name;
                if ( defined $label and exists $translate{$label} ) {
                    $tip->set_name( $translate{$label} );
                }
            }
            $forest->insert($tree);
        }
        elsif ( $in_tree_block and not $seen_tree and $line =~ /\s+(\d+)\s+(.+)/ ) {
            my ( $id, $label ) = ( $1, $2 );

            # strip trailing whitespace (including a CR from CRLF files)
            # before removing the entry separator / statement terminator
            $label =~ s/\s+$//;
            $label =~ s/[,;]$//;
            $translate{$id} = $label;
        }
    }
    return $forest;
}
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Visits every node of $tree and converts FigTree "hot comments" that the
# newick parser left inside the node name into meta annotations.
#
# A node name of the form 'label[&key=value,range={min,max}]' is handled as
# follows: the name is reset to 'label', every 'key=value' pair becomes one
# annotation (via _meta), and every 'key={a,b}' pair becomes two annotations
# named 'key_min' and 'key_max'. Percent signs are removed from keys.
# Bracketed comments that do not start with '&' are dropped from the name
# but otherwise ignored.
#
# Arguments: the parser object (unused here) and the tree to process.
sub _post_process {
    my ( $self, $tree ) = @_;
    $log->debug("going to post-process tree");
    $tree->visit(sub{
        my $n = shift;

        # nodes may be unnamed; treat that as an empty name
        my $name = $n->get_name;
        $name = '' if not defined $name;
        $name =~ s/\\//g;
        $log->debug("name: $name");

        # nothing to do unless the name ends in a [bracketed] comment
        return unless $name =~ /^([^\[]*?)\[(.+?)\]$/;
        my ( $trimmed, $comments ) = ( $1, $2 );
        $n->set_name( $trimmed );
        $log->debug("trimmed name: $trimmed");

        # "hot comments" start with ampersand. ignore if not.
        my ($hot) = $comments =~ /^&(.+)/;
        if ( not defined $hot ) {
            $log->debug("not hot: $comments");
            return;
        }
        $log->debug("hot comments: $comments");
        $comments = $hot;

        # string needs to be fully eaten up
        COMMENT: while ( my $old_length = length($comments) ) {

            # grab the next key and remove it, together with the '='
            if ( $comments =~ s/^(.+?)=// ) {
                my $key = $1;

                # strip every percent sign, e.g. rate_95%_HPD => rate_95_HPD
                $key =~ s/\%//g;

                # value is a comma separated range between curly braces;
                # the substitution consumes the range including the braces
                if ( $comments =~ s/^\{([^}]+)\}// ) {
                    my $range = $1;
                    my ( $min, $max ) = split /,/, $range;
                    _meta( $n, "${key}_min" => $min );
                    _meta( $n, "${key}_max" => $max );
                    $log->debug("$key: $min .. $max");
                }

                # value is a scalar, running up to the next comma
                elsif ( $comments =~ s/^([^,]+)// ) {
                    my $value = $1;
                    _meta( $n, $key => $value );
                    $log->debug("$key: $value");
                }

                # remove the comma that separates this pair from the next
                $comments =~ s/^,//;
            }

            # no progress was made in this iteration: give up rather than
            # loop forever on input that can't be interpreted
            if ( $old_length == length($comments) ) {
                $log->warn("couldn't parse newick comment: $comments");
                last COMMENT;
            }
        }
    });
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Attaches a single annotation to $node as a meta object whose predicate is
# the FigTree prefix followed by a cleaned-up version of $key.
#
# The clean-up replaces every '(', ')' and '+' in the key with an
# underscore and removes the first '!' so that the result can be used as
# the local part of a CURIE.
#
# Arguments: the node to annotate, the annotation key, the annotation value.
sub _meta {
    my ( $node, $key, $value ) = @_;
    $log->info("cleaning up CURIE candidate $key");

    # map all three characters onto '_' in one pass
    $key =~ tr/()+/_/;
    $key =~ s/\!//;

    my $predicate = "${pre}:${key}";
    $node->add_meta(
        $fac->create_meta( '-triple' => { $predicate => $value } )
    );
}
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# podinherit_insert_token |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SEE ALSO |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
164
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=over |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
The figtree parser is called by the L<Bio::Phylo::IO> object. |
171
|
|
|
|
|
|
|
Look there to learn how to parse phylogenetic data files in general. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com> |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=back |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 CITATION |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
184
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
185
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
186
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
1; |