line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Figtree; |
2
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
47
|
|
3
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
42
|
|
4
|
2
|
|
|
2
|
|
8
|
use base 'Bio::Phylo::Parsers::Abstract'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
561
|
|
5
|
2
|
|
|
2
|
|
12
|
use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
491
|
|
6
|
2
|
|
|
2
|
|
13
|
use Bio::Phylo::Factory; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
8
|
|
7
|
2
|
|
|
2
|
|
8
|
use Bio::Phylo::IO 'parse_tree'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
74
|
|
8
|
2
|
|
|
2
|
|
8
|
use Bio::Phylo::Util::Logger ':levels'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1727
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $fac = Bio::Phylo::Factory->new; |
11
|
|
|
|
|
|
|
my $log = Bio::Phylo::Util::Logger->new; |
12
|
|
|
|
|
|
|
my $ns = _NS_FIGTREE_; |
13
|
|
|
|
|
|
|
my $pre = 'fig'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 DESCRIPTION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
This module parses annotated trees in NEXUS format as interpreted by FigTree |
22
|
|
|
|
|
|
|
(L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have |
23
|
|
|
|
|
|
|
additional 'hot comments' attached to them in the tree description. The |
24
|
|
|
|
|
|
|
implementation assumes syntax as follows: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
[&minmax={0.1231,0.3254},rate=0.0075583392800736] |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
I.e. the first token inside the comments is an ampersand, the annotations are |
29
|
|
|
|
|
|
|
comma-separated key/value pairs, where ranges are between curly parentheses. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
The annotations are stored as meta objects, e.g.: |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$node->get_meta_object('fig:rate'); # 0.0075583392800736 |
34
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_min'); # 0.1231 |
35
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_max'); # 0.3254 |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Annotations that have non-alphanumerical symbols in them will have these removed |
38
|
|
|
|
|
|
|
from them. For example, C<rate_95%_HPD={}> becomes two annotations: |
39
|
|
|
|
|
|
|
C<rate_95_HPD_min> and C<rate_95_HPD_max>. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=cut |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub _parse { |
44
|
2
|
|
|
2
|
|
60
|
my $self = shift; |
45
|
2
|
|
|
|
|
10
|
my $fh = $self->_handle; |
46
|
2
|
|
|
|
|
55
|
my $forest = $fac->create_forest; |
47
|
2
|
|
|
|
|
16
|
$forest->set_namespaces( $pre => $ns ); |
48
|
2
|
|
|
|
|
6
|
my $tree_block; |
49
|
|
|
|
|
|
|
my $tree_string; |
50
|
2
|
|
|
|
|
0
|
my %translate; |
51
|
2
|
|
|
|
|
12
|
while(<$fh>) { |
52
|
53
|
100
|
|
|
|
476
|
$tree_block++ if /BEGIN TREES;/i; |
53
|
53
|
100
|
|
|
|
160
|
if ( /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) { |
54
|
2
|
|
|
|
|
22
|
my ( $name, $rooted, $newick ) = ( $1, $2, $3 ); |
55
|
2
|
|
|
|
|
4
|
$tree_string++; |
56
|
2
|
|
|
|
|
23
|
my $tree = parse_tree( |
57
|
|
|
|
|
|
|
'-format' => 'newick', |
58
|
|
|
|
|
|
|
'-string' => $newick, |
59
|
|
|
|
|
|
|
'-ignore_comments' => 1, |
60
|
|
|
|
|
|
|
); |
61
|
2
|
100
|
|
|
|
16
|
$tree->set_as_unrooted if $rooted eq 'U'; |
62
|
2
|
|
|
|
|
20
|
$tree->set_name( $name ); |
63
|
2
|
|
|
|
|
13
|
$self->_post_process( $tree ); |
64
|
2
|
|
|
|
|
28
|
for my $tip ( @{ $tree->get_terminals } ) { |
|
2
|
|
|
|
|
50
|
|
65
|
122
|
|
|
|
|
208
|
my $name = $tip->get_name; |
66
|
122
|
|
|
|
|
251
|
$tip->set_name( $translate{$name} ); |
67
|
|
|
|
|
|
|
} |
68
|
2
|
|
|
|
|
15
|
$forest->insert($tree); |
69
|
|
|
|
|
|
|
} |
70
|
53
|
100
|
100
|
|
|
247
|
if ( $tree_block and not $tree_string and /\s+(\d+)\s+(.+)/ ) { |
|
|
|
100
|
|
|
|
|
71
|
18
|
|
|
|
|
41
|
my ( $id, $name ) = ( $1, $2 ); |
72
|
18
|
|
|
|
|
51
|
$name =~ s/[,;]$//; |
73
|
18
|
|
|
|
|
66
|
$translate{$id} = $name; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
} |
76
|
2
|
|
|
|
|
17
|
return $forest; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub _post_process { |
80
|
2
|
|
|
2
|
|
9
|
my ( $self, $tree ) = @_; |
81
|
2
|
|
|
|
|
12
|
$log->debug("going to post-process tree"); |
82
|
|
|
|
|
|
|
$tree->visit(sub{ |
83
|
156
|
|
|
156
|
|
230
|
my $n = shift; |
84
|
156
|
|
|
|
|
269
|
my $name = $n->get_name; |
85
|
156
|
|
|
|
|
243
|
$name =~ s/\\//g; |
86
|
156
|
|
|
|
|
394
|
$log->debug("name: $name"); |
87
|
156
|
100
|
66
|
|
|
614
|
if ( $name =~ /\[/ and $name =~ /^([^\[]*?)\[(.+?)\]$/ ) { |
88
|
52
|
|
|
|
|
179
|
my ( $trimmed, $comments ) = ( $1, $2 ); |
89
|
52
|
|
|
|
|
149
|
$n->set_name( $trimmed ); |
90
|
52
|
|
|
|
|
179
|
$log->debug("trimmed name: $trimmed"); |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# "hot comments" start with ampersand. ignore if not. |
93
|
52
|
50
|
|
|
|
197
|
if ( $comments =~ /^&(.+)/ ) { |
94
|
52
|
|
|
|
|
153
|
$log->debug("hot comments: $comments"); |
95
|
52
|
|
|
|
|
119
|
$comments = $1; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# string needs to be fully eaten up |
98
|
52
|
|
|
|
|
111
|
COMMENT: while( my $old_length = length($comments) ) { |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# grab the next key |
101
|
463
|
50
|
|
|
|
1515
|
if ( $comments =~ /^(.+?)=/ ) { |
102
|
463
|
|
|
|
|
926
|
my $key = $1; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# remove the key and the = |
105
|
463
|
|
|
|
|
4154
|
$comments =~ s/^\Q$key\E=//; |
106
|
463
|
|
|
|
|
1081
|
$key =~ s/\%//; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# value is a comma separated range |
109
|
463
|
100
|
|
|
|
1653
|
if ( $comments =~ /^{([^}]+)}/ ) { |
|
|
50
|
|
|
|
|
|
110
|
206
|
|
|
|
|
435
|
my $value = $1; |
111
|
206
|
|
|
|
|
570
|
my ( $min, $max ) = split /,/, $value; |
112
|
206
|
|
|
|
|
608
|
_meta( $n, "${key}_min" => $min ); |
113
|
206
|
|
|
|
|
792
|
_meta( $n, "${key}_max" => $max ); |
114
|
206
|
|
|
|
|
936
|
$log->debug("$key: $min .. $max"); |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# remove the range |
117
|
206
|
|
|
|
|
375
|
$value = "{$value}"; |
118
|
206
|
|
|
|
|
3020
|
$comments =~ s/^\Q$value\E//; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# value is a scalar |
122
|
|
|
|
|
|
|
elsif ( $comments =~ /^([^,]+)/ ) { |
123
|
257
|
|
|
|
|
502
|
my $value = $1; |
124
|
257
|
|
|
|
|
567
|
_meta( $n, $key => $value ); |
125
|
257
|
|
|
|
|
3468
|
$comments =~ s/^\Q$value\E//; |
126
|
257
|
|
|
|
|
1013
|
$log->debug("$key: $value"); |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# remove trailing comma, if any |
130
|
463
|
|
|
|
|
1489
|
$comments =~ s/^,//; |
131
|
|
|
|
|
|
|
} |
132
|
463
|
50
|
|
|
|
1590
|
if ( $old_length == length($comments) ) { |
133
|
0
|
|
|
|
|
0
|
$log->warn("couldn't parse newick comment: $comments"); |
134
|
0
|
|
|
|
|
0
|
last COMMENT; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
else { |
139
|
0
|
|
|
|
|
0
|
$log->debug("not hot: $comments"); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
2
|
|
|
|
|
29
|
}); |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub _meta { |
146
|
669
|
|
|
669
|
|
1134
|
my ( $node, $key, $value ) = @_; |
147
|
|
|
|
|
|
|
#if ( $key =~ /[()+]/ ) { |
148
|
669
|
|
|
|
|
2052
|
$log->info("cleaning up CURIE candidate $key"); |
149
|
669
|
|
|
|
|
1123
|
$key =~ s/\(/_/g; |
150
|
669
|
|
|
|
|
882
|
$key =~ s/\)/_/g; |
151
|
669
|
|
|
|
|
784
|
$key =~ s/\+/_/g; |
152
|
669
|
|
|
|
|
779
|
$key =~ s/\!//; |
153
|
|
|
|
|
|
|
#} |
154
|
669
|
|
|
|
|
3521
|
$node->add_meta( |
155
|
|
|
|
|
|
|
$fac->create_meta( '-triple' => { "${pre}:${key}" => $value } ) |
156
|
|
|
|
|
|
|
); |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# podinherit_insert_token |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 SEE ALSO |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
165
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=over |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
The figtree parser is called by the L<Bio::Phylo::IO> object. |
172
|
|
|
|
|
|
|
Look there to learn how to parse phylogenetic data files in general. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com> |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=back |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 CITATION |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
185
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
186
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
187
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
1; |