File Coverage

blib/lib/Bio/Phylo/Parsers/Figtree.pm
Criterion Covered Total %
statement 86 89 96.6
branch 16 20 80.0
condition 8 9 88.8
subroutine 11 11 100.0
pod n/a
total 121 129 93.8


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Figtree;
2 2     2   10 use strict;
  2         4  
  2         47  
3 2     2   8 use warnings;
  2         4  
  2         42  
4 2     2   8 use base 'Bio::Phylo::Parsers::Abstract';
  2         3  
  2         561  
5 2     2   12 use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes';
  2         4  
  2         491  
6 2     2   13 use Bio::Phylo::Factory;
  2         3  
  2         8  
7 2     2   8 use Bio::Phylo::IO 'parse_tree';
  2         4  
  2         74  
8 2     2   8 use Bio::Phylo::Util::Logger ':levels';
  2         4  
  2         1727  
9              
10             my $fac = Bio::Phylo::Factory->new;
11             my $log = Bio::Phylo::Util::Logger->new;
12             my $ns = _NS_FIGTREE_;
13             my $pre = 'fig';
14              
15             =head1 NAME
16              
17             Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside
18              
19             =head1 DESCRIPTION
20              
21             This module parses annotated trees in NEXUS format as interpreted by FigTree
22             (L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have
23             additional 'hot comments' attached to them in the tree description. The
24             implementation assumes syntax as follows:
25              
26             [&minmax={0.1231,0.3254},rate=0.0075583392800736]
27            
28             I.e. the first token inside the comments is an ampersand, the annotations are
29             comma-separated key/value pairs, where ranges are between curly parentheses.
30              
31             The annotations are stored as meta objects, e.g.:
32              
33             $node->get_meta_object('fig:rate'); # 0.0075583392800736
34             $node->get_meta_object('fig:minmax_min'); # 0.1231
35             $node->get_meta_object('fig:minmax_max'); # 0.3254
36              
37             Annotations that have non-alphanumerical symbols in them will have these removed
38             from them. For example, C<rate_95%_HPD={}> becomes two annotations:
39             C<rate_95_HPD_min> and C<rate_95_HPD_max>.
40              
41             =cut
42              
43             sub _parse {
44 2     2   60 my $self = shift;
45 2         10 my $fh = $self->_handle;
46 2         55 my $forest = $fac->create_forest;
47 2         16 $forest->set_namespaces( $pre => $ns );
48 2         6 my $tree_block;
49             my $tree_string;
50 2         0 my %translate;
51 2         12 while(<$fh>) {
52 53 100       476 $tree_block++ if /BEGIN TREES;/i;
53 53 100       160 if ( /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) {
54 2         22 my ( $name, $rooted, $newick ) = ( $1, $2, $3 );
55 2         4 $tree_string++;
56 2         23 my $tree = parse_tree(
57             '-format' => 'newick',
58             '-string' => $newick,
59             '-ignore_comments' => 1,
60             );
61 2 100       16 $tree->set_as_unrooted if $rooted eq 'U';
62 2         20 $tree->set_name( $name );
63 2         13 $self->_post_process( $tree );
64 2         28 for my $tip ( @{ $tree->get_terminals } ) {
  2         50  
65 122         208 my $name = $tip->get_name;
66 122         251 $tip->set_name( $translate{$name} );
67             }
68 2         15 $forest->insert($tree);
69             }
70 53 100 100     247 if ( $tree_block and not $tree_string and /\s+(\d+)\s+(.+)/ ) {
      100        
71 18         41 my ( $id, $name ) = ( $1, $2 );
72 18         51 $name =~ s/[,;]$//;
73 18         66 $translate{$id} = $name;
74             }
75             }
76 2         17 return $forest;
77             }
78              
79             sub _post_process {
80 2     2   9 my ( $self, $tree ) = @_;
81 2         12 $log->debug("going to post-process tree");
82             $tree->visit(sub{
83 156     156   230 my $n = shift;
84 156         269 my $name = $n->get_name;
85 156         243 $name =~ s/\\//g;
86 156         394 $log->debug("name: $name");
87 156 100 66     614 if ( $name =~ /\[/ and $name =~ /^([^\[]*?)\[(.+?)\]$/ ) {
88 52         179 my ( $trimmed, $comments ) = ( $1, $2 );
89 52         149 $n->set_name( $trimmed );
90 52         179 $log->debug("trimmed name: $trimmed");
91            
92             # "hot comments" start with ampersand. ignore if not.
93 52 50       197 if ( $comments =~ /^&(.+)/ ) {
94 52         153 $log->debug("hot comments: $comments");
95 52         119 $comments = $1;
96            
97             # string needs to be fully eaten up
98 52         111 COMMENT: while( my $old_length = length($comments) ) {
99            
100             # grab the next key
101 463 50       1515 if ( $comments =~ /^(.+?)=/ ) {
102 463         926 my $key = $1;
103            
104             # remove the key and the =
105 463         4154 $comments =~ s/^\Q$key\E=//;
106 463         1081 $key =~ s/\%//;
107            
108             # value is a comma separated range
109 463 100       1653 if ( $comments =~ /^{([^}]+)}/ ) {
    50          
110 206         435 my $value = $1;
111 206         570 my ( $min, $max ) = split /,/, $value;
112 206         608 _meta( $n, "${key}_min" => $min );
113 206         792 _meta( $n, "${key}_max" => $max );
114 206         936 $log->debug("$key: $min .. $max");
115            
116             # remove the range
117 206         375 $value = "{$value}";
118 206         3020 $comments =~ s/^\Q$value\E//;
119             }
120            
121             # value is a scalar
122             elsif ( $comments =~ /^([^,]+)/ ) {
123 257         502 my $value = $1;
124 257         567 _meta( $n, $key => $value );
125 257         3468 $comments =~ s/^\Q$value\E//;
126 257         1013 $log->debug("$key: $value");
127             }
128            
129             # remove trailing comma, if any
130 463         1489 $comments =~ s/^,//;
131             }
132 463 50       1590 if ( $old_length == length($comments) ) {
133 0         0 $log->warn("couldn't parse newick comment: $comments");
134 0         0 last COMMENT;
135             }
136             }
137             }
138             else {
139 0         0 $log->debug("not hot: $comments");
140             }
141             }
142 2         29 });
143             }
144              
145             sub _meta {
146 669     669   1134 my ( $node, $key, $value ) = @_;
147             #if ( $key =~ /[()+]/ ) {
148 669         2052 $log->info("cleaning up CURIE candidate $key");
149 669         1123 $key =~ s/\(/_/g;
150 669         882 $key =~ s/\)/_/g;
151 669         784 $key =~ s/\+/_/g;
152 669         779 $key =~ s/\!//;
153             #}
154 669         3521 $node->add_meta(
155             $fac->create_meta( '-triple' => { "${pre}:${key}" => $value } )
156             );
157             }
158              
159              
160             # podinherit_insert_token
161              
162             =head1 SEE ALSO
163              
164             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
165             for any user or developer questions and discussions.
166              
167             =over
168              
169             =item L<Bio::Phylo::IO>
170              
171             The figtree parser is called by the L<Bio::Phylo::IO> object.
172             Look there to learn how to parse phylogenetic data files in general.
173              
174             =item L<Bio::Phylo::Manual>
175              
176             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>
177              
178             =back
179              
180             =head1 CITATION
181              
182             If you use Bio::Phylo in published research, please cite it:
183              
184             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
185             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
186             I<BMC Bioinformatics> B<12>:63.
187             L<http://dx.doi.org/10.1186/1471-2105-12-63>
188              
189             =cut
190              
191             1;