line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Nhx; |
2
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
28
|
|
3
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
17
|
|
4
|
1
|
|
|
1
|
|
3
|
use Bio::Phylo::IO 'parse'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
37
|
|
5
|
1
|
|
|
1
|
|
4
|
use base 'Bio::Phylo::Parsers::Newick'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
278
|
|
6
|
1
|
|
|
1
|
|
6
|
use Bio::Phylo::Util::CONSTANT ':namespaces'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
389
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 NAME |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Nhx - Parser used by Bio::Phylo::IO, no serviceable parts inside |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 DESCRIPTION |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
This module parses "New Hampshire eXtended" (NHX) tree descriptions in parenthetical |
15
|
|
|
|
|
|
|
format. The node annotations, which are described here: |
16
|
|
|
|
|
|
|
https://sites.google.com/site/cmzmasek/home/software/forester/nhx, are stored as meta |
17
|
|
|
|
|
|
|
annotations in the namespace whose reserved prefix, nhx, is associated with the above |
18
|
|
|
|
|
|
|
URI. This means that after this parser is done, you can fetch an annotation value thusly: |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $gene_name = $node->get_meta_object( 'nhx:GN' ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
This parser is called by the L<Bio::Phylo::IO> facade, don't call it directly. In turn, |
23
|
|
|
|
|
|
|
this parser delegates processing of Newick strings to L<Bio::Phylo::Parsers::Newick>. |
24
|
|
|
|
|
|
|
As such, several additional flags can be passed to the Bio::Phylo::IO parse and parse_tree |
25
|
|
|
|
|
|
|
functions to influence how to deal with complex newick strings: |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
-keep => [ ...list of taxa names... ] |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
The C<-keep> flag allows you to only retain certain taxa of interest, ignoring others |
30
|
|
|
|
|
|
|
while building the tree object. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
-keep_whitespace => 1, |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
This will treat unescaped whitespace as if it is a normal taxon name character. Normally, |
35
|
|
|
|
|
|
|
whitespace is only retained inside quoted strings (e.g. C<'Homo sapiens'>), otherwise it |
36
|
|
|
|
|
|
|
is the convention to use underscores (C<Homo_sapiens>). This is because some programs |
37
|
|
|
|
|
|
|
introduce whitespace to prettify a newick string, e.g. to indicate indentation/depth, |
38
|
|
|
|
|
|
|
in which case you almost certainly want to ignore it. This is the default behaviour. The |
39
|
|
|
|
|
|
|
option to keep it is provided for dealing with incorrectly formatted data. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Note that the flag C<-ignore_comments>, which is optional for the Newick parser, cannot be |
42
|
|
|
|
|
|
|
used. This is because NHX embeds its metadata in what are normally comments (i.e. square |
43
|
|
|
|
|
|
|
brackets), so these must be processed in a special way. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
46
|
|
|
|
|
|
|
|
47
|
1
|
|
|
1
|
|
3
|
# Always returns 1.
# NOTE(review): presumably tells the Bio::Phylo::IO facade that this parser
# hands back a single scalar result rather than a list -- confirm against the
# caller.
sub _return_is_scalar {
    return 1;
}
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Entry point for parsing: switches the '-ignore_comments' argument on and
# then hands over to the parent class's _parse, returning whatever it returns.
#
# The flag is forced here (overriding anything the caller passed) because, as
# this module's POD explains, NHX keeps its metadata inside square brackets,
# which are normally comments and so need special processing.
sub _parse {
    my ($self) = @_;
    my $args = $self->_args;
    $args->{'-ignore_comments'} = 1;
    return $self->SUPER::_parse();
}
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Fills in $node's name, branch length and NHX annotations from the trailing
# tokens of a clade.
#
# Arguments:
#   $self  - the parser object; only its _logger is used here
#   $node  - the node to populate; it must provide set_name,
#            set_branch_length, set_namespaces and set_meta_object
#   @clade - the tokens that make up the clade
#
# Only the tokens after the last ')' are examined (all of @clade when there
# is no ')'). If the final token ends in "[&&NHX:k=v:k=v...]", that suffix is
# removed from the token, the 'nhx' namespace is registered on the node and
# every k=v pair is attached as the meta object "nhx:k". The remaining
# tokens are then interpreted by count:
#   1 token  - node name
#   2 tokens - the second is the branch length
#   3 tokens - the first is the name, the third is the branch length
#              (presumably the middle token is the ':' separator; it is not
#              checked)
# Any other count leaves the name and branch length untouched.
sub _parse_node_data {
    my ( $self, $node, @clade ) = @_;
    $self->_logger->debug("parsing name and branch length for node");

    # the node's own data starts right after the last closing parenthesis
    my $first = 0;
    for my $i ( reverse 0 .. $#clade ) {
        if ( $clade[$i] eq ')' ) {
            $first = $i + 1;
            last;
        }
    }
    my @tail = @clade[ $first .. $#clade ];

    # the NHX annotation, if any, is suffixed to the last token; capture in
    # list context so nothing can disturb $1/$2 before they are read
    my ( $plain, $nhx );
    if ( defined $tail[-1] ) {
        ( $plain, $nhx ) = $tail[-1] =~ /^(.*?)\[&&NHX:(.+?)\]$/;
    }
    if ( defined $nhx ) {
        $node->set_namespaces( 'nhx' => _NS_NHX_() );

        # strip the annotation in place, so that a name-only token such as
        # "B[&&NHX:GN=x]" yields the name "B" and not the whole token
        $tail[-1] = $plain;

        for my $tuple ( split /:/, $nhx ) {

            # limit of 2 keeps any '=' inside the value intact
            my ( $key, $value ) = split /=/, $tuple, 2;

            # an empty tuple (e.g. from "::") has no key to attach
            next if !defined $key or $key eq '';
            $node->set_meta_object( 'nhx:' . $key => $value );
        }
    }

    if ( @tail == 1 ) {

        # name only
        $node->set_name( $tail[0] );
    }
    elsif ( @tail == 2 ) {

        # branch length only
        $node->set_branch_length( $tail[1] );
    }
    elsif ( @tail == 3 ) {

        # name and branch length
        $node->set_name( $tail[0] );
        $node->set_branch_length( $tail[2] );
    }
    return;
}
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# podinherit_insert_token |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 SEE ALSO |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
100
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=over |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
The NHX parser is called by the L<Bio::Phylo::IO> object. |
107
|
|
|
|
|
|
|
Look there to learn how to parse newick strings. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=back |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 CITATION |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
120
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
121
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
122
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
1; |