| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
=encoding utf8 |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Bio::BPWrapper::TreeManipulations - Functions for biotree |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Bio::BPWrapper::TreeManipulations; |
|
10
|
|
|
|
|
|
|
# Set options hash ... |
|
11
|
|
|
|
|
|
|
initialize(\%opts); |
|
12
|
|
|
|
|
|
|
write_out(\%opts); |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=cut |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# Package global variables |
|
17
|
|
|
|
|
|
|
my ($in, $out, $aln, %opts, $file, $in_format, $out_format, @nodes, |
|
18
|
|
|
|
|
|
|
$tree, $print_tree, $rootnode, @otus); |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
###################### subroutine ###################### |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Bio::BPWrapper::TreeManipulations; |
|
23
|
|
|
|
|
|
|
|
|
24
|
15
|
|
|
15
|
|
105
|
use strict; |
|
|
15
|
|
|
|
|
26
|
|
|
|
15
|
|
|
|
|
560
|
|
|
25
|
15
|
|
|
15
|
|
75
|
use warnings; |
|
|
15
|
|
|
|
|
27
|
|
|
|
15
|
|
|
|
|
678
|
|
|
26
|
15
|
|
|
15
|
|
164
|
use v5.10; |
|
|
15
|
|
|
|
|
51
|
|
|
27
|
15
|
|
|
15
|
|
75
|
use Bio::BPWrapper; |
|
|
15
|
|
|
|
|
29
|
|
|
|
15
|
|
|
|
|
424
|
|
|
28
|
15
|
|
|
15
|
|
8614
|
use Bio::TreeIO; |
|
|
15
|
|
|
|
|
1079673
|
|
|
|
15
|
|
|
|
|
583
|
|
|
29
|
15
|
|
|
15
|
|
114
|
use Bio::Tree::Tree; |
|
|
15
|
|
|
|
|
28
|
|
|
|
15
|
|
|
|
|
281
|
|
|
30
|
15
|
|
|
15
|
|
56
|
use Bio::Tree::Node; |
|
|
15
|
|
|
|
|
23
|
|
|
|
15
|
|
|
|
|
233
|
|
|
31
|
15
|
|
|
15
|
|
51
|
use Bio::Tree::TreeFunctionsI; |
|
|
15
|
|
|
|
|
19
|
|
|
|
15
|
|
|
|
|
282
|
|
|
32
|
15
|
|
|
15
|
|
8694
|
use Data::Dumper; |
|
|
15
|
|
|
|
|
117082
|
|
|
|
15
|
|
|
|
|
1166
|
|
|
33
|
15
|
|
|
15
|
|
7775
|
use POSIX; |
|
|
15
|
|
|
|
|
103132
|
|
|
|
15
|
|
|
|
|
87
|
|
|
34
|
15
|
|
|
15
|
|
41576
|
use List::Util qw(shuffle); |
|
|
15
|
|
|
|
|
26
|
|
|
|
15
|
|
|
|
|
1922
|
|
|
35
|
15
|
|
|
15
|
|
88
|
if ($ENV{'DEBUG'}) { use Data::Dumper } |
|
|
15
|
|
|
|
|
28
|
|
|
|
15
|
|
|
|
|
743
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
15
|
|
|
15
|
|
65
|
use vars qw(@ISA @EXPORT @EXPORT_OK); |
|
|
15
|
|
|
|
|
27
|
|
|
|
15
|
|
|
|
|
152704
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
@EXPORT = qw(print_tree_shape edge_length_abundance swap_otus getdistance |
|
42
|
|
|
|
|
|
|
sister_pairs countOTU reroot clean_tree delete_otus initialize |
|
43
|
|
|
|
|
|
|
write_out bin walk_edge cut_sister); |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 SUBROUTINES |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 initialize() |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Sets up most of the actions to be performed on an alignment. |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Call this right after setting up an options hash. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Sets package variables: C<$in_format>, C<@nodes>, C<$tree>, C<$out_format>, and C<$out>. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub initialize { |
|
59
|
15
|
|
|
15
|
1
|
43
|
my $opts_ref = shift; |
|
60
|
15
|
|
|
|
|
93
|
Bio::BPWrapper::common_opts($opts_ref); |
|
61
|
15
|
|
|
|
|
28
|
%opts = %{$opts_ref}; |
|
|
15
|
|
|
|
|
65
|
|
|
62
|
15
|
|
50
|
|
|
122
|
$in_format = $opts{"input"} // 'newick'; # This doesn't work...or does it? |
|
63
|
15
|
|
100
|
|
|
86
|
$out_format = $opts{"output"} // "newick"; |
|
64
|
15
|
|
|
|
|
34
|
$print_tree = 0; # Trigger printing the tree. |
|
65
|
15
|
|
50
|
|
|
56
|
my $file = shift || "STDIN"; |
|
66
|
15
|
50
|
|
|
|
61
|
if ($in_format eq 'edge') { |
|
67
|
0
|
|
|
|
|
0
|
$tree = &edge2tree($file); |
|
68
|
|
|
|
|
|
|
} else { |
|
69
|
15
|
50
|
|
|
|
252
|
$in = Bio::TreeIO->new(-format => $in_format, ($file eq "STDIN") ? (-fh => \*STDIN) : (-file => $file)); |
|
70
|
15
|
|
|
|
|
95786
|
$tree = $in->next_tree(); # get the first tree (and ignore the rest) |
|
71
|
|
|
|
|
|
|
} |
|
72
|
15
|
|
|
|
|
444935
|
$out = Bio::TreeIO->new(-format => $out_format); |
|
73
|
15
|
|
|
|
|
14310
|
@nodes = $tree->get_nodes; |
|
74
|
15
|
|
|
|
|
13140
|
$rootnode = $tree->get_root_node; |
|
75
|
15
|
100
|
|
|
|
114
|
foreach (@nodes) { push @otus, $_ if $_->is_Leaf } |
|
|
420
|
|
|
|
|
3659
|
|
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# label low-desc sisters as "cut-nodes" |
|
79
|
|
|
|
|
|
|
sub cut_sister { |
|
80
|
0
|
|
0
|
0
|
0
|
0
|
my $cutoff = $opts{'cut-sis'} || die "$0 --cut-sis \n"; |
|
81
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
82
|
0
|
|
|
|
|
0
|
my $decCts = 0; |
|
83
|
0
|
|
0
|
|
|
0
|
my $id = $nd->id() || $nd->internal_id(); |
|
84
|
0
|
0
|
|
|
|
0
|
if ($nd->is_Leaf) { |
|
85
|
0
|
|
|
|
|
0
|
$nd->id("cut_". $id); # label to cut |
|
86
|
0
|
|
|
|
|
0
|
next; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
0
|
foreach ($nd->get_all_Descendents) { |
|
90
|
0
|
|
|
|
|
0
|
$decCts++; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
0
|
0
|
|
|
|
0
|
next if $decCts >= $cutoff; # don't cut |
|
93
|
0
|
|
|
|
|
0
|
$nd->id("cut_". $id); # label to cut |
|
94
|
|
|
|
|
|
|
} |
|
95
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub edge2tree { |
|
99
|
0
|
|
|
0
|
0
|
0
|
my $edgeFile = shift; |
|
100
|
0
|
|
0
|
|
|
0
|
open EG, "<", $edgeFile || die "can't read parent-child edge file\n"; |
|
101
|
0
|
|
|
|
|
0
|
$rootnode = Bio::Tree::Node->new(-id=>'root'); |
|
102
|
0
|
|
|
|
|
0
|
my $tr = Bio::Tree::Tree->new(); |
|
103
|
0
|
|
|
|
|
0
|
my @nds = ($rootnode); |
|
104
|
0
|
|
|
|
|
0
|
my %parent; |
|
105
|
|
|
|
|
|
|
my %seen_edge; |
|
106
|
0
|
|
|
|
|
0
|
while() { |
|
107
|
0
|
0
|
|
|
|
0
|
next unless /^(\S+)\s+(\S+)/; |
|
108
|
0
|
|
|
|
|
0
|
my ($pa, $ch) = ($1, $2); |
|
109
|
0
|
|
|
|
|
0
|
$seen_edge{$pa}{$ch}++; # number of events |
|
110
|
0
|
0
|
|
|
|
0
|
$parent{$ch} = $pa unless $parent{$ch}; # seen before |
|
111
|
|
|
|
|
|
|
} |
|
112
|
0
|
|
|
|
|
0
|
close EG; |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# print Dumper(\%parent); |
|
115
|
|
|
|
|
|
|
|
|
116
|
0
|
|
|
|
|
0
|
my %seen_parent; |
|
117
|
|
|
|
|
|
|
my %add_node; |
|
118
|
|
|
|
|
|
|
# my @nds; |
|
119
|
0
|
|
|
|
|
0
|
foreach my $ch (keys %parent) { |
|
120
|
0
|
|
|
|
|
0
|
my $pa = $parent{$ch}; |
|
121
|
0
|
|
|
|
|
0
|
$seen_parent{$pa}++; |
|
122
|
0
|
|
|
|
|
0
|
push @nds, Bio::Tree::Node->new(-id=>$ch, -branch_length => $seen_edge{$pa}{$ch}); |
|
123
|
0
|
|
|
|
|
0
|
$add_node{$ch}++; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# special treatment to set outgroup (which has no parent specified in the edge table) as root |
|
127
|
|
|
|
|
|
|
# add all as chid of root |
|
128
|
0
|
|
|
|
|
0
|
foreach my $pa (keys %seen_parent) { |
|
129
|
0
|
0
|
|
|
|
0
|
next if $add_node{$pa}; |
|
130
|
0
|
|
|
|
|
0
|
my $orphan = Bio::Tree::Node->new(-id=>$pa, -branch_length => 1); # ST213 in test file "edges-pars.tsv" |
|
131
|
0
|
|
|
|
|
0
|
push @nds, $orphan; |
|
132
|
0
|
|
|
|
|
0
|
$parent{$pa} = 'root'; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# attach descendant |
|
136
|
|
|
|
|
|
|
# my %attached; |
|
137
|
0
|
|
|
|
|
0
|
foreach my $node (@nds) { |
|
138
|
0
|
0
|
|
|
|
0
|
next if $node eq $rootnode; |
|
139
|
0
|
|
|
|
|
0
|
my $id = $node->id(); # print $id, "\t"; |
|
140
|
0
|
|
|
|
|
0
|
my $p_id = $parent{$id}; # print $p_id, "\n"; |
|
141
|
|
|
|
|
|
|
# next if $attached{$p_id}++; |
|
142
|
0
|
|
|
|
|
0
|
my @nds = grep { $_->id() eq $p_id } @nds; |
|
|
0
|
|
|
|
|
0
|
|
|
143
|
0
|
0
|
|
|
|
0
|
if (@nds) { |
|
144
|
0
|
|
|
|
|
0
|
my $p_node = shift @nds; |
|
145
|
0
|
|
|
|
|
0
|
$p_node->add_Descendent($node); |
|
146
|
|
|
|
|
|
|
} else { |
|
147
|
0
|
|
|
|
|
0
|
die "no parent $id\n"; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
} |
|
150
|
0
|
|
|
|
|
0
|
$tr->set_root_node($rootnode); |
|
151
|
0
|
|
|
|
|
0
|
return $tr; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub reorder_by_ref { |
|
156
|
0
|
0
|
|
0
|
0
|
0
|
die "reference node id missing\n" unless $opts{'ref'}; |
|
157
|
0
|
|
|
|
|
0
|
my $id = 0; |
|
158
|
0
|
|
|
|
|
0
|
&_flip_if_not_in_top_clade($rootnode, $opts{'ref'}, \$id); |
|
159
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub _flip_if_not_in_top_clade { # by resetting creation_id & sortby option of each_Descendent |
|
163
|
0
|
|
|
0
|
|
0
|
my ($nd, $ref, $refid) = @_; |
|
164
|
0
|
|
|
|
|
0
|
$nd->_creation_id($$refid++); |
|
165
|
|
|
|
|
|
|
# print STDERR $nd->internal_id(), ":\t"; |
|
166
|
0
|
0
|
|
|
|
0
|
if ($nd->is_Leaf()) { |
|
167
|
|
|
|
|
|
|
# print STDERR "\n"; |
|
168
|
0
|
|
|
|
|
0
|
return } |
|
169
|
0
|
|
|
|
|
0
|
my @des = $nd->each_Descendent(); |
|
170
|
0
|
|
|
|
|
0
|
my @des_reordered; |
|
171
|
0
|
|
|
|
|
0
|
for (my $i=0; $i<=$#des; $i++) { |
|
172
|
0
|
|
|
|
|
0
|
my $in_des = 0; |
|
173
|
0
|
|
|
|
|
0
|
my $id = $des[$i]->internal_id(); |
|
174
|
0
|
|
|
|
|
0
|
my @otus = &_each_leaf($des[$i]); |
|
175
|
0
|
0
|
|
|
|
0
|
foreach (@otus) { $in_des = 1 if $ref eq $_->id } |
|
|
0
|
|
|
|
|
0
|
|
|
176
|
|
|
|
|
|
|
# print STDERR join("|", map { $_->id } @otus), " => ", $in_des, ";"; |
|
177
|
0
|
0
|
|
|
|
0
|
if ($in_des) { |
|
178
|
0
|
|
|
|
|
0
|
unshift @des_reordered, $des[$i]; |
|
179
|
|
|
|
|
|
|
} else { |
|
180
|
0
|
|
|
|
|
0
|
push @des_reordered, $des[$i]; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
} |
|
183
|
0
|
|
|
|
|
0
|
foreach (@des_reordered) { |
|
184
|
0
|
|
|
|
|
0
|
$_->_creation_id($$refid++); |
|
185
|
|
|
|
|
|
|
# print STDERR $_->internal_id(), ";"; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
# print STDERR "\n"; |
|
188
|
0
|
|
|
|
|
0
|
my @des_new = $nd->each_Descendent('creation'); # key sort function!! |
|
189
|
0
|
|
|
|
|
0
|
foreach my $de (@des_new) { |
|
190
|
0
|
|
|
|
|
0
|
&_flip_if_not_in_top_clade($de, $opts{'ref'}, $refid); |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# trim a node to a single OTU representative if all branch lengths of its descendant OTUs <= $cut |
|
196
|
|
|
|
|
|
|
sub trim_tips { |
|
197
|
0
|
0
|
|
0
|
0
|
0
|
die "Usage: $0 --trim-tips \n" unless $opts{'trim-tips'}; |
|
198
|
0
|
|
|
|
|
0
|
my $cut = $opts{'trim-tips'}; |
|
199
|
|
|
|
|
|
|
|
|
200
|
0
|
|
|
|
|
0
|
my @trim_nodes; |
|
201
|
0
|
|
|
|
|
0
|
my $group_ct = 0; |
|
202
|
0
|
|
|
|
|
0
|
&identify_nodes_to_trim_by_walk_from_root($rootnode, \$cut, \@trim_nodes, \$group_ct); |
|
203
|
0
|
|
|
|
|
0
|
my %otu_sets; |
|
204
|
0
|
|
|
|
|
0
|
my $set_ct = 1; |
|
205
|
0
|
|
|
|
|
0
|
foreach my $ref_trim (@trim_nodes) { |
|
206
|
0
|
|
|
|
|
0
|
foreach (@$ref_trim) { |
|
207
|
0
|
|
|
|
|
0
|
$otu_sets{$_} = $set_ct; |
|
208
|
|
|
|
|
|
|
} |
|
209
|
0
|
|
|
|
|
0
|
$set_ct++; |
|
210
|
|
|
|
|
|
|
# print STDERR join "\t", sort @$ref_trim; |
|
211
|
|
|
|
|
|
|
# print STDERR "\n"; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
0
|
foreach (@otus) { |
|
216
|
0
|
0
|
|
|
|
0
|
next if $otu_sets{$_->id}; |
|
217
|
0
|
|
|
|
|
0
|
$otu_sets{$_->id} = $set_ct++; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
# print Dumper(\%otu_sets); |
|
220
|
0
|
|
|
|
|
0
|
print STDERR "#Trim tree from tip with a cutoff of d=$cut\n"; |
|
221
|
0
|
|
|
|
|
0
|
print STDERR "otu\tnr_set_id\n"; |
|
222
|
0
|
|
|
|
|
0
|
foreach (sort {$otu_sets{$a} <=> $otu_sets{$b}} keys %otu_sets) { |
|
|
0
|
|
|
|
|
0
|
|
|
223
|
0
|
|
|
|
|
0
|
print STDERR $_, "\t", $otu_sets{$_}, "\n"; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub identify_nodes_to_trim_by_walk_from_root { |
|
230
|
0
|
|
|
0
|
0
|
0
|
my $node = shift; # internal node only |
|
231
|
0
|
|
|
|
|
0
|
my $ref_cut = shift; |
|
232
|
0
|
|
|
|
|
0
|
my $ref_group = shift; |
|
233
|
0
|
|
|
|
|
0
|
my $ref_ct = shift; |
|
234
|
|
|
|
|
|
|
|
|
235
|
0
|
0
|
|
|
|
0
|
return if $node->is_Leaf; |
|
236
|
0
|
|
|
|
|
0
|
my %des_otus; # save distances |
|
237
|
0
|
|
|
|
|
0
|
my $trim = 1; # default to trim |
|
238
|
|
|
|
|
|
|
# trim a node if all OTU distance to it is <= cut |
|
239
|
0
|
|
|
|
|
0
|
foreach my $des ($node -> get_all_Descendents()) { |
|
240
|
0
|
0
|
|
|
|
0
|
next unless $des->is_Leaf; |
|
241
|
|
|
|
|
|
|
#push @des_otus, $des; |
|
242
|
|
|
|
|
|
|
# distance to a desc OTU |
|
243
|
0
|
|
|
|
|
0
|
my $dist = $tree->distance($node, $des); |
|
244
|
0
|
|
|
|
|
0
|
$des_otus{$des->id} = { 'otu' => $des, 'dist' => $dist }; |
|
245
|
0
|
0
|
|
|
|
0
|
$trim = 0 if $dist > $$ref_cut; # don't trim is any distance to OTU > $cut |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
0
|
0
|
|
|
|
0
|
if ($trim) { # trim & retain the first OTU; stop descending |
|
249
|
0
|
|
|
|
|
0
|
my @leafs = sort keys %des_otus; # make a copy |
|
250
|
0
|
|
|
|
|
0
|
my $pa = $node->ancestor(); |
|
251
|
0
|
|
|
|
|
0
|
$pa -> remove_Descendent($node); # clear this node as a des of parent |
|
252
|
0
|
|
|
|
|
0
|
my $retain = shift @leafs; |
|
253
|
0
|
|
|
|
|
0
|
my $retain_node = $des_otus{$retain}->{'otu'}; |
|
254
|
0
|
|
|
|
|
0
|
my $d = $node->branch_length() + $des_otus{$retain}->{'dist'}; |
|
255
|
0
|
|
|
|
|
0
|
$retain_node->branch_length($d); # keep distance to tip |
|
256
|
0
|
|
|
|
|
0
|
$pa->add_Descendent($retain_node); # add retained OTU to parent |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# collect all OTUs for a trimmed inode |
|
259
|
0
|
|
|
|
|
0
|
push @$ref_group, [keys %des_otus]; |
|
260
|
0
|
|
|
|
|
0
|
$$ref_ct++; |
|
261
|
0
|
|
|
|
|
0
|
return; |
|
262
|
|
|
|
|
|
|
} else { # don't trim |
|
263
|
0
|
|
|
|
|
0
|
foreach my $des ($node->each_Descendent()) { |
|
264
|
0
|
|
|
|
|
0
|
&identify_nodes_to_trim_by_walk_from_root($des, $ref_cut, $ref_group, $ref_ct); |
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
} |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub cut_tree { |
|
270
|
0
|
|
|
0
|
0
|
0
|
my @otu_hts; |
|
271
|
0
|
|
|
|
|
0
|
$rootnode->{height} = 0; |
|
272
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
273
|
0
|
|
|
|
|
0
|
my $ht = &distance_to_root($nd); |
|
274
|
0
|
|
|
|
|
0
|
$nd->{height} = $ht; |
|
275
|
0
|
0
|
|
|
|
0
|
push @otu_hts, $ht if $nd->is_Leaf; |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
0
|
|
|
|
|
0
|
@otu_hts = sort {$a <=> $b} @otu_hts; |
|
|
0
|
|
|
|
|
0
|
|
|
279
|
0
|
|
|
|
|
0
|
my $least_otu_height = shift @otu_hts; |
|
280
|
0
|
0
|
|
|
|
0
|
die "Usage: $0 --cut-tree \n" unless $opts{'cut-tree'}; |
|
281
|
0
|
|
0
|
|
|
0
|
my $cut = $opts{'cut-tree'} || 0.5 * $least_otu_height; # default to cut the branches traversing the line that is 1/2 of least deep OTU |
|
282
|
0
|
0
|
|
|
|
0
|
die "Cut tree at $cut, greater than least-deep OTU ($least_otu_height). Lower cut value.\n" if $cut >= $least_otu_height; |
|
283
|
|
|
|
|
|
|
|
|
284
|
0
|
|
|
|
|
0
|
my @cut_nodes; |
|
285
|
0
|
|
|
|
|
0
|
my $group_ct = 0; |
|
286
|
0
|
|
|
|
|
0
|
&identify_nodes_to_cut_by_walk_from_root($rootnode, \$cut, \@cut_nodes, \$group_ct); |
|
287
|
0
|
|
|
|
|
0
|
foreach my $cutnode (@cut_nodes) { |
|
288
|
0
|
0
|
|
|
|
0
|
print $cutnode->is_Leaf() ? "cut_otu" : $cutnode->id(), ":\t"; |
|
289
|
0
|
|
|
|
|
0
|
print join "\t", map {$_->id()} &_each_leaf($cutnode); |
|
|
0
|
|
|
|
|
0
|
|
|
290
|
0
|
|
|
|
|
0
|
print "\n"; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub identify_nodes_to_cut_by_walk_from_root { |
|
297
|
0
|
|
|
0
|
0
|
0
|
my $node = shift; |
|
298
|
0
|
|
|
|
|
0
|
my $ref_cut = shift; |
|
299
|
0
|
|
|
|
|
0
|
my $ref_group = shift; |
|
300
|
0
|
|
|
|
|
0
|
my $ref_ct = shift; |
|
301
|
0
|
0
|
|
|
|
0
|
return if $node->{height} > $$ref_cut; # node too high |
|
302
|
0
|
|
|
|
|
0
|
foreach my $des ($node->each_Descendent() ) { |
|
303
|
0
|
0
|
|
|
|
0
|
if ($des->{height} > $$ref_cut) { # found a branch to cut: parent height < $cut & child height > $cut |
|
304
|
0
|
0
|
|
|
|
0
|
$des->id("cut_" . $$ref_ct++) unless $des->is_Leaf; |
|
305
|
0
|
|
|
|
|
0
|
push @$ref_group, $des |
|
306
|
|
|
|
|
|
|
} |
|
307
|
0
|
|
|
|
|
0
|
&identify_nodes_to_cut_by_walk_from_root($des, $ref_cut, $ref_group, $ref_ct); # node too low |
|
308
|
|
|
|
|
|
|
} |
|
309
|
|
|
|
|
|
|
} |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub label_selected_nodes { |
|
312
|
0
|
|
|
0
|
0
|
0
|
my $label_file = $opts{'label-selected-nodes'}; # each line consists of internal_id label |
|
313
|
0
|
|
|
|
|
0
|
my %labs; |
|
314
|
0
|
|
0
|
|
|
0
|
open LAB, "<", $label_file || die "label file not found\n"; |
|
315
|
0
|
|
|
|
|
0
|
while() { |
|
316
|
0
|
|
|
|
|
0
|
chomp; |
|
317
|
0
|
|
|
|
|
0
|
my ($a, $b) = split; |
|
318
|
0
|
|
|
|
|
0
|
$labs{$a} = $b; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
0
|
|
|
|
|
0
|
close LAB; |
|
321
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
322
|
0
|
0
|
|
|
|
0
|
next if $nd->is_Leaf; |
|
323
|
0
|
0
|
|
|
|
0
|
if ($labs{$nd->internal_id}) { |
|
324
|
0
|
|
|
|
|
0
|
$nd->id($labs{$nd->internal_id}); |
|
325
|
0
|
|
|
|
|
0
|
warn $nd->internal_id, "\t", "changed to ", $labs{$nd->internal_id}, "\n"; |
|
326
|
|
|
|
|
|
|
} else { |
|
327
|
0
|
|
|
|
|
0
|
$nd->id(''); |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
} |
|
330
|
0
|
|
|
|
|
0
|
$print_tree = 1 |
|
331
|
|
|
|
|
|
|
} |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub rename_tips { |
|
334
|
0
|
|
|
0
|
0
|
0
|
my $label_file = $opts{'rename-tips'}; # each line consists of internal_id label |
|
335
|
0
|
|
|
|
|
0
|
my %labs; |
|
336
|
0
|
|
0
|
|
|
0
|
open LAB, "<", $label_file || die "label file not found\n"; |
|
337
|
0
|
|
|
|
|
0
|
while() { |
|
338
|
0
|
|
|
|
|
0
|
chomp; |
|
339
|
0
|
|
|
|
|
0
|
my ($a, $b) = split; |
|
340
|
0
|
|
|
|
|
0
|
$labs{$a} = $b; |
|
341
|
|
|
|
|
|
|
} |
|
342
|
0
|
|
|
|
|
0
|
close LAB; |
|
343
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
344
|
0
|
0
|
|
|
|
0
|
next unless $nd->is_Leaf; |
|
345
|
0
|
|
|
|
|
0
|
my $old = $nd->id(); |
|
346
|
0
|
0
|
|
|
|
0
|
if ($labs{$nd->id}) { |
|
347
|
0
|
|
|
|
|
0
|
$nd->id($labs{$nd->id}); |
|
348
|
0
|
|
|
|
|
0
|
warn "Success: old tip $old changed to new name\t", $nd->id(), "\n"; |
|
349
|
|
|
|
|
|
|
} else { |
|
350
|
0
|
|
|
|
|
0
|
warn "Failed: old tip $old not changed\n"; |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
} |
|
353
|
0
|
|
|
|
|
0
|
$print_tree = 1 |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub write_tab_tree{ |
|
358
|
|
|
|
|
|
|
# print "*" x 200, "\n"; |
|
359
|
0
|
|
|
0
|
0
|
0
|
my $y_space = 1; # number of lines between two neighboring nodes |
|
360
|
0
|
|
|
|
|
0
|
my $root_pos = 1; |
|
361
|
0
|
|
|
|
|
0
|
my $maxL = 100; |
|
362
|
0
|
|
|
|
|
0
|
my $longest_leaf_length=0; |
|
363
|
0
|
|
|
|
|
0
|
my $largest_int_length =0; |
|
364
|
0
|
|
|
|
|
0
|
&assign_leaf_ycoord($rootnode, \$root_pos, \$y_space); |
|
365
|
0
|
|
|
|
|
0
|
&assign_inode_ycoord($rootnode); |
|
366
|
0
|
|
|
|
|
0
|
my @nd_sorted = sort {$b->{ycoord} <=> $a->{ycoord}} @nodes; |
|
|
0
|
|
|
|
|
0
|
|
|
367
|
0
|
|
|
|
|
0
|
foreach my $nd (@nd_sorted) { |
|
368
|
0
|
|
|
|
|
0
|
my $branch_length = $nd->branch_length; |
|
369
|
0
|
|
|
|
|
0
|
$nd->{xcoord} = &distance_to_root($nd); |
|
370
|
0
|
0
|
|
|
|
0
|
next unless $nd->is_Leaf(); |
|
371
|
0
|
0
|
|
|
|
0
|
$longest_leaf_length = $nd->{xcoord} if $nd->{xcoord} >= $longest_leaf_length; |
|
372
|
|
|
|
|
|
|
} |
|
373
|
0
|
|
|
|
|
0
|
my $scale = int($maxL/$longest_leaf_length); |
|
374
|
|
|
|
|
|
|
|
|
375
|
0
|
|
|
|
|
0
|
foreach my $nd (@nd_sorted) { |
|
376
|
0
|
|
|
|
|
0
|
$nd->{xcoord_scaled} = ceil($scale * $nd->{xcoord}); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# draw initial lines |
|
380
|
0
|
|
|
|
|
0
|
my %lines; |
|
381
|
0
|
|
|
|
|
0
|
foreach my $nd (@nd_sorted) { |
|
382
|
0
|
|
0
|
|
|
0
|
my $dashes = ceil(($nd->branch_length || 0) * $scale); |
|
383
|
0
|
|
|
|
|
0
|
my $spaces = $nd->{xcoord_scaled} - $dashes + 1; |
|
384
|
0
|
0
|
|
|
|
0
|
my $tag = $nd->is_Leaf ? $nd->id() : $nd->internal_id(); |
|
385
|
0
|
0
|
|
|
|
0
|
my $line = " " x $spaces . "+" . "-" x ($dashes ? ($dashes-1):0) . $tag; |
|
386
|
0
|
|
|
|
|
0
|
$lines{$nd->internal_id()} = $line; |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# update by adding vertical lines |
|
390
|
|
|
|
|
|
|
# algorithm: replace with pipe at parent xcoord for every node in-between a node and its parent |
|
391
|
0
|
|
|
|
|
0
|
for (my $i=0; $i<=$#nd_sorted; $i++) { |
|
392
|
0
|
|
|
|
|
0
|
my $current_node = $nd_sorted[$i]; |
|
393
|
0
|
|
|
|
|
0
|
my $current_ycoord = $current_node->{ycoord}; |
|
394
|
0
|
0
|
|
|
|
0
|
next if $current_node eq $rootnode; |
|
395
|
0
|
|
|
|
|
0
|
my $parent_node = $current_node->ancestor(); |
|
396
|
0
|
|
|
|
|
0
|
my $parent_ycoord = $parent_node->{ycoord}; |
|
397
|
0
|
|
|
|
|
0
|
my $parent_xcoord = $parent_node->{xcoord_scaled}; |
|
398
|
0
|
|
|
|
|
0
|
foreach my $nid (keys %lines) { # capture in-between nodes to add pipes |
|
399
|
0
|
|
|
|
|
0
|
my $node = $tree->find_node(internal_id=>$nid); |
|
400
|
0
|
0
|
0
|
|
|
0
|
next if $node eq $current_node || $node eq $parent_node; |
|
401
|
0
|
0
|
0
|
|
|
0
|
next if ($current_ycoord < $parent_ycoord) && (($node->{ycoord} < $current_ycoord) || ($node->{ycoord} > $parent_ycoord)); |
|
|
|
|
0
|
|
|
|
|
|
402
|
0
|
0
|
0
|
|
|
0
|
next if ($current_ycoord > $parent_ycoord) && (($node->{ycoord} > $current_ycoord) || ($node->{ycoord} < $parent_ycoord)); |
|
|
|
|
0
|
|
|
|
|
|
403
|
0
|
|
|
|
|
0
|
my $line = $lines{$nid}; |
|
404
|
0
|
|
|
|
|
0
|
$line .= ' ' while length($line) <= $parent_xcoord+1; # pad spaces when line not long enough to reach parent node xcoord |
|
405
|
0
|
|
|
|
|
0
|
my @chars = split //, $line; |
|
406
|
0
|
0
|
|
|
|
0
|
$chars[$parent_xcoord+1] ='|' unless $chars[$parent_xcoord+1] eq '+'; |
|
407
|
0
|
|
|
|
|
0
|
$lines{$nid} = join '', @chars; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
|
|
411
|
0
|
|
|
|
|
0
|
foreach (@nd_sorted) { |
|
412
|
0
|
|
|
|
|
0
|
print $lines{$_->internal_id}, "\n"; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub assign_leaf_ycoord { |
|
417
|
0
|
|
|
0
|
0
|
0
|
my ($node, $ypos, $yspacing) = @_; |
|
418
|
0
|
0
|
|
|
|
0
|
return if $node->is_Leaf(); |
|
419
|
0
|
|
|
|
|
0
|
for my $child ($node->each_Descendent()) { |
|
420
|
0
|
0
|
|
|
|
0
|
if ($child->is_Leaf()) { |
|
421
|
0
|
|
|
|
|
0
|
$child->{'ycoord'} = $$ypos; # de-reference |
|
422
|
0
|
|
|
|
|
0
|
$$ypos += $$yspacing; |
|
423
|
|
|
|
|
|
|
} else { |
|
424
|
0
|
|
|
|
|
0
|
&assign_leaf_ycoord($child, $ypos, $yspacing); |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub assign_inode_ycoord { |
|
430
|
0
|
|
|
0
|
0
|
0
|
my $node = shift; |
|
431
|
0
|
|
|
|
|
0
|
my @tmp; |
|
432
|
0
|
|
|
|
|
0
|
for my $child ($node->each_Descendent()) { |
|
433
|
0
|
0
|
|
|
|
0
|
&assign_inode_ycoord($child) unless defined $child->{'ycoord'}; |
|
434
|
0
|
|
|
|
|
0
|
push @tmp, $child->{'ycoord'} |
|
435
|
|
|
|
|
|
|
} |
|
436
|
0
|
|
|
|
|
0
|
my @sorted = sort { $a <=> $b } @tmp; |
|
|
0
|
|
|
|
|
0
|
|
|
437
|
0
|
|
|
|
|
0
|
$node->{'ycoord'} = $sorted[0] + 1 / 2 * ($sorted[-1] - $sorted[0]) |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub tips_to_root { |
|
441
|
0
|
|
|
0
|
0
|
0
|
foreach (@otus) { |
|
442
|
0
|
|
|
|
|
0
|
printf "%s\t%.6f\n", $_->id(), distance_to_root($_); |
|
443
|
|
|
|
|
|
|
} |
|
444
|
|
|
|
|
|
|
} |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub distance_to_root { |
|
447
|
0
|
|
|
0
|
0
|
0
|
my $node = shift; |
|
448
|
0
|
|
0
|
|
|
0
|
my $dist = $node->branch_length() || 0; |
|
449
|
0
|
|
|
|
|
0
|
my $parent = $node->ancestor(); |
|
450
|
0
|
0
|
|
|
|
0
|
$dist += &distance_to_root($parent) if defined $parent; |
|
451
|
0
|
|
|
|
|
0
|
return $dist; |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub pars_binary { |
|
455
|
0
|
|
|
0
|
0
|
0
|
my $trait_table_file = $opts{"ci"}; |
|
456
|
0
|
|
0
|
|
|
0
|
open BIN, "<", $trait_table_file || die "no trait file: $trait_table_file\n"; |
|
457
|
0
|
|
|
|
|
0
|
my ($sites, @colnames, @rownames); |
|
458
|
0
|
|
|
|
|
0
|
my $first_line = 1; |
|
459
|
0
|
|
|
|
|
0
|
while() { |
|
460
|
0
|
|
|
|
|
0
|
chomp; |
|
461
|
0
|
|
|
|
|
0
|
my @data = split /\t/, $_; |
|
462
|
0
|
0
|
|
|
|
0
|
if ($first_line) { |
|
463
|
0
|
|
|
|
|
0
|
shift @data; |
|
464
|
0
|
|
|
|
|
0
|
@colnames = @data; |
|
465
|
0
|
|
|
|
|
0
|
$first_line = 0; |
|
466
|
0
|
|
|
|
|
0
|
next; |
|
467
|
|
|
|
|
|
|
} |
|
468
|
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
0
|
my $otu = shift @data; |
|
470
|
0
|
|
|
|
|
0
|
push @rownames, $otu; |
|
471
|
0
|
0
|
|
|
|
0
|
die "check colnames\n" unless @colnames == @data; |
|
472
|
0
|
|
|
|
|
0
|
for(my $i=0; $i<=$#colnames; $i++) { |
|
473
|
0
|
0
|
|
|
|
0
|
$sites->{$otu}->{$colnames[$i]} = $data[$i] ? 1:0; # force binary |
|
474
|
|
|
|
|
|
|
} |
|
475
|
|
|
|
|
|
|
} |
|
476
|
0
|
|
|
|
|
0
|
close BIN; |
|
477
|
|
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
0
|
foreach my $otu (@otus) { |
|
479
|
|
|
|
|
|
|
# die "otu id not found in rownames\n" unless &_check_id($node->id, \@rownames); |
|
480
|
0
|
|
|
|
|
0
|
foreach my $trait_name (@colnames) { |
|
481
|
0
|
|
|
|
|
0
|
$otu->add_tag_value($trait_name, [$sites->{$otu->id}->{$trait_name}]); |
|
482
|
|
|
|
|
|
|
} |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
# print Dumper(\@otus); exit; |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# Fitch algorithm: post-order traversal |
|
487
|
0
|
|
|
|
|
0
|
my @informative; |
|
488
|
0
|
|
|
|
|
0
|
for (my $i=0; $i<=$#colnames; $i++) { |
|
489
|
0
|
0
|
|
|
|
0
|
next unless &_is_informative($colnames[$i], $i); |
|
490
|
0
|
|
|
|
|
0
|
push @informative, $colnames[$i]; |
|
491
|
0
|
|
|
|
|
0
|
$rootnode->add_tag_value($colnames[$i], &_fitch_parsimony($rootnode, $i, \@colnames)); |
|
492
|
0
|
0
|
|
|
|
0
|
&_penny_parsimony($rootnode, $i, \@colnames) if @{$rootnode->get_tag_values($colnames[$i])} == 2; # only when root state unresolved |
|
|
0
|
|
|
|
|
0
|
|
|
493
|
0
|
|
|
|
|
0
|
my $ci = &_consistency_index($i, \@colnames); |
|
494
|
0
|
|
|
|
|
0
|
print join "\t", ($i+1, $colnames[$i], $ci); |
|
495
|
0
|
|
|
|
|
0
|
print "\n"; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub _fitch_parsimony { |
|
500
|
0
|
|
|
0
|
|
0
|
my ($node,$index, $refcol)=@_; #warn $node->internal_id, "\t", $node->id() || "inode", "\n"; |
|
501
|
0
|
|
|
|
|
0
|
my $ref_node_state; |
|
502
|
0
|
|
|
|
|
0
|
my @colnames = @{$refcol}; |
|
|
0
|
|
|
|
|
0
|
|
|
503
|
0
|
0
|
|
|
|
0
|
if ($node->is_Leaf) { |
|
504
|
|
|
|
|
|
|
# warn $node->id, "\t", Dumper($node->get_tag_values($colnames[$index])), "\n"; |
|
505
|
0
|
|
|
|
|
0
|
return $node->get_tag_values($colnames[$index]); |
|
506
|
|
|
|
|
|
|
} else { |
|
507
|
0
|
|
|
|
|
0
|
my @child = $node->each_Descendent; |
|
508
|
0
|
|
|
|
|
0
|
my ($ref0, $ref1); |
|
509
|
0
|
0
|
|
|
|
0
|
if ($child[0]->is_Leaf) { # child 0 is an OTU |
|
510
|
0
|
|
|
|
|
0
|
$ref0 = $child[0]->get_tag_values($colnames[$index]); |
|
511
|
0
|
0
|
|
|
|
0
|
if ($child[1]->is_Leaf) { # both child 0 & 1 are an OTU |
|
512
|
0
|
|
|
|
|
0
|
$ref1 = $child[1]->get_tag_values($colnames[$index]); |
|
513
|
|
|
|
|
|
|
# warn "got sis otu for inode ", $node->internal_id(), "\n"; |
|
514
|
0
|
|
|
|
|
0
|
$ref_node_state = &_intersect_or_union($ref0, $ref1); |
|
515
|
|
|
|
|
|
|
# warn Dumper($node->get_tag_values($colnames[$index])); |
|
516
|
|
|
|
|
|
|
} else { # child 0 is an OTU, child 1 is an inode |
|
517
|
0
|
|
|
|
|
0
|
$ref_node_state = &_intersect_or_union($ref0, &_fitch_parsimony($child[1], $index, \@colnames)); |
|
518
|
|
|
|
|
|
|
} |
|
519
|
|
|
|
|
|
|
} else { # child 0 is an inode |
|
520
|
0
|
0
|
|
|
|
0
|
if ($child[1]->is_Leaf) { # child 1 is an inode child 1 is an OTU |
|
521
|
0
|
|
|
|
|
0
|
$ref1 = $child[1]->get_tag_values($colnames[$index]); |
|
522
|
|
|
|
|
|
|
# warn "got sis otu for inode ", $node->internal_id(), "\n"; |
|
523
|
0
|
|
|
|
|
0
|
$ref_node_state = &_intersect_or_union(&_fitch_parsimony($child[0], $index, \@colnames), $ref1); |
|
524
|
|
|
|
|
|
|
} else { # both inodes |
|
525
|
0
|
|
|
|
|
0
|
$ref_node_state = &_intersect_or_union(&_fitch_parsimony($child[0], $index, \@colnames), &_fitch_parsimony($child[1], $index, \@colnames)); |
|
526
|
|
|
|
|
|
|
} |
|
527
|
|
|
|
|
|
|
} |
|
528
|
0
|
|
|
|
|
0
|
$node->add_tag_value($colnames[$index], $ref_node_state); |
|
529
|
0
|
|
|
|
|
0
|
return $ref_node_state; |
|
530
|
|
|
|
|
|
|
} |
|
531
|
|
|
|
|
|
|
} |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
sub _intersect_or_union { # from Perl Cookbook |
|
534
|
0
|
|
|
0
|
|
0
|
my ($ref1, $ref2) = @_; |
|
535
|
0
|
|
|
|
|
0
|
my (%union, %isect); |
|
536
|
0
|
|
|
|
|
0
|
foreach my $e (@$ref1, @$ref2) { |
|
537
|
0
|
0
|
|
|
|
0
|
$union{$e}++ && $isect{$e}++; # Perl Cookbook ideom |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
# warn Dumper(\%union, \%isect); |
|
540
|
|
|
|
|
|
|
|
|
541
|
0
|
0
|
|
|
|
0
|
if (@$ref1 == @$ref2) { # (0) U (0); (0) U (1); (1) U (1); (0,1) U (0,1) |
|
542
|
0
|
|
|
|
|
0
|
return [keys %union]; |
|
543
|
|
|
|
|
|
|
} else { # (0) I (0,1); (1) I (0,1) |
|
544
|
0
|
|
|
|
|
0
|
return [keys %isect]; |
|
545
|
|
|
|
|
|
|
} |
|
546
|
|
|
|
|
|
|
} |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
sub _penny_parsimony { |
|
549
|
0
|
|
|
0
|
|
0
|
my $nd = shift; |
|
550
|
0
|
|
|
|
|
0
|
my $index = shift; |
|
551
|
0
|
|
|
|
|
0
|
my $refcol = shift; |
|
552
|
0
|
|
|
|
|
0
|
my @colnames = @{$refcol}; |
|
|
0
|
|
|
|
|
0
|
|
|
553
|
0
|
|
|
|
|
0
|
my $ref_states = $nd->get_tag_values($colnames[$index]); |
|
554
|
0
|
|
|
|
|
0
|
my $ref_new; |
|
555
|
|
|
|
|
|
|
my $pa; |
|
556
|
0
|
0
|
|
|
|
0
|
return if $nd->is_Leaf; |
|
557
|
0
|
0
|
|
|
|
0
|
if ($pa = $nd->ancestor()) { |
|
558
|
0
|
|
|
|
|
0
|
my $ref_pa_state = $pa->get_tag_values($colnames[$index]); |
|
559
|
0
|
|
|
|
|
0
|
$ref_new = &_intersect_or_union($ref_states, $ref_pa_state); # intersect with with parent |
|
560
|
|
|
|
|
|
|
} else { # is the root |
|
561
|
0
|
|
|
|
|
0
|
$ref_new = [1]; # resolve root state using Penny parsimony: one gain only |
|
562
|
|
|
|
|
|
|
} |
|
563
|
0
|
|
|
|
|
0
|
$nd->add_tag_value($colnames[$index], $ref_new); # resolve root state using Penny parsimony: one gain only |
|
564
|
0
|
|
|
|
|
0
|
&_penny_parsimony($_, $index, \@colnames) for $nd->each_Descendent(); |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub _is_informative { |
|
568
|
0
|
|
|
0
|
|
0
|
my $col_id = shift; |
|
569
|
0
|
|
|
|
|
0
|
my $index = shift; |
|
570
|
0
|
|
|
|
|
0
|
my @sts = map {$_->get_tag_values($col_id)->[0]} @otus; |
|
|
0
|
|
|
|
|
0
|
|
|
571
|
0
|
|
|
|
|
0
|
my %seen; |
|
572
|
0
|
|
|
|
|
0
|
$seen{$_}++ for @sts; |
|
573
|
0
|
0
|
|
|
|
0
|
if (keys %seen == 1) { #warn "$index: $col_id is constant\n"; |
|
574
|
0
|
|
|
|
|
0
|
return 0 } |
|
575
|
0
|
|
|
|
|
0
|
foreach (keys %seen) { |
|
576
|
0
|
0
|
|
|
|
0
|
if ($seen{$_} == 1) { #warn "$index: $col_id is singleton\n"; |
|
577
|
0
|
|
|
|
|
0
|
return 0 } |
|
578
|
|
|
|
|
|
|
} |
|
579
|
|
|
|
|
|
|
# warn "$index: $col_id is informative\n"; |
|
580
|
0
|
|
|
|
|
0
|
return 1; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
sub _consistency_index { # ratio of minimum (1 for a binary trait) to actural changes |
|
584
|
0
|
|
|
0
|
|
0
|
my $index = shift; |
|
585
|
0
|
|
|
|
|
0
|
my $ci = 0; |
|
586
|
0
|
|
|
|
|
0
|
my $refcol = shift; |
|
587
|
0
|
|
|
|
|
0
|
my @colnames = @{$refcol}; |
|
|
0
|
|
|
|
|
0
|
|
|
588
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
589
|
0
|
0
|
|
|
|
0
|
next if $nd eq $rootnode; |
|
590
|
0
|
|
|
|
|
0
|
my $pa = $nd->ancestor(); |
|
591
|
0
|
|
|
|
|
0
|
my $state = $nd->get_tag_values($colnames[$index])->[0]; # assuming fully resolved (only one state) |
|
592
|
0
|
|
|
|
|
0
|
my $pa_state = $pa->get_tag_values($colnames[$index])->[0]; |
|
593
|
0
|
0
|
|
|
|
0
|
if ($state ne $pa_state) { |
|
594
|
|
|
|
|
|
|
# if ($state) { |
|
595
|
|
|
|
|
|
|
# push @{$gain_loss{'gain'}->{$nd->internal_id}}, $colnames[$index]; |
|
596
|
|
|
|
|
|
|
# $fam_events{$colnames[$index]}->{$nd->internal_id}->{'gain'}++; |
|
597
|
|
|
|
|
|
|
# } else { |
|
598
|
|
|
|
|
|
|
# push @{$gain_loss{'loss'}->{$nd->internal_id}}, $colnames[$index]; |
|
599
|
|
|
|
|
|
|
# $fam_events{$colnames[$index]}->{$nd->internal_id}->{'loss'}++; |
|
600
|
|
|
|
|
|
|
# } |
|
601
|
0
|
|
|
|
|
0
|
$ci++; |
|
602
|
|
|
|
|
|
|
} |
|
603
|
|
|
|
|
|
|
} |
|
604
|
0
|
|
|
|
|
0
|
return sprintf "%.4f", 1/$ci; |
|
605
|
|
|
|
|
|
|
} |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
sub _check_id { |
|
608
|
0
|
|
|
0
|
|
0
|
my $st = shift; |
|
609
|
0
|
|
|
|
|
0
|
my $ref = shift; |
|
610
|
0
|
|
|
|
|
0
|
foreach my $id (@$ref) { |
|
611
|
0
|
0
|
|
|
|
0
|
return 1 if $st eq $id; |
|
612
|
|
|
|
|
|
|
} |
|
613
|
0
|
|
|
|
|
0
|
return 0; |
|
614
|
|
|
|
|
|
|
} |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
sub print_tree_shape { |
|
617
|
0
|
|
|
0
|
0
|
0
|
my @matrix; |
|
618
|
0
|
|
|
|
|
0
|
my (%leaf, %inode); |
|
619
|
0
|
|
|
|
|
0
|
my $ct_leaf = 0; |
|
620
|
0
|
|
|
|
|
0
|
my $ct_inode = 0; |
|
621
|
0
|
|
|
|
|
0
|
for my $nd (@nodes) { |
|
622
|
0
|
0
|
|
|
|
0
|
if ($nd->is_Leaf()) { |
|
623
|
0
|
|
|
|
|
0
|
$leaf{$nd->id} = ++$ct_leaf; |
|
624
|
|
|
|
|
|
|
} else { |
|
625
|
0
|
|
|
|
|
0
|
$inode{$nd->internal_id} = ++$ct_inode; |
|
626
|
|
|
|
|
|
|
} |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
0
|
for my $nd (@nodes) { |
|
630
|
0
|
0
|
0
|
|
|
0
|
next if $nd->is_Leaf() || $nd eq $rootnode; |
|
631
|
0
|
|
|
|
|
0
|
my @dscs = $nd->each_Descendent; |
|
632
|
0
|
0
|
|
|
|
0
|
die $nd->internal_id . ": node has more than two descendants\n" unless @dscs == 2; |
|
633
|
|
|
|
|
|
|
my $id1 = $dscs[0]->is_Leaf |
|
634
|
0
|
0
|
|
|
|
0
|
? -1 * $leaf{$dscs[0]->id} : $inode{$dscs[0]->internal_id}; |
|
635
|
|
|
|
|
|
|
my $id2 = $dscs[1]->is_Leaf |
|
636
|
0
|
0
|
|
|
|
0
|
? -1 * $leaf{$dscs[1]->id} : $inode{$dscs[1]->internal_id}; |
|
637
|
0
|
0
|
|
|
|
0
|
if ($nd eq $rootnode) { # root at first |
|
638
|
0
|
|
|
|
|
0
|
unshift @matrix, [ ($id1, $id2) ]; |
|
639
|
|
|
|
|
|
|
} else { |
|
640
|
0
|
|
|
|
|
0
|
push @matrix, [ ($id1, $id2) ] |
|
641
|
|
|
|
|
|
|
} |
|
642
|
|
|
|
|
|
|
} |
|
643
|
0
|
|
|
|
|
0
|
for (my $i = $#matrix; $i >=0; $i--) { |
|
644
|
0
|
|
|
|
|
0
|
print $matrix[$i]->[0], "\t", $matrix[$i]->[1], "\n"; |
|
645
|
|
|
|
|
|
|
}; |
|
646
|
|
|
|
|
|
|
# print Dumper(\%leaf, \%inode); |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
sub edge_length_abundance { |
|
650
|
0
|
|
|
0
|
0
|
0
|
my @inodes; |
|
651
|
|
|
|
|
|
|
my @brs; |
|
652
|
0
|
|
|
|
|
0
|
push (@inodes, $_) for _walk_up($rootnode); |
|
653
|
|
|
|
|
|
|
|
|
654
|
0
|
|
|
|
|
0
|
for my $nd (@inodes) { |
|
655
|
0
|
0
|
|
|
|
0
|
next if $nd eq $rootnode; |
|
656
|
0
|
|
|
|
|
0
|
my $id = $nd->internal_id; |
|
657
|
0
|
|
|
|
|
0
|
my $ct_otus = 0; |
|
658
|
0
|
0
|
|
|
|
0
|
if ($nd->is_Leaf) { |
|
659
|
0
|
|
|
|
|
0
|
$ct_otus = 1; |
|
660
|
|
|
|
|
|
|
} else { |
|
661
|
0
|
|
|
|
|
0
|
foreach (&_each_leaf($nd)) { |
|
662
|
0
|
|
|
|
|
0
|
$ct_otus++; |
|
663
|
|
|
|
|
|
|
} |
|
664
|
|
|
|
|
|
|
} |
|
665
|
0
|
|
|
|
|
0
|
push @brs, { 'id' => $id, 'num_tips' => $ct_otus, 'br_len' => $nd->branch_length() } |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
|
|
668
|
0
|
|
|
|
|
0
|
my $ct_tips = 0; |
|
669
|
0
|
|
|
|
|
0
|
foreach my $nd (@nodes) { |
|
670
|
0
|
0
|
|
|
|
0
|
$ct_tips ++ if $nd->is_Leaf(); |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
|
|
673
|
0
|
|
|
|
|
0
|
for (my $k=1; $k<=$ct_tips; $k++) { |
|
674
|
0
|
|
|
|
|
0
|
my $total = 0; |
|
675
|
0
|
|
|
|
|
0
|
my @nds = grep { $_->{num_tips} == $k } @brs; |
|
|
0
|
|
|
|
|
0
|
|
|
676
|
0
|
0
|
|
|
|
0
|
if (@nds) { $total += $_->{br_len} for @nds } |
|
|
0
|
|
|
|
|
0
|
|
|
677
|
0
|
|
|
|
|
0
|
printf "%d\t%.6f\n", $k, $total/$tree->total_branch_length(); |
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=for comment |
|
682
|
|
|
|
|
|
|
# Needs work |
|
683
|
|
|
|
|
|
|
sub rotate_an_in_node { |
|
684
|
|
|
|
|
|
|
my $nd_id = $opts{'rotate-node'}; |
|
685
|
|
|
|
|
|
|
my $ref_node = $tree->find_node(-id => $nd_id) || $tree->find_node(-internal_id => $nd_id) || die "node not found\n"; |
|
686
|
|
|
|
|
|
|
my $pa_node = $ref_node->ancestor(); |
|
687
|
|
|
|
|
|
|
$pa_node->each_Descendent($nd_id) || 'die: not an internal node\n'; |
|
688
|
|
|
|
|
|
|
# $ref_node->each_Descendent( sub { shuffle(@des) } ); |
|
689
|
|
|
|
|
|
|
$print_tree = 1; |
|
690
|
|
|
|
|
|
|
#} |
|
691
|
|
|
|
|
|
|
## Not working |
|
692
|
|
|
|
|
|
|
#sub sort_child { # sort by height |
|
693
|
|
|
|
|
|
|
# my $by = $opts{'sort-child'} || 'height'; |
|
694
|
|
|
|
|
|
|
# foreach my $nd (@nodes) { |
|
695
|
|
|
|
|
|
|
# next if $nd->is_Leaf; |
|
696
|
|
|
|
|
|
|
# my @des = $nd->each_Descendent(); |
|
697
|
|
|
|
|
|
|
# $nd->each_Descendent($by); |
|
698
|
|
|
|
|
|
|
# $ref_node->each_Descendent( sub { shuffle(@des) } ); |
|
699
|
|
|
|
|
|
|
# } |
|
700
|
|
|
|
|
|
|
# $print_tree = 1; |
|
701
|
|
|
|
|
|
|
#} |
|
702
|
|
|
|
|
|
|
=cut |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
sub swap_otus { # don't know why |
|
705
|
0
|
|
|
0
|
0
|
0
|
my @otus; |
|
706
|
0
|
|
|
|
|
0
|
my $otu_ct = 0; |
|
707
|
0
|
|
|
|
|
0
|
foreach (@nodes) { |
|
708
|
0
|
0
|
|
|
|
0
|
next unless $_->is_Leaf(); |
|
709
|
0
|
|
|
|
|
0
|
push @otus, $_; |
|
710
|
0
|
|
|
|
|
0
|
$otu_ct++; |
|
711
|
|
|
|
|
|
|
} |
|
712
|
0
|
|
|
|
|
0
|
@otus = sort {$a->id() cmp $b->id() } @otus; |
|
|
0
|
|
|
|
|
0
|
|
|
713
|
0
|
|
|
|
|
0
|
my $ref_otu; |
|
714
|
0
|
0
|
|
|
|
0
|
if ($opts{'swap-otus'}) { |
|
715
|
0
|
|
0
|
|
|
0
|
$ref_otu = $tree->find_node($opts{'swap-otus'}) || die "node not found\n"; |
|
716
|
|
|
|
|
|
|
} else { |
|
717
|
0
|
|
|
|
|
0
|
$ref_otu = $otus[0]; |
|
718
|
|
|
|
|
|
|
} |
|
719
|
|
|
|
|
|
|
|
|
720
|
0
|
|
|
|
|
0
|
foreach my $nd (@otus) { |
|
721
|
0
|
0
|
|
|
|
0
|
next if $nd eq $ref_otu; |
|
722
|
0
|
|
|
|
|
0
|
my $nd_id = $nd->id(); |
|
723
|
0
|
|
|
|
|
0
|
my $ref_id = $ref_otu->id(); |
|
724
|
0
|
|
|
|
|
0
|
$nd->id("new_".$ref_id); |
|
725
|
0
|
|
|
|
|
0
|
$ref_otu->id("new_".$nd_id); |
|
726
|
0
|
|
|
|
|
0
|
say $tree->as_text($out_format); |
|
727
|
0
|
|
|
|
|
0
|
$nd->id($nd_id); |
|
728
|
0
|
|
|
|
|
0
|
$ref_otu->id($ref_id); |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# Get the distance between nodes |
|
733
|
|
|
|
|
|
|
sub getdistance { |
|
734
|
1
|
|
|
1
|
0
|
5
|
my @dnodes = _name2node($opts{'dist'}); |
|
735
|
1
|
50
|
|
|
|
3
|
if (scalar(@dnodes) != 2) { say "Error: Provide exactly two nodes/leaves to use with --dist" } |
|
|
0
|
|
|
|
|
0
|
|
|
736
|
1
|
|
|
|
|
10
|
else { say $tree->distance(-nodes => \@dnodes) } |
|
737
|
|
|
|
|
|
|
} |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
sub sister_pairs { |
|
740
|
0
|
|
|
0
|
0
|
0
|
my @otus; |
|
741
|
0
|
|
|
|
|
0
|
my $otu_ct = 0; |
|
742
|
0
|
|
|
|
|
0
|
foreach (@nodes) { |
|
743
|
0
|
0
|
|
|
|
0
|
next unless $_->is_Leaf(); |
|
744
|
0
|
|
|
|
|
0
|
push @otus, $_; |
|
745
|
0
|
|
|
|
|
0
|
$otu_ct++; |
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
|
|
748
|
0
|
|
|
|
|
0
|
@otus = sort {$a->id() cmp $b->id() } @otus; |
|
|
0
|
|
|
|
|
0
|
|
|
749
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < $otu_ct; $i++) { |
|
750
|
0
|
|
|
|
|
0
|
my $pa_i = $otus[$i]->ancestor(); |
|
751
|
0
|
|
|
|
|
0
|
for (my $j = $i+1; $j < $otu_ct; $j++) { |
|
752
|
0
|
|
|
|
|
0
|
my $pa_j = $otus[$j]->ancestor(); |
|
753
|
0
|
|
|
|
|
0
|
print $otus[$i]->id, "\t", $otus[$j]->id, "\t"; |
|
754
|
0
|
0
|
|
|
|
0
|
print $pa_i eq $pa_j ? 1 : 0; |
|
755
|
0
|
|
|
|
|
0
|
print "\n"; |
|
756
|
|
|
|
|
|
|
} |
|
757
|
|
|
|
|
|
|
} |
|
758
|
|
|
|
|
|
|
} |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=head2 countOTU() |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Print total number of OTUs (leaves). |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=cut |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
sub countOTU { |
|
767
|
1
|
|
|
1
|
1
|
2
|
my $otu_ct = 0; |
|
768
|
1
|
100
|
|
|
|
2
|
foreach (@nodes) { $otu_ct++ if $_->is_Leaf() } |
|
|
28
|
|
|
|
|
129
|
|
|
769
|
1
|
|
|
|
|
11
|
say $otu_ct |
|
770
|
|
|
|
|
|
|
} |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=head2 reroot() |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Reroot tree to node in C<$opts{'reroot'}> by creating new branch. |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub reroot { |
|
779
|
|
|
|
|
|
|
# my $outgroup_id = $opts{'reroot'}; |
|
780
|
0
|
|
|
0
|
1
|
0
|
my ($tag, $out_id) = split(':', $opts{'reroot'}); |
|
781
|
0
|
|
|
|
|
0
|
my $outgroup; |
|
782
|
0
|
0
|
|
|
|
0
|
if ($tag eq 'otu') { # leaf id |
|
|
|
0
|
|
|
|
|
|
|
783
|
0
|
|
|
|
|
0
|
$outgroup = $tree->find_node($out_id) |
|
784
|
|
|
|
|
|
|
} elsif ($tag eq 'intid') { # internal id |
|
785
|
0
|
|
|
|
|
0
|
for my $nd (@nodes) { |
|
786
|
0
|
0
|
|
|
|
0
|
if ($nd->internal_id() == $out_id) { |
|
787
|
0
|
|
|
|
|
0
|
$outgroup = $nd; |
|
788
|
|
|
|
|
|
|
last |
|
789
|
0
|
|
|
|
|
0
|
} |
|
790
|
|
|
|
|
|
|
} |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
else { |
|
793
|
0
|
|
|
|
|
0
|
die("Need a tag: otu:, or intid:\n"); |
|
794
|
|
|
|
|
|
|
} |
|
795
|
|
|
|
|
|
|
# my $newroot = $outgroup->create_node_on_branch(-FRACTION => 0.5, -ANNOT => {id => 'newroot'}); |
|
796
|
|
|
|
|
|
|
# $tree->reroot($outgroup); |
|
797
|
0
|
0
|
|
|
|
0
|
die "outgroup not found: $out_id" unless $outgroup; |
|
798
|
0
|
|
|
|
|
0
|
$tree->reroot_at_midpoint($outgroup); |
|
799
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
800
|
|
|
|
|
|
|
} |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
sub clean_tree { |
|
803
|
0
|
|
|
0
|
0
|
0
|
foreach my $nd (@nodes) { |
|
804
|
0
|
0
|
|
|
|
0
|
$nd->branch_length(0) if $opts{'clean-br'}; |
|
805
|
0
|
0
|
|
|
|
0
|
if ($opts{'clean-boot'}) { |
|
806
|
0
|
|
|
|
|
0
|
$nd->bootstrap(0); |
|
807
|
0
|
0
|
|
|
|
0
|
$nd->id('') unless $nd->is_Leaf; |
|
808
|
|
|
|
|
|
|
} |
|
809
|
|
|
|
|
|
|
} |
|
810
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
811
|
|
|
|
|
|
|
} |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
sub delete_otus { |
|
814
|
0
|
|
|
0
|
0
|
0
|
my $ref_otus = &_get_otus(); |
|
815
|
0
|
|
|
|
|
0
|
my @otus_to_retain = &_remove_otus($ref_otus, $opts{'del-otus'}); |
|
816
|
|
|
|
|
|
|
# print Dumper(\@otus_to_retain); |
|
817
|
0
|
|
|
|
|
0
|
$opts{'subset'} = join ",", @otus_to_retain; |
|
818
|
0
|
|
|
|
|
0
|
&subset(); |
|
819
|
|
|
|
|
|
|
} |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
sub _get_otus { |
|
822
|
0
|
|
|
0
|
|
0
|
my @list; |
|
823
|
0
|
0
|
|
|
|
0
|
foreach my $nd (@nodes) { push @list, $nd if $nd->is_Leaf } |
|
|
0
|
|
|
|
|
0
|
|
|
824
|
0
|
|
|
|
|
0
|
return \@list; |
|
825
|
|
|
|
|
|
|
} |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
sub _remove_otus { |
|
828
|
0
|
|
|
0
|
|
0
|
my $ref = shift; |
|
829
|
0
|
|
|
|
|
0
|
my $str = shift; |
|
830
|
0
|
|
|
|
|
0
|
my @list; |
|
831
|
0
|
|
|
|
|
0
|
my @otus_to_remove = split /\s*,\s*/, $str; |
|
832
|
0
|
|
|
|
|
0
|
my %to_del; |
|
833
|
0
|
|
|
|
|
0
|
foreach (@otus_to_remove) { $to_del{$_} = 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
834
|
|
|
|
|
|
|
|
|
835
|
0
|
|
|
|
|
0
|
foreach my $nd (@$ref) { |
|
836
|
0
|
0
|
|
|
|
0
|
push @list, $nd->id() unless $to_del{$nd->id()}; |
|
837
|
|
|
|
|
|
|
} |
|
838
|
0
|
|
|
|
|
0
|
return @list; |
|
839
|
|
|
|
|
|
|
} |
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
sub multi2bi { |
|
842
|
0
|
|
|
0
|
0
|
0
|
foreach my $nd (@nodes) { |
|
843
|
|
|
|
|
|
|
# next if $nd eq $rootnode; |
|
844
|
0
|
|
|
|
|
0
|
&_add_node($nd); |
|
845
|
|
|
|
|
|
|
} |
|
846
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
847
|
|
|
|
|
|
|
} |
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
sub _add_node { |
|
850
|
0
|
|
|
0
|
|
0
|
my $node = shift; |
|
851
|
|
|
|
|
|
|
# warn "processing\t", $node->internal_id, "\n"; |
|
852
|
0
|
|
|
|
|
0
|
my @desc = $node->each_Descendent; |
|
853
|
0
|
0
|
|
|
|
0
|
return if scalar(@desc) <= 2; |
|
854
|
|
|
|
|
|
|
# warn "multifurcating node:\t", $node->internal_id, " ... add a new node\n"; |
|
855
|
0
|
|
|
|
|
0
|
shift @desc; # retain the first descent |
|
856
|
|
|
|
|
|
|
# my $new_node = $node->create_node_on_branch(-FRACTION => 0.5, -FORCE => 1, -ANNOT=>{ -id => "new_id" }); |
|
857
|
0
|
|
|
|
|
0
|
my $new_node = Bio::Tree::Node->new(-id => "new", -branch_length => 0); |
|
858
|
0
|
|
|
|
|
0
|
$node->add_Descendent($new_node); |
|
859
|
|
|
|
|
|
|
# warn "\ta new node created:\t", $new_node->id, "\n"; |
|
860
|
0
|
|
|
|
|
0
|
foreach (@desc) { |
|
861
|
0
|
|
|
|
|
0
|
$node->remove_Descendent($_); # remove from grand-parent |
|
862
|
|
|
|
|
|
|
# warn "\t\tremove descendant:\t", $_->internal_id, "\n"; |
|
863
|
0
|
|
|
|
|
0
|
$new_node->add_Descendent($_); # re-attarch to parent |
|
864
|
|
|
|
|
|
|
# warn "\t\tadd descendant to the new node:\t", $_->internal_id, "\n"; |
|
865
|
|
|
|
|
|
|
} |
|
866
|
0
|
|
|
|
|
0
|
&_add_node($new_node); |
|
867
|
|
|
|
|
|
|
} |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
# Subset a tree |
|
870
|
|
|
|
|
|
|
sub subset { |
|
871
|
|
|
|
|
|
|
# Collect the subset of nodes from STDIN or from $_ |
|
872
|
2
|
|
|
2
|
0
|
7
|
my @keep_nodes; |
|
873
|
2
|
100
|
|
|
|
16
|
if ($opts{'subset'}) { @keep_nodes = _name2node($opts{'subset'}) } |
|
|
1
|
|
|
|
|
5
|
|
|
874
|
1
|
|
|
|
|
3
|
else { my $ar = $_[0]; @keep_nodes = @$ar } |
|
|
1
|
|
|
|
|
3
|
|
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
# Collect list of descendents |
|
877
|
2
|
|
|
|
|
6
|
my @descendents; |
|
878
|
2
|
|
|
|
|
8
|
for my $nd (@keep_nodes) { push @descendents, $_ for $nd->get_all_Descendents } |
|
|
4
|
|
|
|
|
76
|
|
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
# Collect list of ancestors |
|
881
|
2
|
|
|
|
|
47
|
my @ancestors; |
|
882
|
|
|
|
|
|
|
my $tmp; |
|
883
|
2
|
|
|
|
|
7
|
for (@keep_nodes) { |
|
884
|
4
|
|
|
|
|
45
|
$tmp = $_; |
|
885
|
4
|
|
|
|
|
20
|
while ($tmp->ancestor) { |
|
886
|
11
|
|
|
|
|
118
|
push @ancestors, $tmp->ancestor; |
|
887
|
11
|
|
|
|
|
111
|
$tmp = $tmp->ancestor |
|
888
|
|
|
|
|
|
|
} |
|
889
|
|
|
|
|
|
|
} |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
# Make a hash of nodes to keep |
|
892
|
2
|
|
|
|
|
29
|
my %keep = map { $_->internal_id => $_ } @keep_nodes; |
|
|
4
|
|
|
|
|
54
|
|
|
893
|
2
|
|
|
|
|
32
|
$keep{$_->internal_id} = $_ for @descendents; |
|
894
|
2
|
|
|
|
|
9
|
$keep{$_->internal_id} = $_ for @ancestors; |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
# Remove all nodes but those in %keep |
|
897
|
2
|
100
|
|
|
|
90
|
for (@nodes) { $tree->remove_Node($_) unless exists($keep{$_->internal_id}) } |
|
|
56
|
|
|
|
|
4443
|
|
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
# Clean up internal single-descendent nodes |
|
900
|
2
|
|
|
|
|
194
|
my @desc; |
|
901
|
|
|
|
|
|
|
my $nd_len; |
|
902
|
2
|
|
|
|
|
0
|
my $desc_len; |
|
903
|
2
|
|
|
|
|
10
|
for my $nd ($tree->get_nodes) { |
|
904
|
12
|
100
|
|
|
|
981
|
next if $nd == $rootnode; |
|
905
|
10
|
|
|
|
|
32
|
@desc = $nd->each_Descendent; |
|
906
|
10
|
100
|
|
|
|
234
|
next unless scalar(@desc) == 1; |
|
907
|
5
|
|
50
|
|
|
18
|
$nd_len = $nd->branch_length() || 0; |
|
908
|
5
|
|
50
|
|
|
81
|
$desc_len = $desc[0]->branch_length() || 0; |
|
909
|
5
|
|
|
|
|
75
|
$desc[0]->branch_length($nd_len + $desc_len); |
|
910
|
5
|
|
|
|
|
371
|
$nd->ancestor->add_Descendent($desc[0]); |
|
911
|
5
|
|
|
|
|
803
|
$tree->remove_Node($nd) |
|
912
|
|
|
|
|
|
|
} |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
# Take care of the a single-descendent root node |
|
915
|
2
|
|
|
|
|
10
|
@desc = $rootnode->each_Descendent; |
|
916
|
2
|
100
|
|
|
|
74
|
if (scalar(@desc) == 1) { |
|
917
|
1
|
|
|
|
|
19
|
$rootnode->add_Descendent($_) for $desc[0]->each_Descendent; |
|
918
|
1
|
|
|
|
|
16
|
$tree->remove_Node($desc[0]) |
|
919
|
|
|
|
|
|
|
} |
|
920
|
2
|
|
|
|
|
98
|
$print_tree = 1 |
|
921
|
|
|
|
|
|
|
} |
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
# Print OTU names and lengths |
|
924
|
|
|
|
|
|
|
sub print_leaves_lengths { |
|
925
|
1
|
100
|
50
|
1
|
0
|
3
|
foreach (@nodes) { say $_->id(), "\t", $_->branch_length() || 0 if $_->is_Leaf() } |
|
|
28
|
|
|
|
|
236
|
|
|
926
|
|
|
|
|
|
|
} |
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
# Get LCA |
|
929
|
|
|
|
|
|
|
sub getlca { |
|
930
|
1
|
|
|
1
|
0
|
3
|
my @lca_nodes; |
|
931
|
1
|
50
|
|
|
|
6
|
if (_name2node($opts{'lca'})) { @lca_nodes = _name2node($opts{'lca'}) } |
|
|
1
|
|
|
|
|
24
|
|
|
932
|
0
|
|
|
|
|
0
|
else { my $ar = $_[0]; @lca_nodes = @$ar } |
|
|
0
|
|
|
|
|
0
|
|
|
933
|
1
|
|
|
|
|
3
|
my @nd_pair; |
|
934
|
|
|
|
|
|
|
my $lca; |
|
935
|
|
|
|
|
|
|
|
|
936
|
1
|
|
|
|
|
4
|
$nd_pair[0] = $lca_nodes[0]; |
|
937
|
1
|
50
|
|
|
|
4
|
if (@lca_nodes > 1) { |
|
|
|
0
|
|
|
|
|
|
|
938
|
1
|
|
|
|
|
6
|
for (my $index = 1; $index < @lca_nodes; $index++) { |
|
939
|
2
|
|
|
|
|
6
|
$nd_pair[1] = $lca_nodes[$index]; |
|
940
|
2
|
|
|
|
|
18
|
$lca = $tree->get_lca(-nodes => \@nd_pair); |
|
941
|
2
|
|
|
|
|
5128
|
$nd_pair[0] = $lca |
|
942
|
|
|
|
|
|
|
} |
|
943
|
1
|
50
|
|
|
|
7
|
if (_name2node($opts{'lca'})) { say $lca->internal_id } else { return $lca } |
|
|
1
|
|
|
|
|
5
|
|
|
|
0
|
|
|
|
|
0
|
|
|
944
|
|
|
|
|
|
|
} elsif (@lca_nodes == 1) { |
|
945
|
0
|
0
|
|
|
|
0
|
if (_name2node($opts{'lca'})) { say $lca_nodes[0]->ancestor->internal_id } |
|
|
0
|
|
|
|
|
0
|
|
|
946
|
0
|
|
|
|
|
0
|
else { return $lca_nodes[0]->ancestor->internal_id } |
|
947
|
|
|
|
|
|
|
} |
|
948
|
|
|
|
|
|
|
} |
|
949
|
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
# Label nodes with their internal ID's |
|
951
|
|
|
|
|
|
|
sub label_nodes { |
|
952
|
1
|
|
|
1
|
0
|
3
|
for (@nodes) { |
|
953
|
28
|
100
|
|
|
|
433
|
next if $_ == $rootnode; |
|
954
|
27
|
50
|
|
|
|
62
|
my $suffix = defined($_->id) ? "_" . $_->id : ""; |
|
955
|
27
|
|
|
|
|
339
|
$_->id($_->internal_id . $suffix) |
|
956
|
|
|
|
|
|
|
} |
|
957
|
1
|
|
|
|
|
16
|
$print_tree = 1 |
|
958
|
|
|
|
|
|
|
} |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
# Print half-tree id distances between all pairs of nodes |
|
961
|
|
|
|
|
|
|
sub listdistance { |
|
962
|
1
|
|
|
1
|
0
|
3
|
my (@leaves, @sortedleaf_names, @leafnames); |
|
963
|
1
|
100
|
|
|
|
5
|
foreach (@nodes) { push(@leaves, $_) if $_->is_Leaf() } |
|
|
28
|
|
|
|
|
238
|
|
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
# Make an alphabetical list of OTU names |
|
966
|
1
|
|
|
|
|
16
|
push @sortedleaf_names, $_->id foreach sort {lc($a->id) cmp lc($b->id)} @leaves; |
|
|
41
|
|
|
|
|
464
|
|
|
967
|
|
|
|
|
|
|
|
|
968
|
1
|
|
|
|
|
123
|
@leaves = (); |
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
#Rebuld leaf array with new alphabetical order |
|
971
|
1
|
|
|
|
|
22
|
push @leaves, $tree->find_node(-id => $_) foreach @sortedleaf_names; |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
# Prints a half-matrix of distance values |
|
974
|
1
|
|
|
|
|
19620
|
my $i = 1; |
|
975
|
1
|
|
|
|
|
6
|
for my $firstleaf (@leaves) { |
|
976
|
15
|
|
|
|
|
32
|
my @dnodes; |
|
977
|
15
|
|
|
|
|
58
|
for (my $x = $i; $x < scalar(@leaves); $x++) { |
|
978
|
105
|
|
|
|
|
226552
|
@dnodes = ($firstleaf, $leaves[$x]); |
|
979
|
105
|
|
|
|
|
386
|
say join "\t", ($firstleaf->id(), $leaves[$x]->id(), $tree->distance(-nodes => \@dnodes)) |
|
980
|
|
|
|
|
|
|
} |
|
981
|
15
|
|
|
|
|
38346
|
$i++ |
|
982
|
|
|
|
|
|
|
} |
|
983
|
|
|
|
|
|
|
} |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=head2 bin() |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
Divides tree into number of specified segments and counts branches up |
|
988
|
|
|
|
|
|
|
to height the segment. Prints: bin_number, branch_count, bin_floor, |
|
989
|
|
|
|
|
|
|
bin_ceiling. |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
=cut |
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
sub bin { |
|
994
|
1
|
|
|
1
|
1
|
5
|
my $treeheight = _treeheight(\$tree); |
|
995
|
1
|
|
|
|
|
8
|
my $bincount = $opts{'ltt'}; |
|
996
|
1
|
|
|
|
|
4
|
my $binsize = $treeheight/$bincount; |
|
997
|
1
|
|
|
|
|
2
|
my @bins; |
|
998
|
1
|
|
|
|
|
5
|
while ($treeheight > 0) { |
|
999
|
11
|
|
|
|
|
23
|
unshift @bins, $treeheight; |
|
1000
|
11
|
|
|
|
|
23
|
$treeheight -= $binsize |
|
1001
|
|
|
|
|
|
|
} |
|
1002
|
|
|
|
|
|
|
# Handle imperfect division. When approaching 0, if a tiny number is found, such as 2e-17, assign it as 0 and ignore negatives that may follow. |
|
1003
|
1
|
100
|
|
|
|
4
|
for (@bins) { shift @bins if $_ < 1e-10 } |
|
|
10
|
|
|
|
|
23
|
|
|
1004
|
1
|
|
|
|
|
3
|
unshift @bins, 0; |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
1
|
|
|
|
|
5
|
for (my $i=0; $i+1<@bins; $i++) { |
|
1007
|
10
|
|
|
|
|
22
|
my $branchcount = 1; # branch from root |
|
1008
|
|
|
|
|
|
|
# Starting from the root, add a branch for each found descendent |
|
1009
|
10
|
|
|
|
|
42
|
$branchcount += _binrecursive(\$rootnode, $bins[$i+1]); |
|
1010
|
10
|
|
|
|
|
211
|
printf "%3d\t%3d\t%.4f\t%.4f\n", $i+1, $branchcount, $bins[$i], $bins[$i+1]; |
|
1011
|
|
|
|
|
|
|
} |
|
1012
|
|
|
|
|
|
|
} |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
sub print_all_lengths{ |
|
1015
|
1
|
|
|
1
|
0
|
4
|
for (@nodes) { |
|
1016
|
28
|
100
|
|
|
|
224
|
next if $_ == $rootnode; |
|
1017
|
27
|
|
|
|
|
66
|
my $p_node = $_->ancestor(); |
|
1018
|
27
|
|
|
|
|
192
|
my ($p_id, $c_id); |
|
1019
|
27
|
|
|
|
|
59
|
$p_id = $p_node->internal_id; |
|
1020
|
27
|
100
|
|
|
|
244
|
$c_id = $_->is_Leaf ? $_->id() : $_->internal_id; |
|
1021
|
27
|
|
|
|
|
404
|
say $p_id, "\t", $c_id, "\t", $_->branch_length; |
|
1022
|
|
|
|
|
|
|
} |
|
1023
|
|
|
|
|
|
|
} |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
sub random_tree{ |
|
1026
|
1
|
|
|
1
|
0
|
5
|
my @otus = _each_leaf($rootnode); |
|
1027
|
1
|
|
|
|
|
3
|
my @sample; |
|
1028
|
1
|
50
|
|
|
|
7
|
my $sample_size = $opts{"random"} == 0 ? int(scalar(@otus) / 2) : $opts{"random"}; |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
1
|
50
|
|
|
|
4
|
die "Error: sample size ($sample_size) exceeds number of OTUs (", scalar(@otus), ")" if $sample_size > scalar(@otus); |
|
1031
|
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
# Use Reservoir Sampling to pick random otus. |
|
1033
|
1
|
|
|
|
|
5
|
my @sampled = (1 .. $sample_size); |
|
1034
|
1
|
|
|
|
|
5
|
for ($sample_size + 1 .. scalar(@otus)) { |
|
1035
|
14
|
100
|
|
|
|
87
|
$sampled[rand(@sampled)] = $_ if rand() < $sample_size/$_ |
|
1036
|
|
|
|
|
|
|
} |
|
1037
|
1
|
|
|
|
|
32
|
push @sample, $otus[--$_] for @sampled; |
|
1038
|
1
|
|
|
|
|
8
|
&subset(\@sample) |
|
1039
|
|
|
|
|
|
|
} |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
# Depth to the root for a node |
|
1042
|
|
|
|
|
|
|
sub depth_to_root { |
|
1043
|
1
|
|
|
1
|
0
|
6
|
say $_->depth for _name2node($opts{'depth'}) |
|
1044
|
|
|
|
|
|
|
} |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# Remove Branch Lenghts |
|
1047
|
|
|
|
|
|
|
#sub remove_brlengths { |
|
1048
|
|
|
|
|
|
|
# foreach (@nodes) { $_->branch_length(0) if defined $_->branch_length } |
|
1049
|
|
|
|
|
|
|
# $print_tree = 1 |
|
1050
|
|
|
|
|
|
|
#} |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
sub alldesc { |
|
1053
|
1
|
|
|
1
|
0
|
3
|
my @inodes; |
|
1054
|
1
|
|
|
|
|
4
|
my $inode_id = $opts{'otus-desc'}; |
|
1055
|
|
|
|
|
|
|
|
|
1056
|
1
|
50
|
|
|
|
5
|
if ($inode_id eq 'all') { push (@inodes, $_) for _walk_up($rootnode) } |
|
|
0
|
|
|
|
|
0
|
|
|
1057
|
1
|
|
|
|
|
15
|
else { push @inodes, $tree->find_node(-internal_id => $inode_id) } |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
1
|
|
|
|
|
1428
|
for my $nd (@inodes) { |
|
1060
|
1
|
|
|
|
|
4
|
print $nd->internal_id, " "; |
|
1061
|
1
|
50
|
|
|
|
35
|
if ($nd->is_Leaf) { print $nd->id } else { print $_->id, " " for _each_leaf($nd) } |
|
|
1
|
|
|
|
|
15
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1062
|
1
|
|
|
|
|
11
|
print "\n" |
|
1063
|
|
|
|
|
|
|
} |
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
# Walks from starting OTU |
|
1067
|
|
|
|
|
|
|
sub walk { |
|
1068
|
1
|
|
|
1
|
0
|
25
|
my $startleaf = $tree->find_node($opts{'walk'}); |
|
1069
|
1
|
|
|
|
|
1422
|
my $curnode = $startleaf->ancestor; |
|
1070
|
1
|
|
|
|
|
9
|
my $last_curnode = $startleaf; |
|
1071
|
1
|
|
|
|
|
2
|
my @decs; |
|
1072
|
|
|
|
|
|
|
my %visited; |
|
1073
|
1
|
|
|
|
|
3
|
my $totlen = 0; |
|
1074
|
1
|
|
|
|
|
2
|
my @dpair; |
|
1075
|
1
|
|
|
|
|
2
|
my $vcount = 0; |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
1
|
|
|
|
|
4
|
$visited{$startleaf} = 1; |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
1
|
|
|
|
|
5
|
while ($curnode) { |
|
1080
|
4
|
|
|
|
|
32
|
$visited{$curnode} = 1; |
|
1081
|
4
|
|
|
|
|
12
|
@dpair = ($last_curnode, $curnode); |
|
1082
|
4
|
|
|
|
|
25
|
$totlen += $tree->distance(-nodes => \@dpair); |
|
1083
|
4
|
|
|
|
|
10816
|
&_desclen($curnode, \%visited, \$totlen, \$vcount); |
|
1084
|
4
|
|
|
|
|
10
|
$last_curnode = $curnode; |
|
1085
|
4
|
|
|
|
|
14
|
$curnode = $curnode->ancestor |
|
1086
|
|
|
|
|
|
|
} |
|
1087
|
|
|
|
|
|
|
} |
|
1088
|
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
sub walk_edge { |
|
1090
|
0
|
|
|
0
|
0
|
0
|
my $startleaf = $tree->find_node($opts{'walk-edge'}); |
|
1091
|
0
|
|
|
|
|
0
|
my $curnode = $startleaf->ancestor; |
|
1092
|
0
|
|
|
|
|
0
|
my $last_curnode = $startleaf; |
|
1093
|
0
|
|
|
|
|
0
|
my @decs; |
|
1094
|
|
|
|
|
|
|
my %visited; |
|
1095
|
0
|
|
|
|
|
0
|
my $totlen = 0; |
|
1096
|
0
|
|
|
|
|
0
|
my @dpair; |
|
1097
|
0
|
|
|
|
|
0
|
my $vcount = 0; |
|
1098
|
|
|
|
|
|
|
|
|
1099
|
0
|
|
|
|
|
0
|
$visited{$startleaf} = 1; |
|
1100
|
|
|
|
|
|
|
|
|
1101
|
0
|
|
|
|
|
0
|
while ($curnode) { |
|
1102
|
0
|
|
|
|
|
0
|
$visited{$curnode} = 1; |
|
1103
|
0
|
|
|
|
|
0
|
@dpair = ($last_curnode, $curnode); |
|
1104
|
0
|
|
|
|
|
0
|
my $pairLen = $tree->distance(-nodes => \@dpair); |
|
1105
|
0
|
|
0
|
|
|
0
|
say join "\t", ($curnode->id() || $curnode->internal_id(), $last_curnode->id() || $last_curnode->internal_id(), $pairLen); |
|
|
|
|
0
|
|
|
|
|
|
1106
|
0
|
|
|
|
|
0
|
$totlen += $pairLen; |
|
1107
|
0
|
|
|
|
|
0
|
&_desclen($curnode, \%visited, \$totlen, \$vcount); |
|
1108
|
0
|
|
|
|
|
0
|
$last_curnode = $curnode; |
|
1109
|
0
|
|
|
|
|
0
|
$curnode = $curnode->ancestor |
|
1110
|
|
|
|
|
|
|
} |
|
1111
|
|
|
|
|
|
|
} |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# works for RAxML bipartition output and FastTree output with bootstrap values as node names |
|
1115
|
|
|
|
|
|
|
sub delete_low_boot_support { |
|
1116
|
0
|
|
0
|
0
|
0
|
0
|
my $cutoff = $opts{'del-low-boot'} || die 'spcify cutoff, e.g., 0.75 or 75\n'; # default 75 |
|
1117
|
0
|
|
|
|
|
0
|
&_remove_branch($rootnode, \$cutoff); |
|
1118
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
1119
|
|
|
|
|
|
|
} |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
sub delete_short_branch { |
|
1122
|
0
|
|
0
|
0
|
0
|
0
|
my $cutoff = $opts{'del-short-br'} || die 'spcify cutoff, e.g., 0.02\n'; # default 0.02 |
|
1123
|
0
|
|
|
|
|
0
|
&_remove_short_branch($rootnode, $cutoff); |
|
1124
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
1125
|
|
|
|
|
|
|
} |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
sub mid_point_root { |
|
1128
|
0
|
|
|
0
|
0
|
0
|
my $node1; |
|
1129
|
0
|
|
|
|
|
0
|
my $maxL=0; |
|
1130
|
0
|
|
|
|
|
0
|
foreach (@nodes) { |
|
1131
|
0
|
0
|
|
|
|
0
|
next unless $_->is_Leaf(); |
|
1132
|
0
|
|
|
|
|
0
|
my $dis = $_->depth(); |
|
1133
|
0
|
0
|
|
|
|
0
|
if ($dis > $maxL){ |
|
1134
|
0
|
|
|
|
|
0
|
$node1 = $_; |
|
1135
|
0
|
|
|
|
|
0
|
$maxL = $dis |
|
1136
|
|
|
|
|
|
|
} |
|
1137
|
|
|
|
|
|
|
} |
|
1138
|
|
|
|
|
|
|
|
|
1139
|
0
|
|
|
|
|
0
|
my $node2; |
|
1140
|
0
|
|
|
|
|
0
|
$maxL=0; |
|
1141
|
0
|
|
|
|
|
0
|
foreach (@nodes) { |
|
1142
|
0
|
0
|
0
|
|
|
0
|
next unless $_->is_Leaf() || $_ eq $node1; |
|
1143
|
0
|
|
|
|
|
0
|
my $dis = $tree->distance(-nodes=>[$node1, $_]); |
|
1144
|
0
|
0
|
|
|
|
0
|
if ($dis > $maxL){ |
|
1145
|
0
|
|
|
|
|
0
|
$node2 = $_; |
|
1146
|
0
|
|
|
|
|
0
|
$maxL = $dis |
|
1147
|
|
|
|
|
|
|
} |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
0
|
|
|
|
|
0
|
my $nd = &_get_all_parents($node1,0,$maxL); |
|
1151
|
0
|
0
|
|
|
|
0
|
$nd = &_get_all_parents($node2,0,$maxL) unless $nd; |
|
1152
|
0
|
|
|
|
|
0
|
my ($node, $sumL) = @{$nd}; |
|
|
0
|
|
|
|
|
0
|
|
|
1153
|
|
|
|
|
|
|
|
|
1154
|
0
|
|
|
|
|
0
|
my $nodeL = $node->branch_length(); |
|
1155
|
0
|
|
|
|
|
0
|
my $pnode = $node->ancestor(); |
|
1156
|
0
|
|
|
|
|
0
|
my $nodeL_new = $nodeL - $sumL + $maxL/2; |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
0
|
|
|
|
|
0
|
$tree->reroot_at_midpoint($node); |
|
1159
|
0
|
|
|
|
|
0
|
$pnode->branch_length($node->branch_length()*2-$nodeL_new); |
|
1160
|
0
|
|
|
|
|
0
|
$node->branch_length($nodeL_new); |
|
1161
|
|
|
|
|
|
|
|
|
1162
|
0
|
|
|
|
|
0
|
$print_tree = 1; |
|
1163
|
|
|
|
|
|
|
} |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
sub _get_all_parents { |
|
1166
|
0
|
|
|
0
|
|
0
|
my $nd = shift; |
|
1167
|
0
|
|
|
|
|
0
|
my $sumL = shift; |
|
1168
|
0
|
|
|
|
|
0
|
my $mL = shift; |
|
1169
|
0
|
|
|
|
|
0
|
$sumL += $nd->branch_length(); |
|
1170
|
0
|
0
|
|
|
|
0
|
return [$nd, $sumL] if $sumL >= $mL/2; |
|
1171
|
0
|
0
|
|
|
|
0
|
return if $nd->ancestor() eq $rootnode; |
|
1172
|
0
|
|
|
|
|
0
|
&_get_all_parents($nd->ancestor(), $sumL, $mL); |
|
1173
|
|
|
|
|
|
|
} |
|
1174
|
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
=head2 write_out() |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
Performs the bulk of the actions actions set via |
|
1178
|
|
|
|
|
|
|
L|/initialize>. |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
Call this after calling C<#initialize(\%opts)>. |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=cut |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
sub write_out { |
|
1185
|
15
|
|
|
15
|
1
|
61
|
my $opts = shift; |
|
1186
|
15
|
50
|
|
|
|
99
|
print_all_node_ids() if $opts->{'ids-all'}; |
|
1187
|
15
|
50
|
|
|
|
68
|
rename_tips() if $opts->{'rename-tips'}; |
|
1188
|
15
|
50
|
|
|
|
96
|
write_tab_tree() if $opts->{'as-text'}; |
|
1189
|
15
|
50
|
|
|
|
81
|
cut_tree() if $opts->{'cut-tree'}; |
|
1190
|
15
|
50
|
|
|
|
59
|
trim_tips() if $opts->{'trim-tips'}; |
|
1191
|
15
|
50
|
|
|
|
96
|
cut_sister() if $opts->{'cut-sis'}; |
|
1192
|
15
|
50
|
|
|
|
73
|
mid_point_root() if $opts->{'mid-point'}; |
|
1193
|
15
|
50
|
|
|
|
81
|
pars_binary() if $opts->{'ci'}; |
|
1194
|
15
|
100
|
|
|
|
75
|
getdistance() if $opts->{'dist'}; |
|
1195
|
15
|
50
|
|
|
|
1582
|
delete_low_boot_support() if $opts->{'del-low-boot'}; |
|
1196
|
15
|
50
|
|
|
|
333
|
delete_short_branch() if $opts->{'del-short-br'}; |
|
1197
|
15
|
100
|
|
|
|
347
|
say $tree->total_branch_length() if $opts->{'length'}; |
|
1198
|
15
|
100
|
|
|
|
1273
|
countOTU() if $opts->{'otus-num'}; |
|
1199
|
15
|
100
|
|
|
|
68
|
$print_tree = 1 if defined($opts->{'output'}); |
|
1200
|
15
|
50
|
|
|
|
60
|
reroot() if $opts->{'reroot'}; |
|
1201
|
15
|
50
|
|
|
|
63
|
reorder_by_ref() if $opts->{'ref'}; |
|
1202
|
15
|
100
|
|
|
|
164
|
subset() if $opts->{'subset'}; |
|
1203
|
15
|
100
|
|
|
|
69
|
print_leaves_lengths() if $opts->{'otus-all'}; |
|
1204
|
15
|
100
|
|
|
|
73
|
getlca() if $opts->{'lca'}; |
|
1205
|
15
|
100
|
|
|
|
95
|
label_nodes() if $opts->{'label-nodes'}; |
|
1206
|
15
|
50
|
|
|
|
98
|
label_selected_nodes() if $opts->{'label-selected-nodes'}; |
|
1207
|
15
|
100
|
|
|
|
67
|
listdistance() if $opts->{'dist-all'}; |
|
1208
|
15
|
100
|
|
|
|
65
|
bin() if $opts->{'ltt'}; |
|
1209
|
15
|
100
|
|
|
|
157
|
print_all_lengths() if $opts->{'length-all'}; |
|
1210
|
15
|
100
|
|
|
|
118
|
random_tree() if defined($opts->{'random'}); |
|
1211
|
15
|
100
|
|
|
|
92
|
depth_to_root() if $opts->{'depth'}; |
|
1212
|
15
|
50
|
|
|
|
292
|
tips_to_root() if $opts->{'tips-to-root'}; |
|
1213
|
15
|
50
|
|
|
|
79
|
rotate_an_in_node() if $opts->{'rotate-node'}; |
|
1214
|
|
|
|
|
|
|
# sort_child() if $opts->{'sort-child'}; |
|
1215
|
15
|
100
|
|
|
|
95
|
alldesc() if $opts->{'otus-desc'}; |
|
1216
|
15
|
100
|
|
|
|
85
|
walk() if $opts->{'walk'}; |
|
1217
|
15
|
50
|
|
|
|
97
|
walk_edge() if $opts->{'walk-edge'}; |
|
1218
|
15
|
50
|
|
|
|
166
|
multi2bi() if $opts->{'multi2bi'}; |
|
1219
|
15
|
50
|
33
|
|
|
168
|
clean_tree() if $opts->{'clean-br'} || $opts->{'clean-boot'}; |
|
1220
|
15
|
50
|
|
|
|
72
|
delete_otus() if $opts->{'del-otus'}; |
|
1221
|
15
|
50
|
|
|
|
69
|
sister_pairs() if $opts->{'sis-pairs'}; |
|
1222
|
15
|
50
|
|
|
|
78
|
swap_otus() if $opts->{'swap-otus'}; |
|
1223
|
15
|
50
|
|
|
|
61
|
edge_length_abundance() if $opts->{'ead'}; |
|
1224
|
15
|
50
|
|
|
|
123
|
print_tree_shape() if $opts->{'tree-shape'}; |
|
1225
|
15
|
100
|
|
|
|
1212
|
say $tree->as_text($out_format) if $print_tree; |
|
1226
|
|
|
|
|
|
|
} |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
################# internal subroutines ############## |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
sub _remove_branch { |
|
1231
|
0
|
|
|
0
|
|
0
|
my $nd = shift; |
|
1232
|
0
|
|
|
|
|
0
|
my $ref = shift; |
|
1233
|
0
|
|
|
|
|
0
|
my $bootcut = $$ref; |
|
1234
|
0
|
0
|
|
|
|
0
|
return if $nd->is_Leaf(); |
|
1235
|
0
|
|
|
|
|
0
|
my @desc = $nd->each_Descendent(); |
|
1236
|
0
|
|
|
|
|
0
|
my $pa = $nd->ancestor(); |
|
1237
|
0
|
|
|
|
|
0
|
foreach my $ch (@desc) { |
|
1238
|
0
|
0
|
|
|
|
0
|
if (!$nd->id()) { # no boostrap as node id (in-group branch) |
|
1239
|
0
|
|
|
|
|
0
|
&_remove_branch($ch, $ref); |
|
1240
|
0
|
|
|
|
|
0
|
next; |
|
1241
|
|
|
|
|
|
|
} |
|
1242
|
0
|
0
|
|
|
|
0
|
if ($nd->id() < $bootcut) { |
|
1243
|
0
|
|
|
|
|
0
|
$pa->remove_Descendent($nd); # remove the current node |
|
1244
|
0
|
|
|
|
|
0
|
$pa->add_Descendent($ch); # elevate the child node |
|
1245
|
0
|
|
|
|
|
0
|
$ch->branch_length($ch->branch_length() + $nd->branch_length()); # increment branch length |
|
1246
|
|
|
|
|
|
|
} |
|
1247
|
0
|
|
|
|
|
0
|
&_remove_branch($ch, $ref); |
|
1248
|
|
|
|
|
|
|
} |
|
1249
|
|
|
|
|
|
|
} |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
sub _remove_short_branch { |
|
1252
|
0
|
|
|
0
|
|
0
|
my $nd = shift; |
|
1253
|
0
|
|
|
|
|
0
|
my $brcut = shift; |
|
1254
|
0
|
0
|
|
|
|
0
|
return if $nd->is_Leaf(); |
|
1255
|
0
|
|
|
|
|
0
|
my @desc = $nd->each_Descendent(); |
|
1256
|
0
|
|
|
|
|
0
|
my $pa = $nd->ancestor(); |
|
1257
|
0
|
|
|
|
|
0
|
foreach my $ch (@desc) { |
|
1258
|
0
|
0
|
0
|
|
|
0
|
if ($nd->branch_length() && $nd->branch_length() <= $brcut) { |
|
1259
|
0
|
|
|
|
|
0
|
$pa->remove_Descendent($nd); # remove the current node |
|
1260
|
0
|
|
|
|
|
0
|
$pa->add_Descendent($ch); # elevate the child node |
|
1261
|
0
|
|
|
|
|
0
|
$ch->branch_length($ch->branch_length() + $nd->branch_length()); # increment branch length |
|
1262
|
|
|
|
|
|
|
} |
|
1263
|
0
|
|
|
|
|
0
|
&_remove_short_branch($ch, $brcut); |
|
1264
|
|
|
|
|
|
|
} |
|
1265
|
|
|
|
|
|
|
} |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
sub _name2node { |
|
1268
|
6
|
|
|
6
|
|
35
|
my $str = shift; |
|
1269
|
6
|
|
|
|
|
62
|
my @node_names = split /\s*,\s*/, $str; |
|
1270
|
6
|
|
|
|
|
25
|
my $nd; |
|
1271
|
|
|
|
|
|
|
my @node_objects; |
|
1272
|
6
|
|
|
|
|
18
|
for my $node_name (@node_names) { |
|
1273
|
17
|
|
33
|
|
|
111
|
$nd = $tree->find_node(-id => $node_name) || $tree->find_node(-internal_id => $node_name); |
|
1274
|
17
|
50
|
|
|
|
21196
|
if ($nd) { push @node_objects, $nd } else { warn "Node/leaf '$node_name' not found. Ignoring...\n" } |
|
|
17
|
|
|
|
|
51
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1275
|
|
|
|
|
|
|
} |
|
1276
|
|
|
|
|
|
|
return @node_objects |
|
1277
|
6
|
|
|
|
|
47
|
} |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
# _each_leaf ($node): returns a list of all OTU's descended from this node, if any |
|
1280
|
|
|
|
|
|
|
sub _each_leaf { |
|
1281
|
1
|
|
|
1
|
|
3
|
my $nd = shift; |
|
1282
|
1
|
|
|
|
|
2
|
my @leaves; |
|
1283
|
1
|
50
|
|
|
|
5
|
return ($nd) if $nd->is_Leaf; |
|
1284
|
1
|
100
|
|
|
|
23
|
for ($nd->get_all_Descendents) { push (@leaves, $_) if $_->is_Leaf } |
|
|
27
|
|
|
|
|
1184
|
|
|
1285
|
|
|
|
|
|
|
return @leaves |
|
1286
|
1
|
|
|
|
|
13
|
} |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
# main routine to walk up from root |
|
1289
|
|
|
|
|
|
|
sub _wu { |
|
1290
|
0
|
|
|
0
|
|
0
|
my (@lf, @nd); |
|
1291
|
0
|
|
|
|
|
0
|
my $curnode = $_[0]; |
|
1292
|
0
|
|
|
|
|
0
|
my @decs = $_[0]->each_Descendent; |
|
1293
|
0
|
|
|
|
|
0
|
my $visitref = $_[1]; |
|
1294
|
0
|
|
|
|
|
0
|
my %visited = %$visitref; |
|
1295
|
0
|
|
|
|
|
0
|
my $node_list_ref = $_[2]; |
|
1296
|
|
|
|
|
|
|
# my $ref_ct_otu = $_[3]; |
|
1297
|
|
|
|
|
|
|
# my $ref_tatal_br_len = $_[4]; |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
0
|
|
|
|
|
0
|
for (@decs) { |
|
1300
|
|
|
|
|
|
|
# $ref_total_br_len += $_->branch_length; |
|
1301
|
0
|
0
|
|
|
|
0
|
if ($_->is_Leaf) { |
|
1302
|
0
|
|
|
|
|
0
|
push @lf, $_; |
|
1303
|
|
|
|
|
|
|
# $$ref_ct_otu++; |
|
1304
|
|
|
|
|
|
|
} else { |
|
1305
|
0
|
|
|
|
|
0
|
push @nd, $_ |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
|
|
|
|
|
|
} |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
0
|
0
|
|
|
|
0
|
for (@lf) { if (!exists($visited{$_})) { $visited{$_} = 1; push @$node_list_ref, $_ } } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1310
|
0
|
|
|
|
|
0
|
for (@nd) { |
|
1311
|
0
|
0
|
|
|
|
0
|
next if exists($visited{$_}); |
|
1312
|
0
|
|
|
|
|
0
|
$visited{$_} = 1; |
|
1313
|
0
|
|
|
|
|
0
|
push @$node_list_ref, $_; |
|
1314
|
0
|
|
|
|
|
0
|
&_wu($_, \%visited, $node_list_ref) |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
|
|
|
|
|
|
} |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# Walk Up: "Walks" up from a given node and returned an order array representing the order that each node descended from the given node was visited. |
|
1319
|
|
|
|
|
|
|
sub _walk_up { |
|
1320
|
0
|
|
|
0
|
|
0
|
my %visited; |
|
1321
|
0
|
|
|
|
|
0
|
my @node_list = $_[0]; |
|
1322
|
0
|
|
|
|
|
0
|
&_wu($_[0], \%visited, \@node_list); |
|
1323
|
|
|
|
|
|
|
return @node_list |
|
1324
|
0
|
|
|
|
|
0
|
} |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
sub print_all_node_ids { |
|
1327
|
0
|
|
|
0
|
0
|
0
|
my %visited; |
|
1328
|
0
|
|
|
|
|
0
|
my @node_list = ($rootnode); |
|
1329
|
0
|
|
|
|
|
0
|
&_wu($rootnode, \%visited, \@node_list); |
|
1330
|
0
|
|
|
|
|
0
|
foreach (@node_list) { |
|
1331
|
0
|
|
0
|
|
|
0
|
print $_->id() || $_->internal_id(), "\n"; |
|
1332
|
|
|
|
|
|
|
} |
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
sub _treeheight { |
|
1336
|
1
|
|
|
1
|
|
3
|
my $height = 0; |
|
1337
|
1
|
|
|
|
|
3
|
my $tree = $_[0]; |
|
1338
|
1
|
100
|
|
|
|
5
|
for ($$tree->get_nodes) { $height = $_->depth if $_->depth > $height } |
|
|
28
|
|
|
|
|
2873
|
|
|
1339
|
1
|
|
|
|
|
113
|
return $height |
|
1340
|
|
|
|
|
|
|
} |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
sub _binrecursive { |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
213
|
|
|
213
|
|
15007
|
my $branchcount = 0; |
|
1345
|
213
|
|
|
|
|
440
|
my $noderef = $_[0]; |
|
1346
|
213
|
|
|
|
|
346
|
my $upper = $_[1]; |
|
1347
|
213
|
|
|
|
|
608
|
my @desc = $$noderef->each_Descendent; |
|
1348
|
213
|
100
|
|
|
|
6327
|
$branchcount-- unless $$noderef->is_Leaf; |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
213
|
|
|
|
|
2157
|
for (@desc) { |
|
1351
|
224
|
|
|
|
|
972
|
$branchcount++; |
|
1352
|
224
|
100
|
|
|
|
591
|
$branchcount += _binrecursive(\$_, $upper) if $_->depth <= $upper |
|
1353
|
|
|
|
|
|
|
} |
|
1354
|
213
|
|
|
|
|
1035
|
return $branchcount |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
# Starting at a node that has 2 descendents, print the distance from start to desc if it's a leaf or call itself passing the internal-node descendent |
|
1358
|
|
|
|
|
|
|
# Input: basenode, internal node |
|
1359
|
|
|
|
|
|
|
sub _desclen { |
|
1360
|
|
|
|
|
|
|
# startlear, curnode |
|
1361
|
13
|
|
|
13
|
|
29
|
my (@dpair, @lf, @nd); |
|
1362
|
13
|
|
|
|
|
23
|
my $curnode = $_[0]; |
|
1363
|
13
|
|
|
|
|
46
|
my @decs = $_[0]->each_Descendent; |
|
1364
|
13
|
|
|
|
|
618
|
my $visitref = $_[1]; |
|
1365
|
13
|
|
|
|
|
23
|
my $totlen = $_[2]; |
|
1366
|
13
|
|
|
|
|
21
|
my $vcountref = $_[3]; |
|
1367
|
13
|
|
|
|
|
131
|
my %visited = %$visitref; |
|
1368
|
13
|
|
|
|
|
35
|
my $dist; |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
13
|
100
|
|
|
|
62
|
for (@decs) { if ($_->is_Leaf) { push @lf, $_ } else { push @nd, $_ } } |
|
|
27
|
|
|
|
|
74
|
|
|
|
15
|
|
|
|
|
161
|
|
|
|
12
|
|
|
|
|
122
|
|
|
1371
|
13
|
|
|
|
|
27
|
for (@lf) { |
|
1372
|
15
|
100
|
|
|
|
157
|
next if exists($visited{$_}); |
|
1373
|
14
|
|
|
|
|
49
|
$visited{$_} = 1; |
|
1374
|
14
|
|
|
|
|
29
|
$dpair[0] = $curnode; |
|
1375
|
14
|
|
|
|
|
23
|
$dpair[1] = $_; |
|
1376
|
14
|
|
|
|
|
62
|
$dist = $tree->distance(-nodes => \@dpair); |
|
1377
|
14
|
|
|
|
|
37741
|
$$totlen += $dist; |
|
1378
|
14
|
|
|
|
|
34
|
$$vcountref++; |
|
1379
|
14
|
50
|
0
|
|
|
57
|
say join "\t", ($curnode->id() || $curnode->internal_id(), $_->id() || $_->internal_id(), $dist) if $opts{'walk-edge'}; |
|
|
|
|
0
|
|
|
|
|
|
1380
|
14
|
50
|
|
|
|
65
|
say $_->id, "\t$$totlen\t$$vcountref" if $opts{'walk'} |
|
1381
|
|
|
|
|
|
|
} |
|
1382
|
|
|
|
|
|
|
|
|
1383
|
13
|
|
|
|
|
232
|
for (@nd) { |
|
1384
|
12
|
100
|
|
|
|
55
|
next if exists($visited{$_}); |
|
1385
|
9
|
|
|
|
|
71
|
$visited{$_} = 1; |
|
1386
|
9
|
|
|
|
|
19
|
$dpair[0] = $curnode; |
|
1387
|
9
|
|
|
|
|
19
|
$dpair[1] = $_; |
|
1388
|
9
|
|
|
|
|
40
|
$dist = $tree->distance(-nodes => \@dpair); |
|
1389
|
9
|
50
|
0
|
|
|
23611
|
say join "\t", ($curnode->id() || $curnode->internal_id(), $_->id() || $_->internal_id(), $dist) if $opts{'walk-edge'}; |
|
|
|
|
0
|
|
|
|
|
|
1390
|
9
|
|
|
|
|
22
|
$$totlen += $dist; |
|
1391
|
9
|
|
|
|
|
48
|
&_desclen($_, \%visited, $totlen, $vcountref) |
|
1392
|
|
|
|
|
|
|
} |
|
1393
|
|
|
|
|
|
|
} |
|
1394
|
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
1; |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
__END__ |