File Coverage

lib/Bio/BPWrapper/TreeManipulations.pm
Criterion Covered Total %
statement 284 877 32.3
branch 113 354 31.9
condition 9 89 10.1
subroutine 33 76 43.4
pod 5 43 11.6
total 444 1439 30.8


line stmt bran cond sub pod time code
1             =encoding utf8
2              
3             =head1 NAME
4              
5             Bio::BPWrapper::TreeManipulations - Functions for biotree
6              
7             =head1 SYNOPSIS
8              
9             use Bio::BPWrapper::TreeManipulations;
10             # Set options hash ...
11             initialize(\%opts);
12             write_out(\%opts);
13              
14             =cut
15              
16             # Package global variables
17             my ($in, $out, $aln, %opts, $file, $in_format, $out_format, @nodes,
18             $tree, $print_tree, $rootnode, @otus);
19              
20             ###################### subroutine ######################
21              
22             package Bio::BPWrapper::TreeManipulations;
23              
24 15     15   105 use strict;
  15         26  
  15         560  
25 15     15   75 use warnings;
  15         27  
  15         678  
26 15     15   164 use v5.10;
  15         51  
27 15     15   75 use Bio::BPWrapper;
  15         29  
  15         424  
28 15     15   8614 use Bio::TreeIO;
  15         1079673  
  15         583  
29 15     15   114 use Bio::Tree::Tree;
  15         28  
  15         281  
30 15     15   56 use Bio::Tree::Node;
  15         23  
  15         233  
31 15     15   51 use Bio::Tree::TreeFunctionsI;
  15         19  
  15         282  
32 15     15   8694 use Data::Dumper;
  15         117082  
  15         1166  
33 15     15   7775 use POSIX;
  15         103132  
  15         87  
34 15     15   41576 use List::Util qw(shuffle);
  15         26  
  15         1922  
35 15     15   88 if ($ENV{'DEBUG'}) { use Data::Dumper }
  15         28  
  15         743  
36              
37 15     15   65 use vars qw(@ISA @EXPORT @EXPORT_OK);
  15         27  
  15         152704  
38              
39             @ISA = qw(Exporter);
40              
41             @EXPORT = qw(print_tree_shape edge_length_abundance swap_otus getdistance
42             sister_pairs countOTU reroot clean_tree delete_otus initialize
43             write_out bin walk_edge cut_sister);
44              
45             =head1 SUBROUTINES
46              
47             =head2 initialize()
48              
49             Sets up most of the actions to be performed on an alignment.
50              
51             Call this right after setting up an options hash.
52              
53             Sets package variables: C<$in_format>, C<@nodes>, C<$tree>, C<$out_format>, and C<$out>.
54              
55              
56             =cut
57              
58             sub initialize {
59 15     15 1 43 my $opts_ref = shift;
60 15         93 Bio::BPWrapper::common_opts($opts_ref);
61 15         28 %opts = %{$opts_ref};
  15         65  
62 15   50     122 $in_format = $opts{"input"} // 'newick'; # This doesn't work...or does it?
63 15   100     86 $out_format = $opts{"output"} // "newick";
64 15         34 $print_tree = 0; # Trigger printing the tree.
65 15   50     56 my $file = shift || "STDIN";
66 15 50       61 if ($in_format eq 'edge') {
67 0         0 $tree = &edge2tree($file);
68             } else {
69 15 50       252 $in = Bio::TreeIO->new(-format => $in_format, ($file eq "STDIN") ? (-fh => \*STDIN) : (-file => $file));
70 15         95786 $tree = $in->next_tree(); # get the first tree (and ignore the rest)
71             }
72 15         444935 $out = Bio::TreeIO->new(-format => $out_format);
73 15         14310 @nodes = $tree->get_nodes;
74 15         13140 $rootnode = $tree->get_root_node;
75 15 100       114 foreach (@nodes) { push @otus, $_ if $_->is_Leaf }
  420         3659  
76             }
77              
78             # label low-desc sisters as "cut-nodes"
79             sub cut_sister {
80 0   0 0 0 0 my $cutoff = $opts{'cut-sis'} || die "$0 --cut-sis \n";
81 0         0 foreach my $nd (@nodes) {
82 0         0 my $decCts = 0;
83 0   0     0 my $id = $nd->id() || $nd->internal_id();
84 0 0       0 if ($nd->is_Leaf) {
85 0         0 $nd->id("cut_". $id); # label to cut
86 0         0 next;
87             }
88            
89 0         0 foreach ($nd->get_all_Descendents) {
90 0         0 $decCts++;
91             }
92 0 0       0 next if $decCts >= $cutoff; # don't cut
93 0         0 $nd->id("cut_". $id); # label to cut
94             }
95 0         0 $print_tree = 1;
96             }
97              
98             sub edge2tree {
99 0     0 0 0 my $edgeFile = shift;
100 0   0     0 open EG, "<", $edgeFile || die "can't read parent-child edge file\n";
101 0         0 $rootnode = Bio::Tree::Node->new(-id=>'root');
102 0         0 my $tr = Bio::Tree::Tree->new();
103 0         0 my @nds = ($rootnode);
104 0         0 my %parent;
105             my %seen_edge;
106 0         0 while() {
107 0 0       0 next unless /^(\S+)\s+(\S+)/;
108 0         0 my ($pa, $ch) = ($1, $2);
109 0         0 $seen_edge{$pa}{$ch}++; # number of events
110 0 0       0 $parent{$ch} = $pa unless $parent{$ch}; # seen before
111             }
112 0         0 close EG;
113              
114             # print Dumper(\%parent);
115              
116 0         0 my %seen_parent;
117             my %add_node;
118             # my @nds;
119 0         0 foreach my $ch (keys %parent) {
120 0         0 my $pa = $parent{$ch};
121 0         0 $seen_parent{$pa}++;
122 0         0 push @nds, Bio::Tree::Node->new(-id=>$ch, -branch_length => $seen_edge{$pa}{$ch});
123 0         0 $add_node{$ch}++;
124             }
125              
126             # special treatment to set outgroup (which has no parent specified in the edge table) as root
127             # add all as chid of root
128 0         0 foreach my $pa (keys %seen_parent) {
129 0 0       0 next if $add_node{$pa};
130 0         0 my $orphan = Bio::Tree::Node->new(-id=>$pa, -branch_length => 1); # ST213 in test file "edges-pars.tsv"
131 0         0 push @nds, $orphan;
132 0         0 $parent{$pa} = 'root';
133             }
134              
135             # attach descendant
136             # my %attached;
137 0         0 foreach my $node (@nds) {
138 0 0       0 next if $node eq $rootnode;
139 0         0 my $id = $node->id(); # print $id, "\t";
140 0         0 my $p_id = $parent{$id}; # print $p_id, "\n";
141             # next if $attached{$p_id}++;
142 0         0 my @nds = grep { $_->id() eq $p_id } @nds;
  0         0  
143 0 0       0 if (@nds) {
144 0         0 my $p_node = shift @nds;
145 0         0 $p_node->add_Descendent($node);
146             } else {
147 0         0 die "no parent $id\n";
148             }
149             }
150 0         0 $tr->set_root_node($rootnode);
151 0         0 return $tr;
152             }
153              
154              
155             sub reorder_by_ref {
156 0 0   0 0 0 die "reference node id missing\n" unless $opts{'ref'};
157 0         0 my $id = 0;
158 0         0 &_flip_if_not_in_top_clade($rootnode, $opts{'ref'}, \$id);
159 0         0 $print_tree = 1;
160             }
161              
162             sub _flip_if_not_in_top_clade { # by resetting creation_id & sortby option of each_Descendent
163 0     0   0 my ($nd, $ref, $refid) = @_;
164 0         0 $nd->_creation_id($$refid++);
165             # print STDERR $nd->internal_id(), ":\t";
166 0 0       0 if ($nd->is_Leaf()) {
167             # print STDERR "\n";
168 0         0 return }
169 0         0 my @des = $nd->each_Descendent();
170 0         0 my @des_reordered;
171 0         0 for (my $i=0; $i<=$#des; $i++) {
172 0         0 my $in_des = 0;
173 0         0 my $id = $des[$i]->internal_id();
174 0         0 my @otus = &_each_leaf($des[$i]);
175 0 0       0 foreach (@otus) { $in_des = 1 if $ref eq $_->id }
  0         0  
176             # print STDERR join("|", map { $_->id } @otus), " => ", $in_des, ";";
177 0 0       0 if ($in_des) {
178 0         0 unshift @des_reordered, $des[$i];
179             } else {
180 0         0 push @des_reordered, $des[$i];
181             }
182             }
183 0         0 foreach (@des_reordered) {
184 0         0 $_->_creation_id($$refid++);
185             # print STDERR $_->internal_id(), ";";
186             }
187             # print STDERR "\n";
188 0         0 my @des_new = $nd->each_Descendent('creation'); # key sort function!!
189 0         0 foreach my $de (@des_new) {
190 0         0 &_flip_if_not_in_top_clade($de, $opts{'ref'}, $refid);
191             }
192             }
193              
194              
195             # trim a node to a single OTU representative if all branch lengths of its descendant OTUs <= $cut
196             sub trim_tips {
197 0 0   0 0 0 die "Usage: $0 --trim-tips \n" unless $opts{'trim-tips'};
198 0         0 my $cut = $opts{'trim-tips'};
199              
200 0         0 my @trim_nodes;
201 0         0 my $group_ct = 0;
202 0         0 &identify_nodes_to_trim_by_walk_from_root($rootnode, \$cut, \@trim_nodes, \$group_ct);
203 0         0 my %otu_sets;
204 0         0 my $set_ct = 1;
205 0         0 foreach my $ref_trim (@trim_nodes) {
206 0         0 foreach (@$ref_trim) {
207 0         0 $otu_sets{$_} = $set_ct;
208             }
209 0         0 $set_ct++;
210             # print STDERR join "\t", sort @$ref_trim;
211             # print STDERR "\n";
212             }
213              
214              
215 0         0 foreach (@otus) {
216 0 0       0 next if $otu_sets{$_->id};
217 0         0 $otu_sets{$_->id} = $set_ct++;
218             }
219             # print Dumper(\%otu_sets);
220 0         0 print STDERR "#Trim tree from tip with a cutoff of d=$cut\n";
221 0         0 print STDERR "otu\tnr_set_id\n";
222 0         0 foreach (sort {$otu_sets{$a} <=> $otu_sets{$b}} keys %otu_sets) {
  0         0  
223 0         0 print STDERR $_, "\t", $otu_sets{$_}, "\n";
224             }
225              
226 0         0 $print_tree = 1;
227             }
228              
229             sub identify_nodes_to_trim_by_walk_from_root {
230 0     0 0 0 my $node = shift; # internal node only
231 0         0 my $ref_cut = shift;
232 0         0 my $ref_group = shift;
233 0         0 my $ref_ct = shift;
234              
235 0 0       0 return if $node->is_Leaf;
236 0         0 my %des_otus; # save distances
237 0         0 my $trim = 1; # default to trim
238             # trim a node if all OTU distance to it is <= cut
239 0         0 foreach my $des ($node -> get_all_Descendents()) {
240 0 0       0 next unless $des->is_Leaf;
241             #push @des_otus, $des;
242             # distance to a desc OTU
243 0         0 my $dist = $tree->distance($node, $des);
244 0         0 $des_otus{$des->id} = { 'otu' => $des, 'dist' => $dist };
245 0 0       0 $trim = 0 if $dist > $$ref_cut; # don't trim is any distance to OTU > $cut
246             }
247              
248 0 0       0 if ($trim) { # trim & retain the first OTU; stop descending
249 0         0 my @leafs = sort keys %des_otus; # make a copy
250 0         0 my $pa = $node->ancestor();
251 0         0 $pa -> remove_Descendent($node); # clear this node as a des of parent
252 0         0 my $retain = shift @leafs;
253 0         0 my $retain_node = $des_otus{$retain}->{'otu'};
254 0         0 my $d = $node->branch_length() + $des_otus{$retain}->{'dist'};
255 0         0 $retain_node->branch_length($d); # keep distance to tip
256 0         0 $pa->add_Descendent($retain_node); # add retained OTU to parent
257            
258             # collect all OTUs for a trimmed inode
259 0         0 push @$ref_group, [keys %des_otus];
260 0         0 $$ref_ct++;
261 0         0 return;
262             } else { # don't trim
263 0         0 foreach my $des ($node->each_Descendent()) {
264 0         0 &identify_nodes_to_trim_by_walk_from_root($des, $ref_cut, $ref_group, $ref_ct);
265             }
266             }
267             }
268              
269             sub cut_tree {
270 0     0 0 0 my @otu_hts;
271 0         0 $rootnode->{height} = 0;
272 0         0 foreach my $nd (@nodes) {
273 0         0 my $ht = &distance_to_root($nd);
274 0         0 $nd->{height} = $ht;
275 0 0       0 push @otu_hts, $ht if $nd->is_Leaf;
276             }
277              
278 0         0 @otu_hts = sort {$a <=> $b} @otu_hts;
  0         0  
279 0         0 my $least_otu_height = shift @otu_hts;
280 0 0       0 die "Usage: $0 --cut-tree \n" unless $opts{'cut-tree'};
281 0   0     0 my $cut = $opts{'cut-tree'} || 0.5 * $least_otu_height; # default to cut the branches traversing the line that is 1/2 of least deep OTU
282 0 0       0 die "Cut tree at $cut, greater than least-deep OTU ($least_otu_height). Lower cut value.\n" if $cut >= $least_otu_height;
283              
284 0         0 my @cut_nodes;
285 0         0 my $group_ct = 0;
286 0         0 &identify_nodes_to_cut_by_walk_from_root($rootnode, \$cut, \@cut_nodes, \$group_ct);
287 0         0 foreach my $cutnode (@cut_nodes) {
288 0 0       0 print $cutnode->is_Leaf() ? "cut_otu" : $cutnode->id(), ":\t";
289 0         0 print join "\t", map {$_->id()} &_each_leaf($cutnode);
  0         0  
290 0         0 print "\n";
291             }
292 0         0 $print_tree = 1;
293             }
294              
295              
296             sub identify_nodes_to_cut_by_walk_from_root {
297 0     0 0 0 my $node = shift;
298 0         0 my $ref_cut = shift;
299 0         0 my $ref_group = shift;
300 0         0 my $ref_ct = shift;
301 0 0       0 return if $node->{height} > $$ref_cut; # node too high
302 0         0 foreach my $des ($node->each_Descendent() ) {
303 0 0       0 if ($des->{height} > $$ref_cut) { # found a branch to cut: parent height < $cut & child height > $cut
304 0 0       0 $des->id("cut_" . $$ref_ct++) unless $des->is_Leaf;
305 0         0 push @$ref_group, $des
306             }
307 0         0 &identify_nodes_to_cut_by_walk_from_root($des, $ref_cut, $ref_group, $ref_ct); # node too low
308             }
309             }
310              
311             sub label_selected_nodes {
312 0     0 0 0 my $label_file = $opts{'label-selected-nodes'}; # each line consists of internal_id label
313 0         0 my %labs;
314 0   0     0 open LAB, "<", $label_file || die "label file not found\n";
315 0         0 while() {
316 0         0 chomp;
317 0         0 my ($a, $b) = split;
318 0         0 $labs{$a} = $b;
319             }
320 0         0 close LAB;
321 0         0 foreach my $nd (@nodes) {
322 0 0       0 next if $nd->is_Leaf;
323 0 0       0 if ($labs{$nd->internal_id}) {
324 0         0 $nd->id($labs{$nd->internal_id});
325 0         0 warn $nd->internal_id, "\t", "changed to ", $labs{$nd->internal_id}, "\n";
326             } else {
327 0         0 $nd->id('');
328             }
329             }
330 0         0 $print_tree = 1
331             }
332              
333             sub rename_tips {
334 0     0 0 0 my $label_file = $opts{'rename-tips'}; # each line consists of internal_id label
335 0         0 my %labs;
336 0   0     0 open LAB, "<", $label_file || die "label file not found\n";
337 0         0 while() {
338 0         0 chomp;
339 0         0 my ($a, $b) = split;
340 0         0 $labs{$a} = $b;
341             }
342 0         0 close LAB;
343 0         0 foreach my $nd (@nodes) {
344 0 0       0 next unless $nd->is_Leaf;
345 0         0 my $old = $nd->id();
346 0 0       0 if ($labs{$nd->id}) {
347 0         0 $nd->id($labs{$nd->id});
348 0         0 warn "Success: old tip $old changed to new name\t", $nd->id(), "\n";
349             } else {
350 0         0 warn "Failed: old tip $old not changed\n";
351             }
352             }
353 0         0 $print_tree = 1
354             }
355              
356              
357             sub write_tab_tree{
358             # print "*" x 200, "\n";
359 0     0 0 0 my $y_space = 1; # number of lines between two neighboring nodes
360 0         0 my $root_pos = 1;
361 0         0 my $maxL = 100;
362 0         0 my $longest_leaf_length=0;
363 0         0 my $largest_int_length =0;
364 0         0 &assign_leaf_ycoord($rootnode, \$root_pos, \$y_space);
365 0         0 &assign_inode_ycoord($rootnode);
366 0         0 my @nd_sorted = sort {$b->{ycoord} <=> $a->{ycoord}} @nodes;
  0         0  
367 0         0 foreach my $nd (@nd_sorted) {
368 0         0 my $branch_length = $nd->branch_length;
369 0         0 $nd->{xcoord} = &distance_to_root($nd);
370 0 0       0 next unless $nd->is_Leaf();
371 0 0       0 $longest_leaf_length = $nd->{xcoord} if $nd->{xcoord} >= $longest_leaf_length;
372             }
373 0         0 my $scale = int($maxL/$longest_leaf_length);
374              
375 0         0 foreach my $nd (@nd_sorted) {
376 0         0 $nd->{xcoord_scaled} = ceil($scale * $nd->{xcoord});
377             }
378              
379             # draw initial lines
380 0         0 my %lines;
381 0         0 foreach my $nd (@nd_sorted) {
382 0   0     0 my $dashes = ceil(($nd->branch_length || 0) * $scale);
383 0         0 my $spaces = $nd->{xcoord_scaled} - $dashes + 1;
384 0 0       0 my $tag = $nd->is_Leaf ? $nd->id() : $nd->internal_id();
385 0 0       0 my $line = " " x $spaces . "+" . "-" x ($dashes ? ($dashes-1):0) . $tag;
386 0         0 $lines{$nd->internal_id()} = $line;
387             }
388              
389             # update by adding vertical lines
390             # algorithm: replace with pipe at parent xcoord for every node in-between a node and its parent
391 0         0 for (my $i=0; $i<=$#nd_sorted; $i++) {
392 0         0 my $current_node = $nd_sorted[$i];
393 0         0 my $current_ycoord = $current_node->{ycoord};
394 0 0       0 next if $current_node eq $rootnode;
395 0         0 my $parent_node = $current_node->ancestor();
396 0         0 my $parent_ycoord = $parent_node->{ycoord};
397 0         0 my $parent_xcoord = $parent_node->{xcoord_scaled};
398 0         0 foreach my $nid (keys %lines) { # capture in-between nodes to add pipes
399 0         0 my $node = $tree->find_node(internal_id=>$nid);
400 0 0 0     0 next if $node eq $current_node || $node eq $parent_node;
401 0 0 0     0 next if ($current_ycoord < $parent_ycoord) && (($node->{ycoord} < $current_ycoord) || ($node->{ycoord} > $parent_ycoord));
      0        
402 0 0 0     0 next if ($current_ycoord > $parent_ycoord) && (($node->{ycoord} > $current_ycoord) || ($node->{ycoord} < $parent_ycoord));
      0        
403 0         0 my $line = $lines{$nid};
404 0         0 $line .= ' ' while length($line) <= $parent_xcoord+1; # pad spaces when line not long enough to reach parent node xcoord
405 0         0 my @chars = split //, $line;
406 0 0       0 $chars[$parent_xcoord+1] ='|' unless $chars[$parent_xcoord+1] eq '+';
407 0         0 $lines{$nid} = join '', @chars;
408             }
409             }
410              
411 0         0 foreach (@nd_sorted) {
412 0         0 print $lines{$_->internal_id}, "\n";
413             }
414             }
415              
416             sub assign_leaf_ycoord {
417 0     0 0 0 my ($node, $ypos, $yspacing) = @_;
418 0 0       0 return if $node->is_Leaf();
419 0         0 for my $child ($node->each_Descendent()) {
420 0 0       0 if ($child->is_Leaf()) {
421 0         0 $child->{'ycoord'} = $$ypos; # de-reference
422 0         0 $$ypos += $$yspacing;
423             } else {
424 0         0 &assign_leaf_ycoord($child, $ypos, $yspacing);
425             }
426             }
427             }
428              
429             sub assign_inode_ycoord {
430 0     0 0 0 my $node = shift;
431 0         0 my @tmp;
432 0         0 for my $child ($node->each_Descendent()) {
433 0 0       0 &assign_inode_ycoord($child) unless defined $child->{'ycoord'};
434 0         0 push @tmp, $child->{'ycoord'}
435             }
436 0         0 my @sorted = sort { $a <=> $b } @tmp;
  0         0  
437 0         0 $node->{'ycoord'} = $sorted[0] + 1 / 2 * ($sorted[-1] - $sorted[0])
438             }
439              
440             sub tips_to_root {
441 0     0 0 0 foreach (@otus) {
442 0         0 printf "%s\t%.6f\n", $_->id(), distance_to_root($_);
443             }
444             }
445              
446             sub distance_to_root {
447 0     0 0 0 my $node = shift;
448 0   0     0 my $dist = $node->branch_length() || 0;
449 0         0 my $parent = $node->ancestor();
450 0 0       0 $dist += &distance_to_root($parent) if defined $parent;
451 0         0 return $dist;
452             }
453              
454             sub pars_binary {
455 0     0 0 0 my $trait_table_file = $opts{"ci"};
456 0   0     0 open BIN, "<", $trait_table_file || die "no trait file: $trait_table_file\n";
457 0         0 my ($sites, @colnames, @rownames);
458 0         0 my $first_line = 1;
459 0         0 while() {
460 0         0 chomp;
461 0         0 my @data = split /\t/, $_;
462 0 0       0 if ($first_line) {
463 0         0 shift @data;
464 0         0 @colnames = @data;
465 0         0 $first_line = 0;
466 0         0 next;
467             }
468              
469 0         0 my $otu = shift @data;
470 0         0 push @rownames, $otu;
471 0 0       0 die "check colnames\n" unless @colnames == @data;
472 0         0 for(my $i=0; $i<=$#colnames; $i++) {
473 0 0       0 $sites->{$otu}->{$colnames[$i]} = $data[$i] ? 1:0; # force binary
474             }
475             }
476 0         0 close BIN;
477              
478 0         0 foreach my $otu (@otus) {
479             # die "otu id not found in rownames\n" unless &_check_id($node->id, \@rownames);
480 0         0 foreach my $trait_name (@colnames) {
481 0         0 $otu->add_tag_value($trait_name, [$sites->{$otu->id}->{$trait_name}]);
482             }
483             }
484             # print Dumper(\@otus); exit;
485              
486             # Fitch algorithm: post-order traversal
487 0         0 my @informative;
488 0         0 for (my $i=0; $i<=$#colnames; $i++) {
489 0 0       0 next unless &_is_informative($colnames[$i], $i);
490 0         0 push @informative, $colnames[$i];
491 0         0 $rootnode->add_tag_value($colnames[$i], &_fitch_parsimony($rootnode, $i, \@colnames));
492 0 0       0 &_penny_parsimony($rootnode, $i, \@colnames) if @{$rootnode->get_tag_values($colnames[$i])} == 2; # only when root state unresolved
  0         0  
493 0         0 my $ci = &_consistency_index($i, \@colnames);
494 0         0 print join "\t", ($i+1, $colnames[$i], $ci);
495 0         0 print "\n";
496             }
497             }
498              
499             sub _fitch_parsimony {
500 0     0   0 my ($node,$index, $refcol)=@_; #warn $node->internal_id, "\t", $node->id() || "inode", "\n";
501 0         0 my $ref_node_state;
502 0         0 my @colnames = @{$refcol};
  0         0  
503 0 0       0 if ($node->is_Leaf) {
504             # warn $node->id, "\t", Dumper($node->get_tag_values($colnames[$index])), "\n";
505 0         0 return $node->get_tag_values($colnames[$index]);
506             } else {
507 0         0 my @child = $node->each_Descendent;
508 0         0 my ($ref0, $ref1);
509 0 0       0 if ($child[0]->is_Leaf) { # child 0 is an OTU
510 0         0 $ref0 = $child[0]->get_tag_values($colnames[$index]);
511 0 0       0 if ($child[1]->is_Leaf) { # both child 0 & 1 are an OTU
512 0         0 $ref1 = $child[1]->get_tag_values($colnames[$index]);
513             # warn "got sis otu for inode ", $node->internal_id(), "\n";
514 0         0 $ref_node_state = &_intersect_or_union($ref0, $ref1);
515             # warn Dumper($node->get_tag_values($colnames[$index]));
516             } else { # child 0 is an OTU, child 1 is an inode
517 0         0 $ref_node_state = &_intersect_or_union($ref0, &_fitch_parsimony($child[1], $index, \@colnames));
518             }
519             } else { # child 0 is an inode
520 0 0       0 if ($child[1]->is_Leaf) { # child 1 is an inode child 1 is an OTU
521 0         0 $ref1 = $child[1]->get_tag_values($colnames[$index]);
522             # warn "got sis otu for inode ", $node->internal_id(), "\n";
523 0         0 $ref_node_state = &_intersect_or_union(&_fitch_parsimony($child[0], $index, \@colnames), $ref1);
524             } else { # both inodes
525 0         0 $ref_node_state = &_intersect_or_union(&_fitch_parsimony($child[0], $index, \@colnames), &_fitch_parsimony($child[1], $index, \@colnames));
526             }
527             }
528 0         0 $node->add_tag_value($colnames[$index], $ref_node_state);
529 0         0 return $ref_node_state;
530             }
531             }
532              
533             sub _intersect_or_union { # from Perl Cookbook
534 0     0   0 my ($ref1, $ref2) = @_;
535 0         0 my (%union, %isect);
536 0         0 foreach my $e (@$ref1, @$ref2) {
537 0 0       0 $union{$e}++ && $isect{$e}++; # Perl Cookbook ideom
538             }
539             # warn Dumper(\%union, \%isect);
540              
541 0 0       0 if (@$ref1 == @$ref2) { # (0) U (0); (0) U (1); (1) U (1); (0,1) U (0,1)
542 0         0 return [keys %union];
543             } else { # (0) I (0,1); (1) I (0,1)
544 0         0 return [keys %isect];
545             }
546             }
547              
548             sub _penny_parsimony {
549 0     0   0 my $nd = shift;
550 0         0 my $index = shift;
551 0         0 my $refcol = shift;
552 0         0 my @colnames = @{$refcol};
  0         0  
553 0         0 my $ref_states = $nd->get_tag_values($colnames[$index]);
554 0         0 my $ref_new;
555             my $pa;
556 0 0       0 return if $nd->is_Leaf;
557 0 0       0 if ($pa = $nd->ancestor()) {
558 0         0 my $ref_pa_state = $pa->get_tag_values($colnames[$index]);
559 0         0 $ref_new = &_intersect_or_union($ref_states, $ref_pa_state); # intersect with with parent
560             } else { # is the root
561 0         0 $ref_new = [1]; # resolve root state using Penny parsimony: one gain only
562             }
563 0         0 $nd->add_tag_value($colnames[$index], $ref_new); # resolve root state using Penny parsimony: one gain only
564 0         0 &_penny_parsimony($_, $index, \@colnames) for $nd->each_Descendent();
565             }
566              
567             sub _is_informative {
568 0     0   0 my $col_id = shift;
569 0         0 my $index = shift;
570 0         0 my @sts = map {$_->get_tag_values($col_id)->[0]} @otus;
  0         0  
571 0         0 my %seen;
572 0         0 $seen{$_}++ for @sts;
573 0 0       0 if (keys %seen == 1) { #warn "$index: $col_id is constant\n";
574 0         0 return 0 }
575 0         0 foreach (keys %seen) {
576 0 0       0 if ($seen{$_} == 1) { #warn "$index: $col_id is singleton\n";
577 0         0 return 0 }
578             }
579             # warn "$index: $col_id is informative\n";
580 0         0 return 1;
581             }
582              
583             sub _consistency_index { # ratio of minimum (1 for a binary trait) to actural changes
584 0     0   0 my $index = shift;
585 0         0 my $ci = 0;
586 0         0 my $refcol = shift;
587 0         0 my @colnames = @{$refcol};
  0         0  
588 0         0 foreach my $nd (@nodes) {
589 0 0       0 next if $nd eq $rootnode;
590 0         0 my $pa = $nd->ancestor();
591 0         0 my $state = $nd->get_tag_values($colnames[$index])->[0]; # assuming fully resolved (only one state)
592 0         0 my $pa_state = $pa->get_tag_values($colnames[$index])->[0];
593 0 0       0 if ($state ne $pa_state) {
594             # if ($state) {
595             # push @{$gain_loss{'gain'}->{$nd->internal_id}}, $colnames[$index];
596             # $fam_events{$colnames[$index]}->{$nd->internal_id}->{'gain'}++;
597             # } else {
598             # push @{$gain_loss{'loss'}->{$nd->internal_id}}, $colnames[$index];
599             # $fam_events{$colnames[$index]}->{$nd->internal_id}->{'loss'}++;
600             # }
601 0         0 $ci++;
602             }
603             }
604 0         0 return sprintf "%.4f", 1/$ci;
605             }
606              
607             sub _check_id {
608 0     0   0 my $st = shift;
609 0         0 my $ref = shift;
610 0         0 foreach my $id (@$ref) {
611 0 0       0 return 1 if $st eq $id;
612             }
613 0         0 return 0;
614             }
615              
616             sub print_tree_shape {
617 0     0 0 0 my @matrix;
618 0         0 my (%leaf, %inode);
619 0         0 my $ct_leaf = 0;
620 0         0 my $ct_inode = 0;
621 0         0 for my $nd (@nodes) {
622 0 0       0 if ($nd->is_Leaf()) {
623 0         0 $leaf{$nd->id} = ++$ct_leaf;
624             } else {
625 0         0 $inode{$nd->internal_id} = ++$ct_inode;
626             }
627             }
628              
629 0         0 for my $nd (@nodes) {
630 0 0 0     0 next if $nd->is_Leaf() || $nd eq $rootnode;
631 0         0 my @dscs = $nd->each_Descendent;
632 0 0       0 die $nd->internal_id . ": node has more than two descendants\n" unless @dscs == 2;
633             my $id1 = $dscs[0]->is_Leaf
634 0 0       0 ? -1 * $leaf{$dscs[0]->id} : $inode{$dscs[0]->internal_id};
635             my $id2 = $dscs[1]->is_Leaf
636 0 0       0 ? -1 * $leaf{$dscs[1]->id} : $inode{$dscs[1]->internal_id};
637 0 0       0 if ($nd eq $rootnode) { # root at first
638 0         0 unshift @matrix, [ ($id1, $id2) ];
639             } else {
640 0         0 push @matrix, [ ($id1, $id2) ]
641             }
642             }
643 0         0 for (my $i = $#matrix; $i >=0; $i--) {
644 0         0 print $matrix[$i]->[0], "\t", $matrix[$i]->[1], "\n";
645             };
646             # print Dumper(\%leaf, \%inode);
647             }
648              
649             sub edge_length_abundance {
650 0     0 0 0 my @inodes;
651             my @brs;
652 0         0 push (@inodes, $_) for _walk_up($rootnode);
653              
654 0         0 for my $nd (@inodes) {
655 0 0       0 next if $nd eq $rootnode;
656 0         0 my $id = $nd->internal_id;
657 0         0 my $ct_otus = 0;
658 0 0       0 if ($nd->is_Leaf) {
659 0         0 $ct_otus = 1;
660             } else {
661 0         0 foreach (&_each_leaf($nd)) {
662 0         0 $ct_otus++;
663             }
664             }
665 0         0 push @brs, { 'id' => $id, 'num_tips' => $ct_otus, 'br_len' => $nd->branch_length() }
666             }
667              
668 0         0 my $ct_tips = 0;
669 0         0 foreach my $nd (@nodes) {
670 0 0       0 $ct_tips ++ if $nd->is_Leaf();
671             }
672              
673 0         0 for (my $k=1; $k<=$ct_tips; $k++) {
674 0         0 my $total = 0;
675 0         0 my @nds = grep { $_->{num_tips} == $k } @brs;
  0         0  
676 0 0       0 if (@nds) { $total += $_->{br_len} for @nds }
  0         0  
677 0         0 printf "%d\t%.6f\n", $k, $total/$tree->total_branch_length();
678             }
679             }
680              
681             =for comment
682             # Needs work
683             sub rotate_an_in_node {
684             my $nd_id = $opts{'rotate-node'};
685             my $ref_node = $tree->find_node(-id => $nd_id) || $tree->find_node(-internal_id => $nd_id) || die "node not found\n";
686             my $pa_node = $ref_node->ancestor();
687             $pa_node->each_Descendent($nd_id) || 'die: not an internal node\n';
688             # $ref_node->each_Descendent( sub { shuffle(@des) } );
689             $print_tree = 1;
690             #}
691             ## Not working
692             #sub sort_child { # sort by height
693             # my $by = $opts{'sort-child'} || 'height';
694             # foreach my $nd (@nodes) {
695             # next if $nd->is_Leaf;
696             # my @des = $nd->each_Descendent();
697             # $nd->each_Descendent($by);
698             # $ref_node->each_Descendent( sub { shuffle(@des) } );
699             # }
700             # $print_tree = 1;
701             #}
702             =cut
703              
704             sub swap_otus { # don't know why
705 0     0 0 0 my @otus;
706 0         0 my $otu_ct = 0;
707 0         0 foreach (@nodes) {
708 0 0       0 next unless $_->is_Leaf();
709 0         0 push @otus, $_;
710 0         0 $otu_ct++;
711             }
712 0         0 @otus = sort {$a->id() cmp $b->id() } @otus;
  0         0  
713 0         0 my $ref_otu;
714 0 0       0 if ($opts{'swap-otus'}) {
715 0   0     0 $ref_otu = $tree->find_node($opts{'swap-otus'}) || die "node not found\n";
716             } else {
717 0         0 $ref_otu = $otus[0];
718             }
719              
720 0         0 foreach my $nd (@otus) {
721 0 0       0 next if $nd eq $ref_otu;
722 0         0 my $nd_id = $nd->id();
723 0         0 my $ref_id = $ref_otu->id();
724 0         0 $nd->id("new_".$ref_id);
725 0         0 $ref_otu->id("new_".$nd_id);
726 0         0 say $tree->as_text($out_format);
727 0         0 $nd->id($nd_id);
728 0         0 $ref_otu->id($ref_id);
729             }
730             }
731              
732             # Get the distance between nodes
733             sub getdistance {
734 1     1 0 5 my @dnodes = _name2node($opts{'dist'});
735 1 50       3 if (scalar(@dnodes) != 2) { say "Error: Provide exactly two nodes/leaves to use with --dist" }
  0         0  
736 1         10 else { say $tree->distance(-nodes => \@dnodes) }
737             }
738              
739             sub sister_pairs {
740 0     0 0 0 my @otus;
741 0         0 my $otu_ct = 0;
742 0         0 foreach (@nodes) {
743 0 0       0 next unless $_->is_Leaf();
744 0         0 push @otus, $_;
745 0         0 $otu_ct++;
746             }
747              
748 0         0 @otus = sort {$a->id() cmp $b->id() } @otus;
  0         0  
749 0         0 for (my $i = 0; $i < $otu_ct; $i++) {
750 0         0 my $pa_i = $otus[$i]->ancestor();
751 0         0 for (my $j = $i+1; $j < $otu_ct; $j++) {
752 0         0 my $pa_j = $otus[$j]->ancestor();
753 0         0 print $otus[$i]->id, "\t", $otus[$j]->id, "\t";
754 0 0       0 print $pa_i eq $pa_j ? 1 : 0;
755 0         0 print "\n";
756             }
757             }
758             }
759              
760             =head2 countOTU()
761              
762             Print total number of OTUs (leaves).
763              
764             =cut
765              
766             sub countOTU {
767 1     1 1 2 my $otu_ct = 0;
768 1 100       2 foreach (@nodes) { $otu_ct++ if $_->is_Leaf() }
  28         129  
769 1         11 say $otu_ct
770             }
771              
772             =head2 reroot()
773              
774             Reroot tree to node in C<$opts{'reroot'}> by creating new branch.
775              
776             =cut
777              
778             sub reroot {
779             # my $outgroup_id = $opts{'reroot'};
780 0     0 1 0 my ($tag, $out_id) = split(':', $opts{'reroot'});
781 0         0 my $outgroup;
782 0 0       0 if ($tag eq 'otu') { # leaf id
    0          
783 0         0 $outgroup = $tree->find_node($out_id)
784             } elsif ($tag eq 'intid') { # internal id
785 0         0 for my $nd (@nodes) {
786 0 0       0 if ($nd->internal_id() == $out_id) {
787 0         0 $outgroup = $nd;
788             last
789 0         0 }
790             }
791             }
792             else {
793 0         0 die("Need a tag: otu:, or intid:\n");
794             }
795             # my $newroot = $outgroup->create_node_on_branch(-FRACTION => 0.5, -ANNOT => {id => 'newroot'});
796             # $tree->reroot($outgroup);
797 0 0       0 die "outgroup not found: $out_id" unless $outgroup;
798 0         0 $tree->reroot_at_midpoint($outgroup);
799 0         0 $print_tree = 1;
800             }
801              
802             sub clean_tree {
803 0     0 0 0 foreach my $nd (@nodes) {
804 0 0       0 $nd->branch_length(0) if $opts{'clean-br'};
805 0 0       0 if ($opts{'clean-boot'}) {
806 0         0 $nd->bootstrap(0);
807 0 0       0 $nd->id('') unless $nd->is_Leaf;
808             }
809             }
810 0         0 $print_tree = 1;
811             }
812              
813             sub delete_otus {
814 0     0 0 0 my $ref_otus = &_get_otus();
815 0         0 my @otus_to_retain = &_remove_otus($ref_otus, $opts{'del-otus'});
816             # print Dumper(\@otus_to_retain);
817 0         0 $opts{'subset'} = join ",", @otus_to_retain;
818 0         0 &subset();
819             }
820              
821             sub _get_otus {
822 0     0   0 my @list;
823 0 0       0 foreach my $nd (@nodes) { push @list, $nd if $nd->is_Leaf }
  0         0  
824 0         0 return \@list;
825             }
826              
827             sub _remove_otus {
828 0     0   0 my $ref = shift;
829 0         0 my $str = shift;
830 0         0 my @list;
831 0         0 my @otus_to_remove = split /\s*,\s*/, $str;
832 0         0 my %to_del;
833 0         0 foreach (@otus_to_remove) { $to_del{$_} = 1 }
  0         0  
834              
835 0         0 foreach my $nd (@$ref) {
836 0 0       0 push @list, $nd->id() unless $to_del{$nd->id()};
837             }
838 0         0 return @list;
839             }
840              
841             sub multi2bi {
842 0     0 0 0 foreach my $nd (@nodes) {
843             # next if $nd eq $rootnode;
844 0         0 &_add_node($nd);
845             }
846 0         0 $print_tree = 1;
847             }
848              
849             sub _add_node {
850 0     0   0 my $node = shift;
851             # warn "processing\t", $node->internal_id, "\n";
852 0         0 my @desc = $node->each_Descendent;
853 0 0       0 return if scalar(@desc) <= 2;
854             # warn "multifurcating node:\t", $node->internal_id, " ... add a new node\n";
855 0         0 shift @desc; # retain the first descent
856             # my $new_node = $node->create_node_on_branch(-FRACTION => 0.5, -FORCE => 1, -ANNOT=>{ -id => "new_id" });
857 0         0 my $new_node = Bio::Tree::Node->new(-id => "new", -branch_length => 0);
858 0         0 $node->add_Descendent($new_node);
859             # warn "\ta new node created:\t", $new_node->id, "\n";
860 0         0 foreach (@desc) {
861 0         0 $node->remove_Descendent($_); # remove from grand-parent
862             # warn "\t\tremove descendant:\t", $_->internal_id, "\n";
863 0         0 $new_node->add_Descendent($_); # re-attarch to parent
864             # warn "\t\tadd descendant to the new node:\t", $_->internal_id, "\n";
865             }
866 0         0 &_add_node($new_node);
867             }
868              
869             # Subset a tree
870             sub subset {
871             # Collect the subset of nodes from STDIN or from $_
872 2     2 0 7 my @keep_nodes;
873 2 100       16 if ($opts{'subset'}) { @keep_nodes = _name2node($opts{'subset'}) }
  1         5  
874 1         3 else { my $ar = $_[0]; @keep_nodes = @$ar }
  1         3  
875              
876             # Collect list of descendents
877 2         6 my @descendents;
878 2         8 for my $nd (@keep_nodes) { push @descendents, $_ for $nd->get_all_Descendents }
  4         76  
879              
880             # Collect list of ancestors
881 2         47 my @ancestors;
882             my $tmp;
883 2         7 for (@keep_nodes) {
884 4         45 $tmp = $_;
885 4         20 while ($tmp->ancestor) {
886 11         118 push @ancestors, $tmp->ancestor;
887 11         111 $tmp = $tmp->ancestor
888             }
889             }
890              
891             # Make a hash of nodes to keep
892 2         29 my %keep = map { $_->internal_id => $_ } @keep_nodes;
  4         54  
893 2         32 $keep{$_->internal_id} = $_ for @descendents;
894 2         9 $keep{$_->internal_id} = $_ for @ancestors;
895              
896             # Remove all nodes but those in %keep
897 2 100       90 for (@nodes) { $tree->remove_Node($_) unless exists($keep{$_->internal_id}) }
  56         4443  
898              
899             # Clean up internal single-descendent nodes
900 2         194 my @desc;
901             my $nd_len;
902 2         0 my $desc_len;
903 2         10 for my $nd ($tree->get_nodes) {
904 12 100       981 next if $nd == $rootnode;
905 10         32 @desc = $nd->each_Descendent;
906 10 100       234 next unless scalar(@desc) == 1;
907 5   50     18 $nd_len = $nd->branch_length() || 0;
908 5   50     81 $desc_len = $desc[0]->branch_length() || 0;
909 5         75 $desc[0]->branch_length($nd_len + $desc_len);
910 5         371 $nd->ancestor->add_Descendent($desc[0]);
911 5         803 $tree->remove_Node($nd)
912             }
913              
914             # Take care of the a single-descendent root node
915 2         10 @desc = $rootnode->each_Descendent;
916 2 100       74 if (scalar(@desc) == 1) {
917 1         19 $rootnode->add_Descendent($_) for $desc[0]->each_Descendent;
918 1         16 $tree->remove_Node($desc[0])
919             }
920 2         98 $print_tree = 1
921             }
922              
923             # Print OTU names and lengths
924             sub print_leaves_lengths {
925 1 100 50 1 0 3 foreach (@nodes) { say $_->id(), "\t", $_->branch_length() || 0 if $_->is_Leaf() }
  28         236  
926             }
927              
928             # Get LCA
929             sub getlca {
930 1     1 0 3 my @lca_nodes;
931 1 50       6 if (_name2node($opts{'lca'})) { @lca_nodes = _name2node($opts{'lca'}) }
  1         24  
932 0         0 else { my $ar = $_[0]; @lca_nodes = @$ar }
  0         0  
933 1         3 my @nd_pair;
934             my $lca;
935              
936 1         4 $nd_pair[0] = $lca_nodes[0];
937 1 50       4 if (@lca_nodes > 1) {
    0          
938 1         6 for (my $index = 1; $index < @lca_nodes; $index++) {
939 2         6 $nd_pair[1] = $lca_nodes[$index];
940 2         18 $lca = $tree->get_lca(-nodes => \@nd_pair);
941 2         5128 $nd_pair[0] = $lca
942             }
943 1 50       7 if (_name2node($opts{'lca'})) { say $lca->internal_id } else { return $lca }
  1         5  
  0         0  
944             } elsif (@lca_nodes == 1) {
945 0 0       0 if (_name2node($opts{'lca'})) { say $lca_nodes[0]->ancestor->internal_id }
  0         0  
946 0         0 else { return $lca_nodes[0]->ancestor->internal_id }
947             }
948             }
949              
950             # Label nodes with their internal ID's
951             sub label_nodes {
952 1     1 0 3 for (@nodes) {
953 28 100       433 next if $_ == $rootnode;
954 27 50       62 my $suffix = defined($_->id) ? "_" . $_->id : "";
955 27         339 $_->id($_->internal_id . $suffix)
956             }
957 1         16 $print_tree = 1
958             }
959              
960             # Print half-tree id distances between all pairs of nodes
961             sub listdistance {
962 1     1 0 3 my (@leaves, @sortedleaf_names, @leafnames);
963 1 100       5 foreach (@nodes) { push(@leaves, $_) if $_->is_Leaf() }
  28         238  
964              
965             # Make an alphabetical list of OTU names
966 1         16 push @sortedleaf_names, $_->id foreach sort {lc($a->id) cmp lc($b->id)} @leaves;
  41         464  
967              
968 1         123 @leaves = ();
969              
970             #Rebuld leaf array with new alphabetical order
971 1         22 push @leaves, $tree->find_node(-id => $_) foreach @sortedleaf_names;
972              
973             # Prints a half-matrix of distance values
974 1         19620 my $i = 1;
975 1         6 for my $firstleaf (@leaves) {
976 15         32 my @dnodes;
977 15         58 for (my $x = $i; $x < scalar(@leaves); $x++) {
978 105         226552 @dnodes = ($firstleaf, $leaves[$x]);
979 105         386 say join "\t", ($firstleaf->id(), $leaves[$x]->id(), $tree->distance(-nodes => \@dnodes))
980             }
981 15         38346 $i++
982             }
983             }
984              
985             =head2 bin()
986              
987             Divides tree into number of specified segments and counts branches up
988             to height the segment. Prints: bin_number, branch_count, bin_floor,
989             bin_ceiling.
990              
991             =cut
992              
993             sub bin {
994 1     1 1 5 my $treeheight = _treeheight(\$tree);
995 1         8 my $bincount = $opts{'ltt'};
996 1         4 my $binsize = $treeheight/$bincount;
997 1         2 my @bins;
998 1         5 while ($treeheight > 0) {
999 11         23 unshift @bins, $treeheight;
1000 11         23 $treeheight -= $binsize
1001             }
1002             # Handle imperfect division. When approaching 0, if a tiny number is found, such as 2e-17, assign it as 0 and ignore negatives that may follow.
1003 1 100       4 for (@bins) { shift @bins if $_ < 1e-10 }
  10         23  
1004 1         3 unshift @bins, 0;
1005              
1006 1         5 for (my $i=0; $i+1<@bins; $i++) {
1007 10         22 my $branchcount = 1; # branch from root
1008             # Starting from the root, add a branch for each found descendent
1009 10         42 $branchcount += _binrecursive(\$rootnode, $bins[$i+1]);
1010 10         211 printf "%3d\t%3d\t%.4f\t%.4f\n", $i+1, $branchcount, $bins[$i], $bins[$i+1];
1011             }
1012             }
1013              
1014             sub print_all_lengths{
1015 1     1 0 4 for (@nodes) {
1016 28 100       224 next if $_ == $rootnode;
1017 27         66 my $p_node = $_->ancestor();
1018 27         192 my ($p_id, $c_id);
1019 27         59 $p_id = $p_node->internal_id;
1020 27 100       244 $c_id = $_->is_Leaf ? $_->id() : $_->internal_id;
1021 27         404 say $p_id, "\t", $c_id, "\t", $_->branch_length;
1022             }
1023             }
1024              
1025             sub random_tree{
1026 1     1 0 5 my @otus = _each_leaf($rootnode);
1027 1         3 my @sample;
1028 1 50       7 my $sample_size = $opts{"random"} == 0 ? int(scalar(@otus) / 2) : $opts{"random"};
1029              
1030 1 50       4 die "Error: sample size ($sample_size) exceeds number of OTUs (", scalar(@otus), ")" if $sample_size > scalar(@otus);
1031              
1032             # Use Reservoir Sampling to pick random otus.
1033 1         5 my @sampled = (1 .. $sample_size);
1034 1         5 for ($sample_size + 1 .. scalar(@otus)) {
1035 14 100       87 $sampled[rand(@sampled)] = $_ if rand() < $sample_size/$_
1036             }
1037 1         32 push @sample, $otus[--$_] for @sampled;
1038 1         8 &subset(\@sample)
1039             }
1040              
1041             # Depth to the root for a node
1042             sub depth_to_root {
1043 1     1 0 6 say $_->depth for _name2node($opts{'depth'})
1044             }
1045              
1046             # Remove Branch Lenghts
1047             #sub remove_brlengths {
1048             # foreach (@nodes) { $_->branch_length(0) if defined $_->branch_length }
1049             # $print_tree = 1
1050             #}
1051              
1052             sub alldesc {
1053 1     1 0 3 my @inodes;
1054 1         4 my $inode_id = $opts{'otus-desc'};
1055              
1056 1 50       5 if ($inode_id eq 'all') { push (@inodes, $_) for _walk_up($rootnode) }
  0         0  
1057 1         15 else { push @inodes, $tree->find_node(-internal_id => $inode_id) }
1058              
1059 1         1428 for my $nd (@inodes) {
1060 1         4 print $nd->internal_id, " ";
1061 1 50       35 if ($nd->is_Leaf) { print $nd->id } else { print $_->id, " " for _each_leaf($nd) }
  1         15  
  0         0  
1062 1         11 print "\n"
1063             }
1064             }
1065              
1066             # Walks from starting OTU
1067             sub walk {
1068 1     1 0 25 my $startleaf = $tree->find_node($opts{'walk'});
1069 1         1422 my $curnode = $startleaf->ancestor;
1070 1         9 my $last_curnode = $startleaf;
1071 1         2 my @decs;
1072             my %visited;
1073 1         3 my $totlen = 0;
1074 1         2 my @dpair;
1075 1         2 my $vcount = 0;
1076              
1077 1         4 $visited{$startleaf} = 1;
1078              
1079 1         5 while ($curnode) {
1080 4         32 $visited{$curnode} = 1;
1081 4         12 @dpair = ($last_curnode, $curnode);
1082 4         25 $totlen += $tree->distance(-nodes => \@dpair);
1083 4         10816 &_desclen($curnode, \%visited, \$totlen, \$vcount);
1084 4         10 $last_curnode = $curnode;
1085 4         14 $curnode = $curnode->ancestor
1086             }
1087             }
1088              
1089             sub walk_edge {
1090 0     0 0 0 my $startleaf = $tree->find_node($opts{'walk-edge'});
1091 0         0 my $curnode = $startleaf->ancestor;
1092 0         0 my $last_curnode = $startleaf;
1093 0         0 my @decs;
1094             my %visited;
1095 0         0 my $totlen = 0;
1096 0         0 my @dpair;
1097 0         0 my $vcount = 0;
1098              
1099 0         0 $visited{$startleaf} = 1;
1100              
1101 0         0 while ($curnode) {
1102 0         0 $visited{$curnode} = 1;
1103 0         0 @dpair = ($last_curnode, $curnode);
1104 0         0 my $pairLen = $tree->distance(-nodes => \@dpair);
1105 0   0     0 say join "\t", ($curnode->id() || $curnode->internal_id(), $last_curnode->id() || $last_curnode->internal_id(), $pairLen);
      0        
1106 0         0 $totlen += $pairLen;
1107 0         0 &_desclen($curnode, \%visited, \$totlen, \$vcount);
1108 0         0 $last_curnode = $curnode;
1109 0         0 $curnode = $curnode->ancestor
1110             }
1111             }
1112              
1113              
1114             # works for RAxML bipartition output and FastTree output with bootstrap values as node names
1115             sub delete_low_boot_support {
1116 0   0 0 0 0 my $cutoff = $opts{'del-low-boot'} || die 'spcify cutoff, e.g., 0.75 or 75\n'; # default 75
1117 0         0 &_remove_branch($rootnode, \$cutoff);
1118 0         0 $print_tree = 1;
1119             }
1120              
1121             sub delete_short_branch {
1122 0   0 0 0 0 my $cutoff = $opts{'del-short-br'} || die 'spcify cutoff, e.g., 0.02\n'; # default 0.02
1123 0         0 &_remove_short_branch($rootnode, $cutoff);
1124 0         0 $print_tree = 1;
1125             }
1126              
1127             sub mid_point_root {
1128 0     0 0 0 my $node1;
1129 0         0 my $maxL=0;
1130 0         0 foreach (@nodes) {
1131 0 0       0 next unless $_->is_Leaf();
1132 0         0 my $dis = $_->depth();
1133 0 0       0 if ($dis > $maxL){
1134 0         0 $node1 = $_;
1135 0         0 $maxL = $dis
1136             }
1137             }
1138              
1139 0         0 my $node2;
1140 0         0 $maxL=0;
1141 0         0 foreach (@nodes) {
1142 0 0 0     0 next unless $_->is_Leaf() || $_ eq $node1;
1143 0         0 my $dis = $tree->distance(-nodes=>[$node1, $_]);
1144 0 0       0 if ($dis > $maxL){
1145 0         0 $node2 = $_;
1146 0         0 $maxL = $dis
1147             }
1148             }
1149              
1150 0         0 my $nd = &_get_all_parents($node1,0,$maxL);
1151 0 0       0 $nd = &_get_all_parents($node2,0,$maxL) unless $nd;
1152 0         0 my ($node, $sumL) = @{$nd};
  0         0  
1153              
1154 0         0 my $nodeL = $node->branch_length();
1155 0         0 my $pnode = $node->ancestor();
1156 0         0 my $nodeL_new = $nodeL - $sumL + $maxL/2;
1157              
1158 0         0 $tree->reroot_at_midpoint($node);
1159 0         0 $pnode->branch_length($node->branch_length()*2-$nodeL_new);
1160 0         0 $node->branch_length($nodeL_new);
1161              
1162 0         0 $print_tree = 1;
1163             }
1164              
1165             sub _get_all_parents {
1166 0     0   0 my $nd = shift;
1167 0         0 my $sumL = shift;
1168 0         0 my $mL = shift;
1169 0         0 $sumL += $nd->branch_length();
1170 0 0       0 return [$nd, $sumL] if $sumL >= $mL/2;
1171 0 0       0 return if $nd->ancestor() eq $rootnode;
1172 0         0 &_get_all_parents($nd->ancestor(), $sumL, $mL);
1173             }
1174              
1175             =head2 write_out()
1176              
1177             Performs the bulk of the actions actions set via
1178             L|/initialize>.
1179              
1180             Call this after calling C<#initialize(\%opts)>.
1181              
1182             =cut
1183              
1184             sub write_out {
1185 15     15 1 61 my $opts = shift;
1186 15 50       99 print_all_node_ids() if $opts->{'ids-all'};
1187 15 50       68 rename_tips() if $opts->{'rename-tips'};
1188 15 50       96 write_tab_tree() if $opts->{'as-text'};
1189 15 50       81 cut_tree() if $opts->{'cut-tree'};
1190 15 50       59 trim_tips() if $opts->{'trim-tips'};
1191 15 50       96 cut_sister() if $opts->{'cut-sis'};
1192 15 50       73 mid_point_root() if $opts->{'mid-point'};
1193 15 50       81 pars_binary() if $opts->{'ci'};
1194 15 100       75 getdistance() if $opts->{'dist'};
1195 15 50       1582 delete_low_boot_support() if $opts->{'del-low-boot'};
1196 15 50       333 delete_short_branch() if $opts->{'del-short-br'};
1197 15 100       347 say $tree->total_branch_length() if $opts->{'length'};
1198 15 100       1273 countOTU() if $opts->{'otus-num'};
1199 15 100       68 $print_tree = 1 if defined($opts->{'output'});
1200 15 50       60 reroot() if $opts->{'reroot'};
1201 15 50       63 reorder_by_ref() if $opts->{'ref'};
1202 15 100       164 subset() if $opts->{'subset'};
1203 15 100       69 print_leaves_lengths() if $opts->{'otus-all'};
1204 15 100       73 getlca() if $opts->{'lca'};
1205 15 100       95 label_nodes() if $opts->{'label-nodes'};
1206 15 50       98 label_selected_nodes() if $opts->{'label-selected-nodes'};
1207 15 100       67 listdistance() if $opts->{'dist-all'};
1208 15 100       65 bin() if $opts->{'ltt'};
1209 15 100       157 print_all_lengths() if $opts->{'length-all'};
1210 15 100       118 random_tree() if defined($opts->{'random'});
1211 15 100       92 depth_to_root() if $opts->{'depth'};
1212 15 50       292 tips_to_root() if $opts->{'tips-to-root'};
1213 15 50       79 rotate_an_in_node() if $opts->{'rotate-node'};
1214             # sort_child() if $opts->{'sort-child'};
1215 15 100       95 alldesc() if $opts->{'otus-desc'};
1216 15 100       85 walk() if $opts->{'walk'};
1217 15 50       97 walk_edge() if $opts->{'walk-edge'};
1218 15 50       166 multi2bi() if $opts->{'multi2bi'};
1219 15 50 33     168 clean_tree() if $opts->{'clean-br'} || $opts->{'clean-boot'};
1220 15 50       72 delete_otus() if $opts->{'del-otus'};
1221 15 50       69 sister_pairs() if $opts->{'sis-pairs'};
1222 15 50       78 swap_otus() if $opts->{'swap-otus'};
1223 15 50       61 edge_length_abundance() if $opts->{'ead'};
1224 15 50       123 print_tree_shape() if $opts->{'tree-shape'};
1225 15 100       1212 say $tree->as_text($out_format) if $print_tree;
1226             }
1227              
1228             ################# internal subroutines ##############
1229              
1230             sub _remove_branch {
1231 0     0   0 my $nd = shift;
1232 0         0 my $ref = shift;
1233 0         0 my $bootcut = $$ref;
1234 0 0       0 return if $nd->is_Leaf();
1235 0         0 my @desc = $nd->each_Descendent();
1236 0         0 my $pa = $nd->ancestor();
1237 0         0 foreach my $ch (@desc) {
1238 0 0       0 if (!$nd->id()) { # no boostrap as node id (in-group branch)
1239 0         0 &_remove_branch($ch, $ref);
1240 0         0 next;
1241             }
1242 0 0       0 if ($nd->id() < $bootcut) {
1243 0         0 $pa->remove_Descendent($nd); # remove the current node
1244 0         0 $pa->add_Descendent($ch); # elevate the child node
1245 0         0 $ch->branch_length($ch->branch_length() + $nd->branch_length()); # increment branch length
1246             }
1247 0         0 &_remove_branch($ch, $ref);
1248             }
1249             }
1250              
1251             sub _remove_short_branch {
1252 0     0   0 my $nd = shift;
1253 0         0 my $brcut = shift;
1254 0 0       0 return if $nd->is_Leaf();
1255 0         0 my @desc = $nd->each_Descendent();
1256 0         0 my $pa = $nd->ancestor();
1257 0         0 foreach my $ch (@desc) {
1258 0 0 0     0 if ($nd->branch_length() && $nd->branch_length() <= $brcut) {
1259 0         0 $pa->remove_Descendent($nd); # remove the current node
1260 0         0 $pa->add_Descendent($ch); # elevate the child node
1261 0         0 $ch->branch_length($ch->branch_length() + $nd->branch_length()); # increment branch length
1262             }
1263 0         0 &_remove_short_branch($ch, $brcut);
1264             }
1265             }
1266              
1267             sub _name2node {
1268 6     6   35 my $str = shift;
1269 6         62 my @node_names = split /\s*,\s*/, $str;
1270 6         25 my $nd;
1271             my @node_objects;
1272 6         18 for my $node_name (@node_names) {
1273 17   33     111 $nd = $tree->find_node(-id => $node_name) || $tree->find_node(-internal_id => $node_name);
1274 17 50       21196 if ($nd) { push @node_objects, $nd } else { warn "Node/leaf '$node_name' not found. Ignoring...\n" }
  17         51  
  0         0  
1275             }
1276             return @node_objects
1277 6         47 }
1278              
1279             # _each_leaf ($node): returns a list of all OTU's descended from this node, if any
1280             sub _each_leaf {
1281 1     1   3 my $nd = shift;
1282 1         2 my @leaves;
1283 1 50       5 return ($nd) if $nd->is_Leaf;
1284 1 100       23 for ($nd->get_all_Descendents) { push (@leaves, $_) if $_->is_Leaf }
  27         1184  
1285             return @leaves
1286 1         13 }
1287              
1288             # main routine to walk up from root
1289             sub _wu {
1290 0     0   0 my (@lf, @nd);
1291 0         0 my $curnode = $_[0];
1292 0         0 my @decs = $_[0]->each_Descendent;
1293 0         0 my $visitref = $_[1];
1294 0         0 my %visited = %$visitref;
1295 0         0 my $node_list_ref = $_[2];
1296             # my $ref_ct_otu = $_[3];
1297             # my $ref_tatal_br_len = $_[4];
1298              
1299 0         0 for (@decs) {
1300             # $ref_total_br_len += $_->branch_length;
1301 0 0       0 if ($_->is_Leaf) {
1302 0         0 push @lf, $_;
1303             # $$ref_ct_otu++;
1304             } else {
1305 0         0 push @nd, $_
1306             }
1307             }
1308              
1309 0 0       0 for (@lf) { if (!exists($visited{$_})) { $visited{$_} = 1; push @$node_list_ref, $_ } }
  0         0  
  0         0  
  0         0  
1310 0         0 for (@nd) {
1311 0 0       0 next if exists($visited{$_});
1312 0         0 $visited{$_} = 1;
1313 0         0 push @$node_list_ref, $_;
1314 0         0 &_wu($_, \%visited, $node_list_ref)
1315             }
1316             }
1317              
1318             # Walk Up: "Walks" up from a given node and returned an order array representing the order that each node descended from the given node was visited.
1319             sub _walk_up {
1320 0     0   0 my %visited;
1321 0         0 my @node_list = $_[0];
1322 0         0 &_wu($_[0], \%visited, \@node_list);
1323             return @node_list
1324 0         0 }
1325              
1326             sub print_all_node_ids {
1327 0     0 0 0 my %visited;
1328 0         0 my @node_list = ($rootnode);
1329 0         0 &_wu($rootnode, \%visited, \@node_list);
1330 0         0 foreach (@node_list) {
1331 0   0     0 print $_->id() || $_->internal_id(), "\n";
1332             }
1333             }
1334              
1335             sub _treeheight {
1336 1     1   3 my $height = 0;
1337 1         3 my $tree = $_[0];
1338 1 100       5 for ($$tree->get_nodes) { $height = $_->depth if $_->depth > $height }
  28         2873  
1339 1         113 return $height
1340             }
1341              
1342             sub _binrecursive {
1343              
1344 213     213   15007 my $branchcount = 0;
1345 213         440 my $noderef = $_[0];
1346 213         346 my $upper = $_[1];
1347 213         608 my @desc = $$noderef->each_Descendent;
1348 213 100       6327 $branchcount-- unless $$noderef->is_Leaf;
1349              
1350 213         2157 for (@desc) {
1351 224         972 $branchcount++;
1352 224 100       591 $branchcount += _binrecursive(\$_, $upper) if $_->depth <= $upper
1353             }
1354 213         1035 return $branchcount
1355             }
1356              
1357             # Starting at a node that has 2 descendents, print the distance from start to desc if it's a leaf or call itself passing the internal-node descendent
1358             # Input: basenode, internal node
1359             sub _desclen {
1360             # startlear, curnode
1361 13     13   29 my (@dpair, @lf, @nd);
1362 13         23 my $curnode = $_[0];
1363 13         46 my @decs = $_[0]->each_Descendent;
1364 13         618 my $visitref = $_[1];
1365 13         23 my $totlen = $_[2];
1366 13         21 my $vcountref = $_[3];
1367 13         131 my %visited = %$visitref;
1368 13         35 my $dist;
1369              
1370 13 100       62 for (@decs) { if ($_->is_Leaf) { push @lf, $_ } else { push @nd, $_ } }
  27         74  
  15         161  
  12         122  
1371 13         27 for (@lf) {
1372 15 100       157 next if exists($visited{$_});
1373 14         49 $visited{$_} = 1;
1374 14         29 $dpair[0] = $curnode;
1375 14         23 $dpair[1] = $_;
1376 14         62 $dist = $tree->distance(-nodes => \@dpair);
1377 14         37741 $$totlen += $dist;
1378 14         34 $$vcountref++;
1379 14 50 0     57 say join "\t", ($curnode->id() || $curnode->internal_id(), $_->id() || $_->internal_id(), $dist) if $opts{'walk-edge'};
      0        
1380 14 50       65 say $_->id, "\t$$totlen\t$$vcountref" if $opts{'walk'}
1381             }
1382              
1383 13         232 for (@nd) {
1384 12 100       55 next if exists($visited{$_});
1385 9         71 $visited{$_} = 1;
1386 9         19 $dpair[0] = $curnode;
1387 9         19 $dpair[1] = $_;
1388 9         40 $dist = $tree->distance(-nodes => \@dpair);
1389 9 50 0     23611 say join "\t", ($curnode->id() || $curnode->internal_id(), $_->id() || $_->internal_id(), $dist) if $opts{'walk-edge'};
      0        
1390 9         22 $$totlen += $dist;
1391 9         48 &_desclen($_, \%visited, $totlen, $vcountref)
1392             }
1393             }
1394              
1395             1;
1396              
1397             __END__