File Coverage

Bio/Tree/TreeFunctionsI.pm
Criterion Covered Total %
statement 241 417 57.7
branch 93 200 46.5
condition 33 85 38.8
subroutine 16 25 64.0
pod 19 19 100.0
total 402 746 53.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tree::TreeFunctionsI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14              
15             =head1 NAME
16              
17             Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
18              
19             =head1 SYNOPSIS
20              
21             use Bio::TreeIO;
22             my $in = Bio::TreeIO->new(-format => 'newick', -file => 'tree.tre');
23              
24             my $tree = $in->next_tree;
25              
26             my @nodes = $tree->find_node('id1');
27              
28             if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
29             #...
30             }
31              
32             =head1 DESCRIPTION
33              
34             This interface provides a set of implementated Tree functions which
35             only use the defined methods in the TreeI or NodeI interface.
36              
37             =head1 FEEDBACK
38              
39             =head2 Mailing Lists
40              
41             User feedback is an integral part of the evolution of this and other
42             Bioperl modules. Send your comments and suggestions preferably to
43             the Bioperl mailing list. Your participation is much appreciated.
44              
45             bioperl-l@bioperl.org - General discussion
46             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47              
48             =head2 Support
49              
50             Please direct usage questions or support issues to the mailing list:
51              
52             I
53              
54             rather than to the module maintainer directly. Many experienced and
55             reponsive experts will be able look at the problem and quickly
56             address it. Please include a thorough description of the problem
57             with code and data examples if at all possible.
58              
59             =head2 Reporting Bugs
60              
61             Report bugs to the Bioperl bug tracking system to help us keep track
62             of the bugs and their resolution. Bug reports can be submitted via the
63             web:
64              
65             https://github.com/bioperl/bioperl-live/issues
66              
67             =head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
68              
69             Email jason-at-bioperl-dot-org
70             Email amackey-at-virginia.edu
71             Email jtr4v-at-virginia.edu
72              
73             =head1 CONTRIBUTORS
74              
75             Sendu Bala, bix@sendu.me.uk
76              
77             Rerooting code was worked on by
78              
79             Daniel Barker d.barker-at-reading.ac.uk
80             Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
81              
82             =head1 APPENDIX
83              
84             The rest of the documentation details each of the object methods.
85             Internal methods are usually preceded with a _
86              
87             =cut
88              
89             # Let the code begin...
90              
91              
92             package Bio::Tree::TreeFunctionsI;
93              
94 67     67   306 use strict;
  67         76  
  67         1714  
95 67     67   257 use base qw(Bio::Tree::TreeI);
  67         75  
  67         191667  
96              
97              
98             =head2 find_node
99              
100             Title : find_node
101             Usage : my @nodes = $self->find_node(-id => 'node1');
102             Function: returns all nodes that match a specific field, by default this
103             is id, but different branch_length,
104             Returns : List of nodes which matched search
105             Args : text string to search for
106             OR
107             -fieldname => $textstring
108              
109             =cut
110              
111             sub find_node {
112 710     710 1 14926 my ($self, $type, $field) = @_;
113 710 50       1076 if( ! defined $type ) {
114 0         0 $self->warn("Must request a either a string or field and string when searching");
115             }
116              
117             # all this work for a '-' named field
118             # is so that we could potentially
119             # expand to other constraints in
120             # different implementations
121             # like 'find all nodes with boostrap < XX'
122              
123 710 100       859 if( ! defined $field ) {
124             # only 1 argument, default to searching by id
125 40         44 $field = $type;
126 40         47 $type = 'id';
127             } else {
128 670         1974 $type =~ s/^-//;
129             }
130              
131             # could actually do this by testing $rootnode->can($type) but
132             # it is possible that a tree is implemeted with different node types
133             # - although it is unlikely that the root node would be richer than the
134             # leaf nodes. Can't handle NHX tags right now
135              
136 710 100 66     1377 my @nodes = grep { $_->can($type) && defined $_->$type() &&
  8351         19135  
137             $_->$type() eq $field } $self->get_nodes();
138              
139 710 100       1073 if ( wantarray) {
140 445         968 return @nodes;
141             } else {
142 265 50       510 if( @nodes > 1 ) {
143 0         0 $self->warn("More than 1 node found but caller requested scalar, only returning first node");
144             }
145 265         496 return shift @nodes;
146             }
147             }
148              
149              
150             =head2 remove_Node
151              
152             Title : remove_Node
153             Usage : $tree->remove_Node($node)
154             Function: Removes a node from the tree
155             Returns : boolean represent status of success
156             Args : either Bio::Tree::NodeI or string of the node id
157              
158             =cut
159              
160             sub remove_Node {
161 1     1 1 439 my ($self,$input) = @_;
162 1         2 my $node = undef;
163 1 50       4 unless( ref($input) ) {
    0          
164 1         3 $node = $self->find_node($input);
165             } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
166 0         0 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
167 0         0 return 0;
168             } else {
169 0         0 $node = $input;
170             }
171 1 50 33     6 if( ! $node->ancestor &&
172             $self->get_root_node->internal_id != $node->internal_id) {
173 0         0 $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
174             } else {
175 1         4 $node->ancestor->remove_Descendent($node);
176             }
177             }
178              
179              
180             =head2 get_lineage_nodes
181              
182             Title : get_lineage_nodes
183             Usage : my @nodes = $tree->get_lineage_nodes($node);
184             Function: Given a node or its ID, get its full lineage, i.e. all its ancestors,
185             from the root to the most recent ancestor. Only use the node ID as
186             input if the nodes have been added to the tree.
187             Returns : list of nodes
188             Args : either Bio::Tree::NodeI (or string of the node id)
189              
190             =cut
191              
192             sub get_lineage_nodes {
193 907     907 1 1000 my ($self, $input) = @_;
194 907         698 my $node;
195              
196             # Sanity checks
197 907 50       1569 if (ref $input) {
198 907 50       2361 if (not $input->isa('Bio::Tree::NodeI')) {
199 0         0 $self->throw("Did not provide a valid Bio::Tree::NodeI object or ID string to get_lineage_nodes");
200             }
201 907         914 $node = $input;
202             } else {
203 0         0 $node = $self->find_node($input);
204             }
205              
206             # When dealing with Bio::Taxon objects with databases, the root will always
207             # be the database's root, ignoring this Tree's set root node; prefer the
208             # Tree's idea of root.
209 907   100     1721 my $root = $self->get_root_node || '';
210              
211 907         876 my @lineage;
212 907         1565 while ($node) {
213 5725   100     7884 $node = $node->ancestor || last;
214 5466         5954 unshift(@lineage, $node);
215 5466 100       13846 $node eq $root && last;
216             }
217 907         2619 return @lineage;
218             }
219              
220              
221             =head2 get_lineage_string
222              
223             Title : get_lineage_string
224             Usage : my $lineage = $tree->get_lineage_string($node);
225             Function: Get the string representation of the full lineage of a node, e.g.
226             for the Enterobacteriales node, return
227             Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales.
228             This method uses get_lineage_nodes internally and therefore inherits
229             of all of its caveats.
230             Returns : string
231             Args : * either Bio::Tree::NodeI (or string of the node id)
232             * an optional separator (default: ';')
233              
234             =cut
235              
236             sub get_lineage_string {
237 0     0 1 0 my ($self, $input, $sep) = @_;
238 0   0     0 $sep ||= ';';
239 0         0 my $node;
240 0 0       0 unless (ref $input) {
    0          
241 0         0 $node = $self->find_node($input);
242             }
243             elsif (! $input->isa('Bio::Tree::NodeI')) {
244 0         0 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
245 0         0 return;
246             }
247             else {
248 0         0 $node = $input;
249             }
250 0         0 my @nodes = ($self->get_lineage_nodes($node), $node);
251 0         0 for my $i (0 .. scalar @nodes - 1) {
252 0   0     0 my $node_name = $nodes[$i]->node_name || '';
253 0 0       0 if ($node_name =~ m/$sep/) {
254 0         0 $self->warn("Separator '$sep' is not safe to use because the node ".
255             "called '$node_name' contains it. Consider using another separator".
256             " or sanitizing the node name.");
257             }
258 0         0 $nodes[$i] = $node_name;
259             }
260 0         0 return join $sep, @nodes;
261             }
262              
263              
264             =head2 splice
265              
266             Title : splice
267             Usage : $tree->splice(-remove_id => \@ids);
268             Function: Remove all the nodes from a tree that correspond to the supplied
269             args, making all the descendents of a removed node the descendents
270             of the removed node's ancestor.
271             You can ask to explicitly remove certain nodes by using -remove_*,
272             remove them conditionally by using -remove_* in combination with
273             -keep_*, or remove everything except certain nodes by using only
274             -keep_*.
275             Returns : n/a
276             Args : just a list of Bio::Tree::NodeI objects to remove, OR
277             -key => value pairs, where -key has the prefix 'remove' or 'keep',
278             followed by an underscore, followed by a fieldname (like for the
279             method find_node). Value should be a scalar or an array ref of
280             scalars (again, like you might supply to find_node).
281              
282             So (-remove_id => [1, 2]) will remove all nodes from the tree that
283             have an id() of '1' or '2', while
284             (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
285             an id() of '1'.
286             (-keep_id => [2]) will remove all nodes unless they have an id() of
287             '2' (note, no -remove_*).
288              
289             -preserve_lengths => 1 : setting this argument will splice out
290             intermediate nodes, preserving the original total length between
291             the ancestor and the descendants of the spliced node. Undef
292             by default.
293              
294             =cut
295              
296             sub splice {
297 12     12 1 1470 my ($self, @args) = @_;
298 12 50       31 $self->throw("Must supply some arguments") unless @args > 0;
299 12         14 my $preserve_lengths = 0;
300 12         11 my @nodes_to_remove;
301 12 100       32 if (ref($args[0])) {
302 8 50       65 $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
303 8         19 @nodes_to_remove = @args;
304             }
305             else {
306 4 50       9 $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
307 4         10 my %args = @args;
308 4         5 my @keep_nodes;
309             my @remove_nodes;
310 4         5 my $remove_all = 1;
311 4         13 while (my ($key, $value) = each %args) {
312 5 100       12 my @values = ref($value) ? @{$value} : ($value);
  2         5  
313              
314 5 100       28 if ($key =~ s/remove_//) {
    50          
    0          
315 3         3 $remove_all = 0;
316 3         5 foreach my $value (@values) {
317 4         8 push(@remove_nodes, $self->find_node($key => $value));
318             }
319             }
320             elsif ($key =~ s/keep_//) {
321 2         3 foreach my $value (@values) {
322 8         29 push(@keep_nodes, $self->find_node($key => $value));
323             }
324             }
325             elsif ($key =~ /preserve/) {
326 0         0 $preserve_lengths = $value;
327             }
328             }
329              
330 4 100       11 if ($remove_all) {
331 1 50       4 if (@keep_nodes == 0) {
332 0         0 $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
333 0         0 return;
334             }
335              
336 1         4 @remove_nodes = $self->get_nodes;
337             }
338 4 100       9 if (@keep_nodes > 0) {
339 2         4 my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
  8         12  
340 2         5 foreach my $node (@remove_nodes) {
341 11 100       17 push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
342             }
343             }
344             else {
345 2         6 @nodes_to_remove = @remove_nodes;
346             }
347             }
348             # do the splicing
349             #*** the algorithm here hasn't really been thought through and tested much,
350             # will probably need revising
351 12         16 my %root_descs;
352 12         12 my $reroot = 0;
353 12         24 foreach my $node (@nodes_to_remove) {
354 49         65 my @descs = $node->each_Descendent;
355 49         103 my $ancestor = $node->ancestor;
356 49 0 33     69 if (! $ancestor && ! $reroot) {
357             # we're going to remove the tree root, so will have to re-root the
358             # tree later
359 0         0 $reroot = 1;
360 0         0 %root_descs = map { $_->internal_id => $_ } @descs;
  0         0  
361 0         0 $node->remove_all_Descendents;
362 0         0 next;
363             }
364 49 50       66 if (exists $root_descs{$node->internal_id}) {
365             # well, this one can't be the future root anymore
366 0         0 delete $root_descs{$node->internal_id};
367             # but maybe one of this one's descs will become the root
368 0         0 foreach my $desc (@descs) {
369 0         0 $root_descs{$desc->internal_id} = $desc;
370             }
371             }
372             # make the ancestor of our descendents our own ancestor, and give us
373             # no ancestor of our own to remove us from the tree
374 49         66 foreach my $desc (@descs) {
375 46         63 $desc->ancestor($ancestor);
376 46 50       71 $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths;
377             }
378 49         72 $node->ancestor(undef);
379             }
380 12 50       46 if ($reroot) {
381 0         0 my @candidates = values %root_descs;
382 0 0       0 $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
383 0 0       0 $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
384 0         0 $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
385             }
386             }
387              
388              
389             =head2 get_lca
390              
391             Title : get_lca
392             Usage : get_lca(-nodes => \@nodes ); OR
393             get_lca(@nodes);
394             Function: given two or more nodes, returns the lowest common ancestor (aka most
395             recent common ancestor)
396             Returns : node object or undef if there is no common ancestor
397             Args : -nodes => arrayref of nodes to test, OR
398             just a list of nodes
399              
400             =cut
401              
402             sub get_lca {
403 124     124 1 147 my ($self, @args) = @_;
404 124         291 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
405 124         127 my @nodes;
406 124 100       210 if (ref($nodes) eq 'ARRAY') {
407 2         3 @nodes = @{$nodes};
  2         5  
408             }
409             else {
410 122         159 @nodes = @args;
411             }
412 124 50       177 @nodes >= 2 or $self->throw("At least 2 nodes are required");
413             # We must go root->leaf to get the correct answer to lca (in a world where
414             # internal_id might not be uniquely assigned), but leaf->root is more
415             # forgiving (eg. lineages may not all have the same root, or they may have
416             # different numbers of 'minor' taxa inbeteen 'major' ones).
417             #
418             # I use root->leaf so that we can easily do multiple nodes at once - no
419             # matter what taxa are below the lca, the lca and all its ancestors ought to
420             # be identical.
421 124         95 my @paths;
422 124         134 foreach my $node (@nodes) {
423 335 50 33     1152 unless(ref($node) && $node->isa('Bio::Tree::NodeI')) {
424 0         0 $self->throw("Cannot process get_lca() with a non-NodeI object ($node)\n");
425             }
426 335         407 my @path = ($self->get_lineage_nodes($node), $node);
427 335         418 push(@paths, \@path);
428             }
429 124 50       200 return unless @paths >= 2;
430 124         86 my $lca;
431 124         192 LEVEL: while ($paths[0] > 0) {
432 370         259 my %node_ids;
433             my $node;
434 370         311 foreach my $path (@paths) {
435 942   50     551 $node = shift(@{$path}) || last LEVEL;
436 942         1032 my $node_id = $node->internal_id;
437 942 50       1030 unless (defined $node_id) {
438 0         0 $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
439 0         0 return;
440             }
441 942         1037 $node_ids{$node_id}++;
442             }
443 370 100       415 if (keys %node_ids == 1) {
444 246         440 $lca = $node;
445             }
446             else {
447             # at this point in the lineage the nodes are different; the previous
448             # loop had the lca
449 124         173 last LEVEL;
450             }
451             }
452             # If the tree that we are contains the lca (get_lca could have been called
453             # on an empty tree, since it works with plain Nodes), prefer to return the
454             # node object that belongs to us
455 124 100 66     356 if ($lca && $self->number_nodes > 0) {
456 122         250 my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
457 122 50       196 $lca = $own_lca if $own_lca;
458             }
459 124         293 return $lca;
460             }
461              
462              
463             =head2 merge_lineage
464              
465             Title : merge_lineage
466             Usage : merge_lineage($node)
467             Function: Merge a lineage of nodes with this tree.
468             Returns : true for success, false (and a warning) otherwise
469             Args : Bio::Tree::TreeI with only one leaf, OR
470             Bio::Tree::NodeI which has an ancestor
471              
472             For example, if we are the tree $tree:
473              
474             +---B
475             |
476             A
477             |
478             +---C
479              
480             and we want to merge the lineage $other_tree:
481              
482             A---C---D
483              
484             After calling $tree->merge_lineage($other_tree), $tree looks like:
485              
486             +---B
487             |
488             A
489             |
490             +---C---D
491              
492             =cut
493              
494             sub merge_lineage {
495 0     0 1 0 my ($self, $thing) = @_;
496 0 0       0 $self->throw("Must supply an object reference") unless ref($thing);
497              
498 0         0 my $lineage_leaf;
499 0 0       0 if ($thing->isa('Bio::Tree::TreeI')) {
    0          
500 0         0 my @leaves = $thing->get_leaf_nodes;
501 0 0       0 $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
502 0         0 $lineage_leaf = shift(@leaves);
503             }
504             elsif ($thing->isa('Bio::Tree::NodeI')) {
505 0 0       0 $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
506 0         0 $lineage_leaf = $thing;
507             }
508              
509             # Find the lowest node in the supplied lineage that is in the tree
510             # That will be our lca and we can merge at the node below
511 0         0 my @lineage = ($lineage_leaf, reverse($self->get_lineage_nodes($lineage_leaf)));
512 0         0 my $merged = 0;
513 0         0 my $node;
514 0         0 my $i = 0;
515 0         0 while ($i <= $#lineage) {
516 0         0 $node = $self->find_node(-internal_id => $lineage[$i]->internal_id);
517 0 0       0 if (defined $node) {
518 0         0 $merged = 1;
519 0         0 last;
520             }
521 0         0 $i++;
522             }
523 0 0       0 if (not $merged) {
524 0         0 $self->warn("Could not merge the lineage of ".$lineage_leaf->id." with the rest of the tree");
525             }
526              
527             # Merge descendents, recursively
528 0         0 while ($i > 0) {
529 0         0 $node->add_Descendent($lineage[$i-1]);
530 0         0 $node = $self->find_node(-internal_id => $lineage[$i-1]->internal_id);
531 0         0 $i--;
532             }
533              
534 0         0 return $merged;
535             }
536              
537              
538             =head2 contract_linear_paths
539              
540             Title : contract_linear_paths
541             Usage : contract_linear_paths()
542             Function: Splices out all nodes in the tree that have an ancestor and only one
543             descendent.
544             Returns : n/a
545             Args : none for normal behaviour, true to dis-regard the ancestor requirment
546             and re-root the tree as necessary
547              
548             For example, if we are the tree $tree:
549              
550             +---E
551             |
552             A---B---C---D
553             |
554             +---F
555              
556             After calling $tree->contract_linear_paths(), $tree looks like:
557              
558             +---E
559             |
560             A---D
561             |
562             +---F
563              
564             Instead, $tree->contract_linear_paths(1) would have given:
565              
566             +---E
567             |
568             D
569             |
570             +---F
571              
572             =cut
573              
574             sub contract_linear_paths {
575 8     8 1 12 my $self = shift;
576 8         10 my $reroot = shift;
577 8         7 my @remove;
578 8         24 foreach my $node ($self->get_nodes) {
579 104 100 100     122 if ($node->ancestor && $node->each_Descendent == 1) {
580 44         54 push(@remove, $node);
581             }
582             }
583 8 50       41 $self->splice(@remove) if @remove;
584 8 50       53 if ($reroot) {
585 0         0 my $root = $self->get_root_node;
586 0         0 my @descs = $root->each_Descendent;
587 0 0       0 if (@descs == 1) {
588 0         0 my $new_root = shift(@descs);
589 0         0 $self->set_root_node($new_root);
590 0         0 $new_root->ancestor(undef);
591             }
592             }
593             }
594              
595              
596             =head2 is_binary
597              
598             Example : is_binary(); is_binary($node);
599             Description: Finds if the tree or subtree defined by
600             the internal node is a true binary tree
601             without polytomies
602             Returns : boolean
603             Exceptions :
604             Args : Internal node Bio::Tree::NodeI, optional
605              
606              
607             =cut
608              
609             sub is_binary {
610 29     29 1 23 my $self = shift;
611 29   66     53 my $node = shift || $self->get_root_node;
612              
613 29         21 my $binary = 1;
614 29         36 my @descs = $node->each_Descendent;
615 29 100 100     72 $binary = 0 unless @descs == 2 or @descs == 0;
616             #print "$binary, ", scalar @descs, "\n";
617              
618             # recurse
619 29         29 foreach my $desc (@descs) {
620 27         35 $binary += $self->is_binary($desc) -1;
621             }
622 29 100       35 $binary = 0 if $binary < 0;
623 29         38 return $binary;
624             }
625              
626              
627             =head2 force_binary
628              
629             Title : force_binary
630             Usage : force_binary()
631             Function: Forces the tree into a binary tree, splitting branches arbitrarily
632             and creating extra nodes as necessary, such that all nodes have
633             exactly two or zero descendants.
634             Returns : n/a
635             Args : none
636              
637             For example, if we are the tree $tree:
638              
639             +---G
640             |
641             +---F
642             |
643             +---E
644             |
645             A
646             |
647             +---D
648             |
649             +---C
650             |
651             +---B
652              
653             (A has 6 descendants B-G)
654              
655             After calling $tree->force_binary(), $tree looks like:
656              
657             +---X
658             |
659             +---X
660             | |
661             | +---X
662             |
663             +---X
664             | |
665             | | +---G
666             | | |
667             | +---X
668             | |
669             | +---F
670             A
671             | +---E
672             | |
673             | +---X
674             | | |
675             | | +---D
676             | |
677             +---X
678             |
679             | +---C
680             | |
681             +---X
682             |
683             +---B
684              
685             (Where X are artificially created nodes with ids 'artificial_n', where n is
686             an integer making the id unique within the tree)
687              
688             =cut
689              
690             sub force_binary {
691 12     12 1 12 my $self = shift;
692 12   66     16 my $node = shift || $self->get_root_node;
693              
694 12         15 my @descs = $node->each_Descendent;
695 12 100       24 if (@descs > 2) {
    50          
696             # Removed overly verbose warning - cjfields 3-12-11
697            
698             # Many nodes have no identifying names, a simple warning is probably
699             # enough.
700              
701 2         9 $self->warn("Node has more than two descendants\nWill do an arbitrary balanced split");
702 2         2 my @working = @descs;
703             # create an even set of artifical nodes on which to later hang the descs
704 2         5 my $half = @working / 2;
705 2 100       7 $half++ if $half > int($half);
706 2         2 $half = int($half);
707 2         3 my @artificials;
708 2         3 while ($half > 1) {
709 2         2 my @this_level;
710 2   33     8 foreach my $top_node (@artificials || $node) {
711 2         4 for (1..2) {
712 4         15 my $art = $top_node->new(-id => "artificial_".++$self->{_art_num});
713 4         9 $top_node->add_Descendent($art);
714 4         7 push(@this_level, $art);
715             }
716             }
717 2         3 @artificials = @this_level;
718 2         5 $half--;
719             }
720             # attach two descs to each artifical leaf
721 2         3 foreach my $art (@artificials) {
722 4         5 for (1..2) {
723 8   66     17 my $desc = shift(@working) || $node->new(-id => "artificial_".++$self->{_art_num});
724 8         13 $desc->ancestor($art);
725             }
726             }
727             }
728             elsif (@descs == 1) {
729             # ensure that all nodes have 2 descs
730 0         0 $node->add_Descendent($node->new(-id => "artificial_".++$self->{_art_num}));
731             }
732             # recurse
733 12         17 foreach my $desc (@descs) {
734 11         24 $self->force_binary($desc);
735             }
736             }
737              
738              
739             =head2 simplify_to_leaves_string
740              
741             Title : simplify_to_leaves_string
742             Usage : my $leaves_string = $tree->simplify_to_leaves_string()
743             Function: Creates a simple textual representation of the relationship between
744             leaves in self. It forces the tree to be binary, so the result may
745             not strictly correspond to the tree (if the tree wasn't binary), but
746             will be as close as possible. The tree object is not altered. Only
747             leaf node ids are output, in a newick-like format.
748             Returns : string
749             Args : none
750              
751             =cut
752              
753             sub simplify_to_leaves_string {
754 0     0 1 0 my $self = shift;
755              
756             # Before contracting and forcing binary we need to clone self, but Clone.pm
757             # clone() seg faults and fails to make the clone, whilst Storable dclone
758             # needs $self->{_root_cleanup_methods} deleted (code ref) and seg faults at
759             # end of script. Let's make our own clone...
760 0         0 my $tree = $self->_clone;
761              
762 0         0 $tree->contract_linear_paths(1);
763 0         0 $tree->force_binary;
764 0         0 foreach my $node ($tree->get_nodes) {
765 0         0 my $id = $node->id;
766 0 0 0     0 $id = ($node->is_Leaf && $id !~ /^artificial/) ? $id : '';
767 0         0 $node->id($id);
768             }
769              
770 0         0 my %paired;
771 0         0 my @data = $self->_simplify_helper($tree->get_root_node, \%paired);
772              
773 0         0 return join(',', @data);
774             }
775              
776              
777             # alias
778 0     0   0 sub _clone { shift->clone(@_) }
779              
780              
781             # safe node clone that doesn't seg fault, but deliberately loses ancestors and
782             # descendents
783             sub _clone_node {
784 0     0   0 my ($self, $node) = @_;
785 0         0 my $clone = $node->new;
786              
787 0         0 while (my ($key, $val) = each %{$node}) {
  0         0  
788 0 0 0     0 if ($key eq '_desc' || $key eq '_ancestor') {
789 0         0 next;
790             }
791 0         0 ${$clone}{$key} = $val;
  0         0  
792             }
793              
794 0         0 return $clone;
795             }
796              
797              
798             # tree string generator for simplify_to_leaves_string, based on
799             # Bio::TreeIO::newick::_write_tree_Helper
800             sub _simplify_helper {
801 0     0   0 my ($self, $node, $paired) = @_;
802 0 0       0 return () if (!defined $node);
803              
804 0         0 my @data = ();
805 0         0 foreach my $node ($node->each_Descendent()) {
806 0         0 push(@data, $self->_simplify_helper($node, $paired));
807             }
808              
809 0   0     0 my $id = $node->id_output || '';
810 0 0       0 if (@data) {
    0          
811 0 0 0     0 unless (exists ${$paired}{"@data"} || @data == 1) {
  0         0  
812 0         0 $data[0] = "(" . $data[0];
813 0         0 $data[-1] .= ")";
814 0         0 ${$paired}{"@data"} = 1;
  0         0  
815             }
816             }
817             elsif ($id) {
818 0         0 push(@data, $id);
819             }
820              
821 0         0 return @data;
822             }
823              
824              
825             =head2 distance
826              
827             Title : distance
828             Usage : distance(-nodes => \@nodes )
829             Function: returns the distance between two given nodes
830             Returns : numerical distance
831             Args : -nodes => arrayref of nodes to test
832             or ($node1, $node2)
833              
834             =cut
835              
836             sub distance {
837 0     0 1 0 my ($self,@args) = @_;
838 0         0 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
839 0 0       0 if( ! defined $nodes ) {
    0          
    0          
840 0         0 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
841 0         0 return;
842             }
843             elsif (ref($nodes) eq 'ARRAY') {
844 0         0 1;
845             }
846             elsif ( @args == 2) { # assume these are nodes...
847 0         0 $nodes = \@args;
848             }
849             else {
850 0         0 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
851 0         0 return;
852             }
853 0 0       0 $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
  0         0  
854              
855 0         0 my $lca = $self->get_lca(@{$nodes});
  0         0  
856 0 0       0 unless($lca) {
857 0         0 $self->warn("could not find the lca of supplied nodes; can't find distance either");
858 0         0 return;
859             }
860              
861 0         0 my $cumul_dist = 0;
862 0         0 my $warned = 0;
863 0         0 foreach my $current_node (@{$nodes}) {
  0         0  
864 0         0 while (1) {
865 0 0       0 last if $current_node eq $lca;
866 0 0       0 if ($current_node->branch_length) {
    0          
867 0         0 $cumul_dist += $current_node->branch_length;
868             }
869             elsif (! $warned) {
870 0         0 $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
871 0         0 $warned = 1;
872             }
873              
874 0   0     0 $current_node = $current_node->ancestor || last;
875             }
876             }
877              
878 0         0 return $cumul_dist;
879             }
880              
881              
882             =head2 is_monophyletic
883              
884             Title : is_monophyletic
885             Usage : if( $tree->is_monophyletic(-nodes => \@nodes,
886             -outgroup => $outgroup)
887             Function: Will do a test of monophyly for the nodes specified
888             in comparison to a chosen outgroup
889             Returns : boolean
890             Args : -nodes => arrayref of nodes to test
891             -outgroup => outgroup to serve as a reference
892              
893             =cut
894              
895             sub is_monophyletic{
896 6     6 1 32 my ($self,@args) = @_;
897 6         24 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
898              
899 6 50 33     31 if( ! defined $nodes || ! defined $outgroup ) {
900 0         0 $self->warn("Must supply -nodes and -outgroup parameters to the method
901             is_monophyletic");
902 0         0 return;
903             }
904 6 50       28 if( ref($nodes) !~ /ARRAY/i ) {
905 0         0 $self->warn("Must provide a valid array reference for -nodes");
906             }
907              
908 6         21 my $clade_root = $self->get_lca(@{$nodes});
  6         32  
909 6 50       13 unless( defined $clade_root ) {
910 0         0 $self->warn("could not find clade root via lca");
911 0         0 return;
912             }
913              
914 6         10 my $og_ancestor = $outgroup->ancestor;
915 6         12 while( defined ($og_ancestor ) ) {
916 12 100       13 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
917             # monophyly is violated
918 2         13 return 0;
919             }
920 10         16 $og_ancestor = $og_ancestor->ancestor;
921             }
922 4         24 return 1;
923             }
924              
925              
926             =head2 is_paraphyletic
927              
928             Title : is_paraphyletic
929             Usage : if( $tree->is_paraphyletic(-nodes =>\@nodes,
930             -outgroup => $node) ){ }
931             Function: Tests whether or not a given set of nodes are paraphyletic
932             (representing the full clade) given an outgroup
933             Returns : [-1,0,1] , -1 if the group is not monophyletic
934             0 if the group is not paraphyletic
935             1 if the group is paraphyletic
936             Args : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
937             -outgroup => a Bio::Tree::NodeI to compare the nodes to
938              
939              
940             =cut
941              
942             sub is_paraphyletic{
943 2     2 1 5 my ($self,@args) = @_;
944 2         7 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
945              
946 2 50 33     10 if( ! defined $nodes || ! defined $outgroup ) {
947 0         0 $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
948 0         0 return;
949             }
950 2 50       8 if( ref($nodes) !~ /ARRAY/i ) {
951 0         0 $self->warn("Must provide a valid array reference for -nodes");
952 0         0 return;
953             }
954              
955             # Algorithm
956             # Find the lca
957             # Find all the nodes beneath the lca
958             # Test to see that none are missing from the nodes list
959 2         2 my %nodehash;
960 2         4 foreach my $n ( @$nodes ) {
961 6         9 $nodehash{$n->internal_id} = $n;
962             }
963              
964 2         7 my $clade_root = $self->get_lca(-nodes => $nodes );
965 2 50       5 unless( defined $clade_root ) {
966 0         0 $self->warn("could not find clade root via lca");
967 0         0 return;
968             }
969              
970 2         5 my $og_ancestor = $outgroup->ancestor;
971              
972             # Is this necessary/correct for paraphyly test?
973 2         5 while( defined ($og_ancestor ) ) {
974 4 50       5 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
975             # monophyly is violated, could be paraphyletic
976 0         0 return -1;
977             }
978 4         6 $og_ancestor = $og_ancestor->ancestor;
979             }
980 2         10 my $tree = Bio::Tree::Tree->new(-root => $clade_root,
981             -nodelete => 1);
982              
983 2         5 foreach my $n ( $tree->get_nodes() ) {
984 8 100       12 next unless $n->is_Leaf();
985             # if any leaf node is not in the list
986             # then it is part of the clade and so the list
987             # must be paraphyletic
988 4 100       6 return 1 unless ( $nodehash{$n->internal_id} );
989             }
990 1         4 return 0;
991             }
992              
993              
994             =head2 reroot
995              
996             Title : reroot
997             Usage : $tree->reroot($node);
998             Function: Reroots a tree making a new node the root
999             Returns : 1 on success, 0 on failure
1000             Args : Bio::Tree::NodeI that is in the tree, but is not the current root
1001              
1002             =cut
1003              
1004             sub reroot {
1005 18     18 1 39 my ($self,$new_root) = @_;
1006 18 50 33     116 unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
1007 0         0 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1008 0         0 return 0;
1009             }
1010              
1011 18         39 my $old_root = $self->get_root_node;
1012 18 100       46 if( $new_root == $old_root ) {
1013 1         7 $self->warn("Node requested for reroot is already the root node!");
1014 1         4 return 0;
1015             }
1016 17         35 my $anc = $new_root->ancestor;
1017 17 50       34 unless( $anc ) {
1018             # this is already the root
1019 0         0 $self->warn("Node requested for reroot is already the root node!");
1020 0         0 return 0;
1021             }
1022 17         71 my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1);
1023             # reverse the ancestor & children pointers
1024 17         36 my $former_anc = $tmp_node->ancestor;
1025 17         65 my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node);
1026 17         54 for (my $i = 0; $i < $#path_from_oldroot; $i++) {
1027 43         39 my $current = $path_from_oldroot[$i];
1028 43         46 my $next = $path_from_oldroot[$i + 1];
1029 43         62 $current->remove_Descendent($next);
1030 43         61 $current->branch_length($next->branch_length);
1031 43 100       64 $current->bootstrap($next->bootstrap) if defined $next->bootstrap;
1032 43         90 $next->remove_tag('B');
1033 43         71 $next->add_Descendent($current);
1034             }
1035              
1036 17         35 $new_root->add_Descendent($former_anc);
1037 17         32 $tmp_node->remove_Descendent($former_anc);
1038            
1039 17         19 $tmp_node = undef;
1040 17         29 $new_root->branch_length(undef);
1041              
1042 17         20 $old_root = undef;
1043 17         37 $self->set_root_node($new_root);
1044              
1045 17         47 return 1;
1046             }
1047              
1048              
1049             =head2 reroot_at_midpoint
1050              
1051             Title : reroot_at_midpoint
1052             Usage : $tree->reroot_at_midpoint($node, $new_root_id);
1053             Function: Reroots a tree on a new node created halfway between the
1054             argument and its ancestor
1055             Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure
1056             Args : non-root Bio::Tree::NodeI currently in $tree
1057             scalar string, id for new node (optional)
1058              
1059             =cut
1060              
1061             sub reroot_at_midpoint {
1062 0     0 1 0 my $self = shift;
1063 0         0 my $node = shift;
1064 0         0 my $id = shift;
1065              
1066 0 0 0     0 unless (defined $node && $node->isa("Bio::Tree::NodeI")) {
1067 0         0 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1068 0         0 return 0;
1069             }
1070              
1071 0         0 my $midpt = $node->create_node_on_branch(-FRACTION=>0.5);
1072 0 0       0 if (defined $id) {
1073 0 0       0 $self->warn("ID argument is not a scalar") if (ref $id);
1074 0 0 0     0 $midpt->id($id) if defined($id) && !ref($id);
1075             }
1076 0         0 $self->reroot($midpt);
1077 0         0 return $midpt;
1078             }
1079              
1080              
1081             =head2 findnode_by_id
1082              
1083             Title : findnode_by_id
1084             Usage : my $node = $tree->findnode_by_id($id);
1085             Function: Get a node by its id (which should be
1086             unique for the tree)
1087             Returns : L
1088             Args : node id
1089              
1090              
1091             =cut
1092              
1093             sub findnode_by_id {
1094 0     0 1 0 my $tree = shift;
1095 0         0 $tree->deprecated("use of findnode_by_id() is deprecated; ".
1096             "use find_node() instead");
1097 0         0 my $id = shift;
1098 0         0 my $rootnode = $tree->get_root_node;
1099 0 0 0     0 if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
1100 0         0 return $rootnode;
1101             }
1102             # process all the children
1103 0         0 foreach my $node ( $rootnode->get_Descendents ) {
1104 0 0 0     0 if ( ($node->id) and ($node->id eq $id ) ) {
1105 0         0 return $node;
1106             }
1107             }
1108             }
1109              
1110              
1111             =head2 move_id_to_bootstrap
1112              
1113             Title : move_id_to_bootstrap
1114             Usage : $tree->move_id_to_bootstrap
1115             Function: Move internal IDs to bootstrap slot
1116             Returns : undef
1117             Args : undef
1118              
1119             =cut
1120              
1121             sub move_id_to_bootstrap{
1122 1     1 1 3 my ($tree) = shift;
1123 1         53 for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
  9         15  
1124 4 100       8 $node->bootstrap(defined $node->id ? $node->id : '');
1125 4         9 $node->id('');
1126             }
1127             }
1128              
1129              
1130             =head2 add_trait
1131              
1132             Title : add_trait
1133             Usage : my $key = $tree->add_trait($trait_file, 3);
1134             Function: Add traits to the leaf nodes of a Bio::Tree:Tree from a file.
1135             The trait file is a tab-delimited text file and needs to have a
1136             header line giving names to traits. The first column contains the
1137             leaf node ids. Subsequent columns contain different trait value sets.
1138             Single or double quotes are removed from the trait values. Traits
1139             are added to leaf nodes as a tag named $key using the add_tag_value()
1140             method. This means that you can retrieve the trait values using the
1141             get_tag_values() method (see the documentation for Bio::Tree::Node).
1142             Returns : Trait name (a scalar) on success, undef on failure (for example, if
1143             the column index requested was too large).
1144             Args : * Name of trait file (scalar string).
1145             * Index of trait file column (scalar int). Note that numbering starts
1146             at 0. Default: 1 (second column).
1147             * Ignore missing values. Typically, if a leaf node has no value in
1148             the trait file, an exception is thrown. If you set this option to
1149             1, then no trait will be given to the node (no exception thrown).
1150              
1151             =cut
1152              
1153             sub _read_trait_file {
1154 3     3   3 my ($self, $file, $column) = @_;
1155 3   50     6 $column ||= 1;
1156              
1157 3         2 my $trait_name;
1158             my $trait_values;
1159 3 50       117 open my $TRAIT, '<', $file or $self->throw("Could not read file '$file': $!");
1160              
1161 3         4 my $first_line = 1;
1162 3         38 while (<$TRAIT>) {
1163 54         37 chomp;
1164 54         150 s/['"]//g;
1165 54         80 my @line = split /\t/;
1166 54 100       61 if ($first_line) {
1167 3         3 $first_line = 0;
1168 3         6 $trait_name = $line[$column];
1169 3         11 next;
1170             }
1171              
1172 51         38 my $id = $line[0];
1173 51 50 33     128 last if (not defined $id) or ($id eq '');
1174              
1175             # Skip empty trait values
1176 51         36 my $value = $line[$column];
1177 51 100 100     135 next if (not defined $value) or ($value eq '');
1178              
1179 33         83 $trait_values->{$id} = $value;
1180             }
1181              
1182 3         52 close $TRAIT;
1183 3         10 return $trait_name, $trait_values;
1184             }
1185              
1186             sub add_trait {
1187 3     3 1 4 my ($self, $file, $column, $ignore) = @_;
1188 3 100       7 $ignore = 0 if not defined $ignore;
1189              
1190 3         8 my ($trait_name, $trait_values) = $self->_read_trait_file($file, $column);
1191              
1192 3 100       8 if (defined $trait_name) {
1193              
1194 2         13 for my $node ($self->get_leaf_nodes) {
1195              
1196             # strip quotes from the node id
1197 32 50       39 $node->id($1) if $node->id =~ /^['"]+(.*)['"]+$/;
1198              
1199 32 100       40 if ( not exists $trait_values->{$node->id} ) {
1200 1 50       3 if ($ignore) {
1201 1         1 next;
1202             } else {
1203 0         0 $self->throw("No trait for node [".$node->id."/".$node->internal_id."]");
1204             }
1205             }
1206              
1207 31         43 $node->add_tag_value($trait_name, $trait_values->{ $node->id } );
1208              
1209             }
1210             }
1211 3         12 return $trait_name;
1212             }
1213              
1214              
1215             1;