File Coverage

Bio/TreeIO/nexus.pm
Criterion Covered Total %
statement 68 138 49.2
branch 16 76 21.0
condition 2 21 9.5
subroutine 9 12 75.0
pod 4 5 80.0
total 99 252 39.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::TreeIO::nexus
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::TreeIO::nexus - A TreeIO driver module for parsing Nexus tree output from PAUP
17              
18             =head1 SYNOPSIS
19              
20             use Bio::TreeIO;
21             my $in = Bio::TreeIO->new(-file => 't/data/cat_tre.tre');
22             while( my $tree = $in->next_tree ) {
23             }
24              
25             =head1 DESCRIPTION
26              
27             This is a driver module for parsing PAUP Nexus tree format which
28             basically is just a remapping of trees.
29              
30             =head2 Comments
31              
32             The nexus format allows node comments that are placed inside square
33             brackets. Usually the comments (implemented as tags for nodes) are
34             used to give a name for an internal node or record the bootstrap value,
35             but other uses are possible.
36              
37             The FigTree program by Andrew Rambaut adds various rendering
38             parameters inside comments and flags these comments by starting them
39             with '&!'. The parameters implemented here are 'label' and 'color'.
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to
47             the Bioperl mailing list. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             of the bugs and their resolution. Bug reports can be submitted via
67             the web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Jason Stajich
72              
73             Email jason-at-open-bio-dot-org
74              
75             =head1 APPENDIX
76              
77             The rest of the documentation details each of the object methods.
78             Internal methods are usually preceded with a _
79              
80             =cut
81              
82             # Let the code begin...
83              
84             package Bio::TreeIO::nexus;
85 3     3   18 use strict;
  3         6  
  3         87  
86              
87 3     3   12 use Bio::Event::EventGeneratorI;
  3         5  
  3         62  
88 3     3   225 use IO::String;
  3         1671  
  3         77  
89              
90 3     3   15 use base qw(Bio::TreeIO);
  3         5  
  3         4459  
91              
92             =head2 new
93              
94             Title : new
95             Args : -header => boolean default is true
96             print/do not print #NEXUS header
97             -translate => boolean default is true
98             print/do not print Node Id translation to a number
99              
100             =cut
101              
102             sub _initialize {
103 9     9   18 my $self = shift;
104 9         102 $self->SUPER::_initialize(@_);
105 9         57 my ( $hdr, $trans ) = $self->_rearrange(
106             [
107             qw(HEADER
108             TRANSLATE)
109             ],
110             @_
111             );
112 9 50       55 $self->header( defined $hdr ? $hdr : 1 );
113 9 50       44 $self->translate_node( defined $trans ? $trans : 1 );
114             }
115              
116             =head2 next_tree
117              
118             Title : next_tree
119             Usage : my $tree = $treeio->next_tree
120             Function: Gets the next tree in the stream
121             Returns : Bio::Tree::TreeI
122             Args : none
123              
124              
125             =cut
126              
127             sub next_tree {
128 39     39 1 1900 my ($self) = @_;
129 39 100       78 unless ( $self->{'_parsed'} ) {
130 9         26 $self->_parse;
131             }
132 39         121 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
133             }
134              
135             sub rewind {
136 0     0 0 0 shift->{'_treeiter'} = 0;
137             }
138              
139             sub _parse {
140 9     9   27 my ($self) = @_;
141              
142 9         22 $self->{'_parsed'} = 1;
143 9         20 $self->{'_treeiter'} = 0;
144              
145 9         53 while ( defined( $_ = $self->_readline ) ) {
146 9 50       60 next if /^\s+$/;
147 9         35 last;
148             }
149 9 50       27 return unless ( defined $_ );
150              
151 9 50       52 unless (/^\#NEXUS/i) {
152 0         0 $self->warn("File does not start with #NEXUS"); #'
153 0         0 return;
154             }
155              
156 9         14 my $line;
157 9         33 while ( defined( $_ = $self->_readline ) ) {
158 283         605 $line .= $_;
159             }
160 9         199 my @sections = split( /#NEXUS/i, $line );
161 9         26 for my $s (@sections) {
162 13         87 my %translate;
163 13 50       47 if ( $self->verbose > 0 ) {
164 0         0 while ( $s =~ s/(\[[^\]]+\])// ) {
165 0         0 $self->debug("removing comment $1\n");
166             }
167             }
168             else {
169 13         144 $s =~ s/(\[[^\]]+\])//g;
170             }
171            
172 13 50       98 if ( $s =~ /begin trees;(.+)(end;)?/si ) {
173 13         103 my $trees = $1;
174 13 100       156 if ( $trees =~ s/\s+translate\s+([^;]+);//i ) {
175 12         25 my @trans;
176 12         34 my $tr = $1;
177              
178 12         92 while ($tr =~ m{\s*([^,\s]+?\s+(?:'[^']+'|[^'\s]+)),?}gc) {
179 158         566 push @trans, $1;
180             }
181 12         28 for my $n ( @trans ) {
182 158 50       400 if ($n =~ /^\s*(\S+)\s+(.+)$/) {
183 158         272 my ($id,$tag) = ($1,$2);
184 158         392 $tag =~ s/[\s,]+$//; # remove the extra spaces of the last taxon
185 158         373 $translate{$id} = $tag;
186             }
187             }
188             }
189             else {
190 1         7 $self->debug("no translate in: $trees\n");
191             }
192 13         107 while ($trees =~ /\s+tree\s+\*?\s*(\S+)\s*\=
193             \s*(?:\[\S+\])?\s*([^\;]+;)/igx)
194             {
195 37         1203 my ( $tree_name, $tree_str ) = ( $1, $2 );
196              
197             # MrBayes does not print colons for node label
198             # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g;
199 37         375 my $buf = IO::String->new($tree_str);
200 37         2777 my $treeio = Bio::TreeIO->new(
201             -format => 'newick',
202             -fh => $buf
203             );
204 37         127 my $tree = $treeio->next_tree;
205 37         182 foreach my $node ( grep { $_->is_Leaf } $tree->get_nodes ) {
  2875         3843  
206 1470         2055 my $id = $node->id;
207 1470         2246 my $lookup = $translate{$id};
208 1470   66     2471 $node->id( $lookup || $id );
209             }
210 37 50       361 $tree->id($tree_name) if defined $tree_name;
211 37         70 push @{ $self->{'_trees'} }, $tree;
  37         427  
212             }
213             }
214             else {
215 0         0 $self->debug("begin_trees failed: $s\n");
216             }
217             }
218 9 50       218 if ( !@sections ) {
219 0         0 $self->debug("warn no sections: $line\n");
220             }
221             }
222              
223             =head2 write_tree
224              
225             Title : write_tree
226             Usage : $treeio->write_tree($tree);
227             Function: Writes a tree onto the stream
228             Returns : none
229             Args : Bio::Tree::TreeI
230              
231              
232             =cut
233              
234             sub write_tree {
235 0     0 1 0 my ( $self, @trees ) = @_;
236 0 0       0 if ( $self->header ) {
237 0         0 $self->_print("#NEXUS\n\n");
238             }
239 0         0 my $translate = $self->translate_node;
240 0         0 my $time = localtime();
241 0         0 $self->_print( sprintf( "Begin trees; [Treefile created %s]\n", $time ) );
242              
243 0         0 my ( $first, $nodecter, %node2num ) = ( 0, 1 );
244 0         0 foreach my $tree (@trees) {
245              
246 0 0 0     0 if ( $first == 0
247             && $translate )
248             {
249 0         0 $self->_print("\tTranslate\n");
250             $self->_print(
251             join(
252             ",\n",
253             map {
254 0         0 $node2num{ $_->id } = $nodecter;
255 0         0 sprintf( "\t\t%d %s", $nodecter++, $_->id )
256             }
257 0         0 grep { $_->is_Leaf } $tree->get_nodes
  0         0  
258             ),
259             "\n;\n"
260             );
261             }
262 0         0 my @data = _write_tree_Helper( $tree->get_root_node, \%node2num );
263 0 0       0 if ( $data[-1] !~ /\)$/ ) {
264 0         0 $data[0] = "(" . $data[0];
265 0         0 $data[-1] .= ")";
266             }
267              
268             # by default all trees in bioperl are currently rooted
269             # something we'll try and fix one day....
270             $self->_print(
271 0 0 0     0 sprintf(
272             "\t tree %s = [&%s] %s;\n",
273             ( $tree->id || sprintf( "Bioperl_%d", $first + 1 ) ),
274             ( $tree->get_root_node ) ? 'R' : 'U',
275             join( ',', @data )
276             )
277             );
278 0         0 $first++;
279             }
280 0         0 $self->_print("End;\n");
281 0 0 0     0 $self->flush if $self->_flush_on_write && defined $self->_fh;
282 0         0 return;
283             }
284              
285             sub _write_tree_Helper {
286 0     0   0 my ( $node, $node2num ) = @_;
287 0 0       0 return () if ( !defined $node );
288 0         0 my @data;
289              
290 0         0 foreach my $n ( $node->each_Descendent() ) {
291 0         0 push @data, _write_tree_Helper( $n, $node2num );
292             }
293 0 0       0 if ( @data > 1 ) { # internal node
294 0         0 $data[0] = "(" . $data[0];
295 0         0 $data[-1] .= ")";
296              
297             # FigTree comments start
298 0         0 my $comment_flag;
299 0 0 0     0 $comment_flag = 0
300             if ( $node->has_tag('color') or $node->has_tag('label') );
301            
302 0 0       0 $data[-1] .= '[&!' if defined $comment_flag;
303            
304 0 0       0 if ( $node->has_tag('color')) {
305 0         0 my $color = $node->get_tag_values('color');
306 0         0 $data[-1] .= "color=$color";
307 0         0 $comment_flag++;
308             }
309 0 0       0 if ( $node->has_tag('label')) {
310 0         0 my $label = $node->get_tag_values('label');
311 0 0       0 $data[-1] .= ',' if $comment_flag;
312 0         0 $data[-1] .= 'label="'. $label. '"';
313             }
314 0 0       0 $data[-1] .= ']' if defined $comment_flag;
315             # FigTree comments end
316            
317             # let's explicitly write out the bootstrap if we've got it
318 0         0 my $b;
319              
320 0         0 my $bl = $node->branch_length;
321 0 0       0 if ( !defined $bl ) {
    0          
322             }
323             elsif ( $bl =~ /\#/ ) {
324 0         0 $data[-1] .= $bl;
325             }
326             else {
327 0         0 $data[-1] .= ":$bl";
328             }
329 0 0       0 if ( defined( $b = $node->bootstrap ) ) {
    0          
330 0         0 $data[-1] .= sprintf( "[%s]", $b );
331             }
332             elsif ( defined( $b = $node->id ) ) {
333 0 0       0 $b = $node2num->{$b} if ( $node2num->{$b} ); # translate node2num
334 0 0       0 $data[-1] .= sprintf( "[%s]", $b ) if defined $b;
335             }
336              
337             }
338             else { # leaf node
339 0 0 0     0 if ( defined $node->id || defined $node->branch_length ) {
340 0 0       0 my $id = defined $node->id ? $node->id : '';
341 0 0 0     0 if ( length($id) && $node2num->{$id} ) {
342 0         0 $id = $node2num->{$id};
343             }
344 0 0       0 if ( $node->has_tag('color')) {
345 0         0 my ($color) = $node->get_tag_values('color');
346 0         0 $id .= "[&!color=$color\]";
347             }
348 0 0       0 push @data,
349             sprintf( "%s%s",
350             $id,
351             defined $node->branch_length
352             ? ":" . $node->branch_length
353             : '' );
354             }
355             }
356 0         0 return @data;
357             }
358              
359             =head2 header
360              
361             Title : header
362             Usage : $obj->header($newval)
363             Function:
364             Example :
365             Returns : value of header (a scalar)
366             Args : on set, new value (a scalar or undef, optional)
367              
368              
369             =cut
370              
371             sub header {
372 9     9 1 18 my $self = shift;
373              
374 9 50       53 return $self->{'header'} = shift if @_;
375 0         0 return $self->{'header'};
376             }
377              
378             =head2 translate_node
379              
380             Title : translate_node
381             Usage : $obj->translate_node($newval)
382             Function:
383             Example :
384             Returns : value of translate_node (a scalar)
385             Args : on set, new value (a scalar or undef, optional)
386              
387              
388             =cut
389              
390             sub translate_node {
391 9     9 1 16 my $self = shift;
392              
393 9 50       42 return $self->{'translate_node'} = shift if @_;
394 0           return $self->{'translate_node'};
395             }
396              
397             1;