File Coverage

blib/lib/Bio/Phylo/Parsers/Adjacency.pm
Criterion Covered Total %
statement 60 61 98.3
branch 13 18 72.2
condition n/a
subroutine 6 6 100.0
pod n/a
total 79 85 92.9


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Adjacency;
2 1     1   5 use strict;
  1         2  
  1         26  
3 1     1   4 use warnings;
  1         1  
  1         22  
4 1     1   5 use base 'Bio::Phylo::Parsers::Abstract';
  1         1  
  1         286  
5 1     1   6 use Bio::Phylo::Util::Exceptions 'throw';
  1         2  
  1         39  
6 1     1   5 use Bio::Phylo::Util::CONSTANT qw'/looks_like/ :namespaces :objecttypes';
  1         2  
  1         675  
7              
8             =head1 NAME
9              
10             Bio::Phylo::Parsers::Adjacency - Parser used by Bio::Phylo::IO, no serviceable parts inside
11              
12             =head1 DESCRIPTION
13              
14             This module parses a tree structure from tabular data organized as an "adjacency
15             list", i.e. child -> parent relationships. The table should at least have the
16             following columns: 'child' and 'parent'. 'length' is interpreted as branch
17             length. Columns starting with 'node:' are assigned as semantic annotations
18             to the focal node, columns starting with 'branch:' are assigned to the focal
19             branch. Records need to be listed in pre-order, so that references to parent
20             nodes can be resolved immediately. Consequently, the root is the first record,
21             without a parent. Example:
22              
23             ((A:1,B:2)n1:3,C:4)n2:0;
24              
25             Becomes (with an extra example annotation):
26              
27             child parent length node:dcterms:identifier
28             n2 0 35462
29             n1 n2 3 34987
30             A n1 1 73843
31             B n1 2 98743
32             C n2 4 39847
33              
34             =cut
35              
36             sub _parse {
37 1     1   2 my $self = shift;
38 1         4 my $fh = $self->_handle;
39 1         4 my $fac = $self->_factory;
40 1         23 my $log = $self->_logger;
41 1         7 my $tree = $fac->create_tree;
42 1         16 my $ns = $self->_args->{'-namespaces'};
43 1 50       4 if ( $ns ) {
44 1         2 $tree->set_namespaces( %{ $ns } );
  1         11  
45             }
46 1         4 my ( @header, %node_cols );
47 1         0 my %node_for_id;
48 1         5 LINE: while (<$fh>) {
49 6 100       16 unless ( scalar(keys(%node_for_id)) % 1000 ) {
50 2         8 $log->debug("processed node " . scalar(keys(%node_for_id)));
51             }
52 6         14 chomp;
53            
54             # the first line is the header row
55 6 100       14 if ( not @header ) {
56 1         8 @header = split /\t/, $_;
57 1         3 for my $col ( @header ) {
58 4 100       13 if ( $col =~ /^node:(.+)$/ ) {
59 1         3 my $predicate = $1;
60 1         4 $node_cols{$col} = $predicate;
61             }
62             }
63 1         5 next LINE;
64             }
65            
66             # this is a record
67 5         21 my @fields = split /\t/, $_;
68 5         14 my %record = map { $header[$_] => $fields[$_] } 0 .. $#header;
  20         52  
69            
70             # create node
71 5         14 my $name = $record{'child'};
72 5         7 my $pname = $record{'parent'};
73 5         29 my $node = $fac->create_node( '-name' => $name );
74 5         24 $tree->insert($node);
75 5         13 $node_for_id{$name} = $node;
76            
77             # build the tree structure
78 5 100       12 if ( my $parent = $node_for_id{$pname} ) {
79 4         11 $node->set_parent($parent);
80             }
81            
82             # assign branch length, if defined
83 5 50       11 if ( defined $record{'length'} ) {
84 5         16 $node->set_branch_length($record{'length'});
85             }
86            
87             # now see if there are any node columns
88 5         14 for my $col ( keys %node_cols ) {
89 5         9 my $value = $record{$col};
90 5 50       10 if ( $value ) {
91 5         9 my $predicate = $node_cols{$col};
92 5 50       38 if ( $predicate =~ /^(.+)?:.+$/ ) {
93 5         12 my $prefix = $1;
94 5 50       15 if ( my $ns = $Bio::Phylo::Util::CONSTANT::NS->{$prefix} ) {
95 5         39 $node->add_meta(
96             $fac->create_meta(
97             '-namespaces' => { $prefix => $ns },
98             '-triple' => { $predicate => $value }
99             )
100             );
101             }
102             else {
103 0         0 $log->warn("No namespace for prefix $prefix");
104             }
105             }
106             }
107             }
108             }
109 1         13 my $forest = $fac->create_forest;
110 1         5 $forest->insert($tree);
111 1         9 return $forest;
112             }
113              
114             =head1 SEE ALSO
115              
116             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
117             for any user or developer questions and discussions.
118              
119             =over
120              
121             =item L<Bio::Phylo::IO>
122              
123             The adjacency parser is called by the L<Bio::Phylo::IO|Bio::Phylo::IO> object.
124             Look there to learn how to parse trees in general
125              
126             =item L<Bio::Phylo::Manual>
127              
128             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>
129              
130             =back
131              
132             =head1 CITATION
133              
134             If you use Bio::Phylo in published research, please cite it:
135              
136             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
137             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
138             I<BMC Bioinformatics> B<12>:63.
139             L<http://dx.doi.org/10.1186/1471-2105-12-63>
140              
141             =cut
142              
143             1;