line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::LITE::Taxonomy; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Bio::LITE::Taxonomy - Lightweight and efficient taxonomic tree manager |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Bio::LITE::Taxonomy |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $taxNCBI = Bio::LITE::Taxonomy::NCBI->new ( |
14
|
|
|
|
|
|
|
names=> "/path/to/names.dmp", |
15
|
|
|
|
|
|
|
nodes=>"/path/to/nodes.dmp" |
16
|
|
|
|
|
|
|
); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my @taxNCBI = $taxNCBI->get_taxonomy(1442); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $taxRDP = Bio::LITE::Taxonomy::RDP->new ( |
21
|
|
|
|
|
|
|
bergeyXML=>"/media/disk-1/bergeyTrainingTree.xml" |
22
|
|
|
|
|
|
|
) |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my @taxRDP = $taxRDP->get_taxonomy(22075); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 DESCRIPTION |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
This module provides easy and efficient access to different taxonomies (NCBI and RDP) with minimal dependencies and without intermediate databases. This module should be used through specific taxonomic interfaces (e.g. L or L). |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This module is not part of the Bioperl bundle. For Bioperl alternatives, see the L"SEE ALSO"> section of this document. If you are dealing with big datasets or you don't need the rest of the Bioperl bundle to process taxonomic queries this module is for you. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
These modules are designed with performance in mind. The trees are stored in memory (as plain hashes). The GI to Taxid mappings provided by L are very efficient. It also supports both NCBI and RDP taxonomies following the same interface. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 METHODS |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
The following methods are available: |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=over 4 |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=item get_taxonomy |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Accepts a taxid as input and returns an array with its ascendants ordered from top to bottom. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my @tax = $tax->get_taxonomy($taxid); |
46
|
|
|
|
|
|
|
print "$_\n" for (@tax); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
If called in scalar context, returns an array reference. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=item get_taxonomy_with_levels |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The same as get_taxonomy but instead of getting the ascendants returns an array of array references. Each array reference has the ascendant and its taxonomic level (at positions 0 and 1 respectively). This is simpler than it sounds. Check this: |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
my @taxL = $tax->get_taxonomy_with_levels($taxid); |
55
|
|
|
|
|
|
|
for my $l (@taxL) { |
56
|
|
|
|
|
|
|
print "Taxon $l->[0] has rank $l->[1]\n"; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
If called in scalar context, returns an array reference. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item get_taxid_from_name |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Accepts the scientific name of a taxon and returns its associated taxid. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item get_taxonomy_from_name |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Same as before but returns the full taxonomy of the scientific name. This is the same as: |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my $taxid = $tax->get_taxid_from_name($name); |
70
|
|
|
|
|
|
|
my @taxonomy = $tax->get_taxonomy($taxid); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
If called in scalar context returns an array reference. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=item get_term_at_level |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Given a taxid and a taxonomic level as input, returns the taxon. For example, |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my $taxon = $tax->get_term_at_level(1442,"family"); # $taxon = Bacillaceae |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item get_level_from_name |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Given a taxon's scientific name, returns its associated taxonomic level. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=back |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 SEE ALSO |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
L |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
L |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
L: Module to obtain NCBIs Taxids from GIs. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
L: Bioperl alternative to handle taxonomies. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
L: Bioperl module to handle nodes in taxonomies |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 AUTHOR |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Miguel Pignatelli |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Any comments or suggestions should be addressed to emepyc@gmail.com |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 LICENSE |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Copyright 2009 Miguel Pignatelli, all rights reserved. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
111
|
|
|
|
|
|
|
|
112
|
3
|
|
|
3
|
|
67811
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
119
|
|
113
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
97
|
|
114
|
3
|
|
|
3
|
|
17
|
use Carp qw/croak/; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
228
|
|
115
|
|
|
|
|
|
|
|
116
|
3
|
|
|
3
|
|
17
|
use vars qw/$VERSION @ISA/; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
2195
|
|
117
|
|
|
|
|
|
|
$VERSION = '0.07'; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub _check_level |
120
|
|
|
|
|
|
|
{ |
121
|
0
|
|
|
0
|
|
|
my ($self, $level) = @_; |
122
|
0
|
0
|
|
|
|
|
croak "Level not defined" unless defined $level;; |
123
|
0
|
|
|
|
|
|
return $self->{allowed_levels}{$level}; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub _print_levels |
127
|
|
|
|
|
|
|
{ |
128
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
129
|
0
|
|
|
|
|
|
print STDERR "$_\n" for sort keys %{$self->{allowed_levels}}; |
|
0
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub get_term_at_level |
133
|
|
|
|
|
|
|
{ |
134
|
0
|
|
|
0
|
1
|
|
my ($self,$taxid,$level) = @_; |
135
|
0
|
0
|
|
|
|
|
do { |
136
|
0
|
|
|
|
|
|
print STDERR "Level $level not recognized\nAllowed levels:\n"; |
137
|
0
|
|
|
|
|
|
$self->_print_levels; |
138
|
0
|
|
|
|
|
|
croak; |
139
|
|
|
|
|
|
|
} if (! defined $self->_check_level($level)); |
140
|
0
|
0
|
|
|
|
|
return "" unless (defined ${$self->{nodes}->{$taxid}}{name}); |
|
0
|
|
|
|
|
|
|
141
|
0
|
|
|
|
|
|
while (${$self->{nodes}->{$taxid}}{name} ne "root"){ |
|
0
|
|
|
|
|
|
|
142
|
0
|
0
|
|
|
|
|
return ${$self->{nodes}->{$taxid}}{name} if (${$self->{nodes}->{$taxid}}{level} eq $level); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
$taxid = ${$self->{nodes}->{$taxid}}{parent}; |
|
0
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
} |
145
|
0
|
|
|
|
|
|
return "undef"; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub get_taxonomy |
149
|
|
|
|
|
|
|
{ |
150
|
0
|
|
|
0
|
1
|
|
my ($self, $taxid) = @_; |
151
|
0
|
0
|
|
|
|
|
return undef unless defined $taxid; |
152
|
0
|
0
|
|
|
|
|
return "" unless defined ${$self->{nodes}->{$taxid}}{name}; |
|
0
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my @taxonomy; |
154
|
0
|
|
|
|
|
|
while (${$self->{nodes}->{$taxid}}{name} ne "root"){ |
|
0
|
|
|
|
|
|
|
155
|
0
|
|
|
|
|
|
push @taxonomy, ${$self->{nodes}->{$taxid}}{name}; |
|
0
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
|
$taxid = ${$self->{nodes}->{$taxid}}{parent}; |
|
0
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
# pop @taxonomy; # Causes lost of first non-root annotation |
159
|
|
|
|
|
|
|
return wantarray |
160
|
0
|
0
|
|
|
|
|
? reverse @taxonomy |
161
|
|
|
|
|
|
|
: [reverse @taxonomy]; |
162
|
|
|
|
|
|
|
# return reverse do{pop @taxonomy;@taxonomy}; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Note: |
166
|
|
|
|
|
|
|
# This may change in the future. |
167
|
|
|
|
|
|
|
# It would be simpler to return a hash reference, but (specially in the NCBI taxonomy) |
168
|
|
|
|
|
|
|
# many taxons has not associated level (i.e. C) which would imply collapsing the keys. |
169
|
|
|
|
|
|
|
# And of course, use an ordered hash or keep an extra array with the ordering. |
170
|
|
|
|
|
|
|
# The code for doing this is commented below. |
171
|
|
|
|
|
|
|
# Look for user feedback about this. |
172
|
|
|
|
|
|
|
sub get_taxonomy_with_levels |
173
|
|
|
|
|
|
|
{ |
174
|
0
|
|
|
0
|
1
|
|
my ($self,$taxid) = @_; |
175
|
0
|
0
|
|
|
|
|
return undef unless defined $taxid; |
176
|
0
|
0
|
|
|
|
|
return "" unless defined ${$self->{nodes}->{$taxid}}{name}; |
|
0
|
|
|
|
|
|
|
177
|
0
|
|
|
|
|
|
my @taxonomy; |
178
|
0
|
|
|
|
|
|
while (${$self->{nodes}->{$taxid}}{name} ne "root"){ |
|
0
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
push @taxonomy, [${$self->{nodes}->{$taxid}}{name},${$self->{nodes}->{$taxid}}{level}]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
180
|
0
|
|
|
|
|
|
$taxid = ${$self->{nodes}->{$taxid}}{parent}; |
|
0
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
# pop @taxonomy; # Last element is cellular_organism... always? |
183
|
|
|
|
|
|
|
return wantarray |
184
|
0
|
0
|
|
|
|
|
? reverse @taxonomy |
185
|
|
|
|
|
|
|
: [reverse @taxonomy]; |
186
|
|
|
|
|
|
|
# return reverse do{pop @taxonomy;@taxonomy}; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub get_level_from_name |
190
|
|
|
|
|
|
|
{ |
191
|
0
|
|
|
0
|
1
|
|
my ($self,$name) = @_; |
192
|
0
|
0
|
|
|
|
|
return defined $self->{names}{$name} ? $self->{nodes}->{$self->{names}{$name}}->{level} : undef; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub get_taxid_from_name |
196
|
|
|
|
|
|
|
{ |
197
|
0
|
|
|
0
|
1
|
|
my ($self,$name) = @_; |
198
|
0
|
0
|
|
|
|
|
return defined $self->{names}{$name} ? $self->{names}{$name} : undef; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub get_taxonomy_from_name |
202
|
|
|
|
|
|
|
{ |
203
|
0
|
|
|
0
|
1
|
|
my ($self,$name) = @_; |
204
|
0
|
|
|
|
|
|
return $self->get_taxonomy($self->{names}{$name}); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# Currently not in use. May apply in the future |
208
|
|
|
|
|
|
|
# sub _get_taxonomy |
209
|
|
|
|
|
|
|
# { |
210
|
|
|
|
|
|
|
# my ($self,$taxid) = @_; |
211
|
|
|
|
|
|
|
# return undef unless (defined $taxid); |
212
|
|
|
|
|
|
|
# return "" unless defined ${$self->{nodes}->{$taxid}}{name}; |
213
|
|
|
|
|
|
|
# my %taxonomy; |
214
|
|
|
|
|
|
|
# my @order; |
215
|
|
|
|
|
|
|
# while (${$self->{nodes}->{$taxid}}{name} ne "root"){ |
216
|
|
|
|
|
|
|
# push @{$taxonomy{${$self->{nodes}->{$taxid}}{level}}}, ${$self->{nodes}->{$taxid}}{name}; |
217
|
|
|
|
|
|
|
# push @order, ${$self->{nodes}->{$taxid}}{level}; |
218
|
|
|
|
|
|
|
# $taxid = ${$self->{nodes}->{$taxid}}{parent}; |
219
|
|
|
|
|
|
|
# } |
220
|
|
|
|
|
|
|
# return (\%taxonomy,\@order); |
221
|
|
|
|
|
|
|
# } |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Currently not in use. May apply in the future |
224
|
|
|
|
|
|
|
# sub get_taxonomy1 |
225
|
|
|
|
|
|
|
# { |
226
|
|
|
|
|
|
|
# my ($self,$taxid) = @_; |
227
|
|
|
|
|
|
|
# my ($t,$o) = $self->_get_taxonomy($taxid); |
228
|
|
|
|
|
|
|
# my @taxonomy; |
229
|
|
|
|
|
|
|
# for my $l (@$o) { |
230
|
|
|
|
|
|
|
# push @taxonomy,shift @{$t->{$l}}; |
231
|
|
|
|
|
|
|
# } |
232
|
|
|
|
|
|
|
# return reverse do{pop @taxonomy; @taxonomy}; |
233
|
|
|
|
|
|
|
# } |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Currently not in use. May apply in the future |
236
|
|
|
|
|
|
|
# sub get_taxonomy_with_levels1 |
237
|
|
|
|
|
|
|
# { |
238
|
|
|
|
|
|
|
# my ($self,$taxid) = @_; |
239
|
|
|
|
|
|
|
# my ($t,$o) = $self->_get_taxonomy($taxid); |
240
|
|
|
|
|
|
|
# my @taxonomy; |
241
|
|
|
|
|
|
|
# for my $l (@$o) { |
242
|
|
|
|
|
|
|
# push @taxonomy, ($l.":".shift @{$t->{$l}}); |
243
|
|
|
|
|
|
|
# } |
244
|
|
|
|
|
|
|
# return join "\t",@taxonomy; |
245
|
|
|
|
|
|
|
# } |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
1; |