line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::DB::Taxonomy::list |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Sendu Bala |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Sendu Bala |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::DB::Taxonomy::list - An implementation of Bio::DB::Taxonomy |
18
|
|
|
|
|
|
|
that accepts lists of words to build a database |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use Bio::DB::Taxonomy; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $db = Bio::DB::Taxonomy->new( -source => 'list' ); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my @ranks = ('superkingdom', 'class', 'genus', 'species'); |
27
|
|
|
|
|
|
|
my @names = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens'); |
28
|
|
|
|
|
|
|
$db->add_lineage(-names => \@names, -ranks => \@ranks); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
@names = ('Eukaryota', 'Mammalia', 'Mus', 'Mus musculus'); |
31
|
|
|
|
|
|
|
$db->add_lineage(-names => \@names, -ranks => \@ranks); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 DESCRIPTION |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This is an implementation which uses supplied lists of words to create a |
36
|
|
|
|
|
|
|
database from which you can extract Bio::Taxon objects. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 TODO |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
It is possible this module could do something like store the data it builds |
41
|
|
|
|
|
|
|
up to disc. Would that be useful? |
42
|
|
|
|
|
|
|
At any rate, this is why the module is called 'list' and not 'in_memory' or |
43
|
|
|
|
|
|
|
similar. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 FEEDBACK |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 Mailing Lists |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
50
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
51
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
54
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 Support |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
I |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
63
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
64
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
65
|
|
|
|
|
|
|
with code and data examples if at all possible. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 Reporting Bugs |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
70
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
71
|
|
|
|
|
|
|
the web: |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 AUTHOR - Sendu Bala |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Email bix@sendu.me.uk |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 APPENDIX |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
82
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Let the code begin... |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
package Bio::DB::Taxonomy::list; |
90
|
|
|
|
|
|
|
|
91
|
34
|
|
|
34
|
|
123
|
use strict; |
|
34
|
|
|
|
|
40
|
|
|
34
|
|
|
|
|
836
|
|
92
|
34
|
|
|
34
|
|
709
|
use Bio::Taxon; |
|
34
|
|
|
|
|
35
|
|
|
34
|
|
|
|
|
703
|
|
93
|
|
|
|
|
|
|
|
94
|
34
|
|
|
34
|
|
97
|
use base qw(Bio::DB::Taxonomy); |
|
34
|
|
|
|
|
34
|
|
|
34
|
|
|
|
|
34051
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
our $prefix = 'list'; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 new |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Title : new |
102
|
|
|
|
|
|
|
Usage : my $obj = Bio::DB::Taxonomy::list->new(); |
103
|
|
|
|
|
|
|
Function: Builds a new Bio::DB::Taxonomy::list object |
104
|
|
|
|
|
|
|
Returns : an instance of Bio::DB::Taxonomy::list |
105
|
|
|
|
|
|
|
Args : optional, as per the add_lineage() method. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub new { |
110
|
252
|
|
|
252
|
1
|
524
|
my ($class, @args) = @_; |
111
|
252
|
|
|
|
|
1197
|
my $self = $class->SUPER::new(@args); |
112
|
252
|
|
|
|
|
587
|
my %args = @args; |
113
|
252
|
|
|
|
|
355
|
delete $args{'-source'}; |
114
|
|
|
|
|
|
|
|
115
|
252
|
100
|
|
|
|
1107
|
$self->add_lineage(%args) if %args; |
116
|
|
|
|
|
|
|
|
117
|
252
|
|
|
|
|
1085
|
return $self; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 add_lineage |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Title : add_lineage |
124
|
|
|
|
|
|
|
Usage : my @names = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens'); |
125
|
|
|
|
|
|
|
my @ranks = ('superkingdom', 'class', 'genus', 'species'); |
126
|
|
|
|
|
|
|
$db->add_lineage( -names => \@names, -ranks => \@ranks ); |
127
|
|
|
|
|
|
|
Function: Add a lineage to the database, where the lineage is described by |
128
|
|
|
|
|
|
|
a list of scientific names in the order root->leaf. The rank of each |
129
|
|
|
|
|
|
|
name can optionally be described by supplying an additional list |
130
|
|
|
|
|
|
|
of rank names in the same order (eg. superkingdom->species). |
131
|
|
|
|
|
|
|
Returns : 1 for success |
132
|
|
|
|
|
|
|
Args : -names => [] : array ref of scientific names, REQUIRED |
133
|
|
|
|
|
|
|
-ranks => [] : array ref of rank names, same order as above, OPTIONAL |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=cut |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub add_lineage { |
138
|
331
|
|
|
331
|
1
|
543
|
my ($self, @args) = @_; |
139
|
331
|
|
|
|
|
1171
|
my ($names, $ranks) = $self->_rearrange([qw (NAMES RANKS)], @args); |
140
|
331
|
50
|
33
|
|
|
1691
|
$self->throw("-names must be supplied and its value must be an array reference") |
141
|
|
|
|
|
|
|
unless $names && ref($names) eq 'ARRAY'; |
142
|
|
|
|
|
|
|
|
143
|
331
|
|
|
|
|
549
|
my $names_idx = scalar @$names - 1; |
144
|
|
|
|
|
|
|
|
145
|
331
|
100
|
|
|
|
610
|
if ($ranks) { |
146
|
39
|
50
|
|
|
|
47
|
$self->throw("-ranks must be an array reference") |
147
|
|
|
|
|
|
|
unless ref($ranks) eq 'ARRAY'; |
148
|
39
|
50
|
|
|
|
47
|
$self->throw("The -names and -ranks lists must be of equal length") |
149
|
|
|
|
|
|
|
unless $names_idx == scalar @$ranks - 1; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# This is non-trivial because names are not guaranteed unique in a taxonomy, |
153
|
|
|
|
|
|
|
# and neither are name&rank combinations. Furthermore, different name&rank |
154
|
|
|
|
|
|
|
# combinations can actually refer to the same taxon, eg. when one time |
155
|
|
|
|
|
|
|
# 'Homo'&'genus' is supplied, while another time 'Homo'&'no rank'. |
156
|
|
|
|
|
|
|
# |
157
|
|
|
|
|
|
|
# name&rank&ancestor could well be unique (or good enough 99.9999% of the |
158
|
|
|
|
|
|
|
# time), but we have the added complication that lineages could sometimes be |
159
|
|
|
|
|
|
|
# supplied with differing numbers of taxa. Ideally we want to realise that |
160
|
|
|
|
|
|
|
# the first of these two lineages shares all its nodes with the second: |
161
|
|
|
|
|
|
|
# ('Mammalia', 'Homo', 'Homo sapiens') |
162
|
|
|
|
|
|
|
# ('Mammalia', 'Hominidae', 'Homo', 'Homo sapiens') |
163
|
|
|
|
|
|
|
# |
164
|
|
|
|
|
|
|
# Clearly with limited information we can't do a perfect job, but we can try |
165
|
|
|
|
|
|
|
# and do a reasonable one. So, let's just do the trivial implementation now |
166
|
|
|
|
|
|
|
# and see how bad it is! (assumes ranks are unique except for 'no rank') |
167
|
|
|
|
|
|
|
|
168
|
331
|
|
|
|
|
459
|
my $ancestors = $self->{ancestors}; |
169
|
331
|
|
|
|
|
629
|
my $node_data = $self->{node_data}; |
170
|
331
|
|
|
|
|
380
|
my $name_to_id = $self->{name_to_id}; |
171
|
331
|
|
|
|
|
366
|
my $children = $self->{children}; |
172
|
|
|
|
|
|
|
|
173
|
331
|
|
|
|
|
418
|
my $my_ancestor_id = ''; |
174
|
331
|
|
|
|
|
292
|
my @node_ids; |
175
|
331
|
|
|
|
|
771
|
for my $i (0 .. $names_idx) { |
176
|
3010
|
|
|
|
|
2545
|
my $name = $names->[$i]; |
177
|
3010
|
|
|
|
|
2122
|
my $rank = $ranks->[$i]; # if undef, this node has 'no rank' |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# This is a new node with a new id if we haven't seen this name before. |
180
|
|
|
|
|
|
|
# It's also always a new node if this is the first lineage going into |
181
|
|
|
|
|
|
|
# the db. |
182
|
|
|
|
|
|
|
# |
183
|
|
|
|
|
|
|
# We need to handle, however, situations in the future where we try to |
184
|
|
|
|
|
|
|
# merge in a new lineage but we have non-unique names in the lineage |
185
|
|
|
|
|
|
|
# and possible missing classes in some lineages, e.g. |
186
|
|
|
|
|
|
|
# '... Anophelinae, Anopheles, Anopheles, Angusticorn, Anopheles...' |
187
|
|
|
|
|
|
|
# merged with |
188
|
|
|
|
|
|
|
# '... Anophelinae, Anopheles, Angusticorn, Anopheles...'), |
189
|
|
|
|
|
|
|
# but still need the 'tree' to be correct |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# Look for a node that is consistent with this lineage |
192
|
3010
|
|
|
|
|
1849
|
my $node_id; |
193
|
3010
|
|
|
|
|
1955
|
SAME_NAMED: for my $same_id (@{$name_to_id->{$name}}) { |
|
3010
|
|
|
|
|
6367
|
|
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# Taxa are the same if it they have the same ancestor or none |
196
|
557
|
|
100
|
|
|
797
|
my $this_ancestor_id = $ancestors->{$same_id} || ''; |
197
|
557
|
100
|
|
|
|
611
|
if ($my_ancestor_id eq $this_ancestor_id) { |
198
|
282
|
|
|
|
|
162
|
$node_id = $same_id; |
199
|
282
|
|
|
|
|
235
|
last SAME_NAMED; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Compare children |
203
|
275
|
100
|
|
|
|
300
|
next if $i >= $names_idx; # this taxon has no child |
204
|
223
|
|
|
|
|
149
|
my $my_child_name = $names->[$i + 1]; |
205
|
|
|
|
|
|
|
#while ( my ($this_child_id, undef) = each %{$children->{$same_id}} ) { |
206
|
223
|
|
|
|
|
113
|
for my $this_child_id (keys %{$children->{$same_id}}) { |
|
223
|
|
|
|
|
347
|
|
207
|
234
|
100
|
|
|
|
363
|
if ($my_child_name eq $node_data->{$this_child_id}->[0]) { # both children have same name |
208
|
5
|
50
|
|
|
|
6
|
if ($my_ancestor_id) { |
209
|
5
|
|
|
|
|
5
|
my @s_ancestors; |
210
|
5
|
|
|
|
|
7
|
while ($this_ancestor_id = $ancestors->{$this_ancestor_id}) { |
211
|
5
|
50
|
|
|
|
7
|
if ($my_ancestor_id eq $this_ancestor_id) { |
212
|
0
|
|
|
|
|
0
|
$my_ancestor_id = $ancestors->{$same_id}; |
213
|
0
|
|
|
|
|
0
|
push @node_ids, @s_ancestors, $my_ancestor_id; |
214
|
0
|
|
|
|
|
0
|
$node_id = $same_id; |
215
|
0
|
|
|
|
|
0
|
last SAME_NAMED; |
216
|
|
|
|
|
|
|
} |
217
|
5
|
|
|
|
|
14
|
unshift @s_ancestors, $this_ancestor_id; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} else { |
220
|
|
|
|
|
|
|
# This new lineage (@$names) doesn't start at the |
221
|
|
|
|
|
|
|
# same root as the existing lineages. Assuming |
222
|
|
|
|
|
|
|
# '$name' corresponds to node $same_id"); |
223
|
0
|
|
|
|
|
0
|
$node_id = $same_id; |
224
|
0
|
|
|
|
|
0
|
last SAME_NAMED; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
3010
|
100
|
|
|
|
4267
|
if (not defined $node_id) { |
231
|
|
|
|
|
|
|
# This is a new node. Add it to the database, using the prefix 'list' |
232
|
|
|
|
|
|
|
# for its ID to distinguish it from the IDs from other taxonomies. |
233
|
2728
|
|
|
|
|
2397
|
my $next_num = ++$self->{node_ids}; |
234
|
2728
|
|
|
|
|
2371
|
$node_id = $prefix.$next_num; |
235
|
2728
|
|
|
|
|
1670
|
push @{$self->{name_to_id}->{$name}}, $node_id; |
|
2728
|
|
|
|
|
4855
|
|
236
|
2728
|
|
|
|
|
4634
|
$self->{node_data}->{$node_id}->[0] = $name; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
3010
|
100
|
100
|
|
|
4782
|
if ( (defined $rank) && (not defined $node_data->{$node_id}->[1]) ) { |
240
|
|
|
|
|
|
|
# Save rank if node in database has no rank but the current node has one |
241
|
114
|
|
|
|
|
99
|
$self->{node_data}->{$node_id}->[1] = $rank; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
3010
|
100
|
|
|
|
3477
|
if ($my_ancestor_id) { |
245
|
2679
|
50
|
66
|
|
|
4510
|
if ($self->{ancestors}->{$node_id} && $self->{ancestors}->{$node_id} ne $my_ancestor_id) { |
246
|
0
|
|
|
|
|
0
|
$self->throw("The lineage '".join(', ', @$names)."' and a ". |
247
|
|
|
|
|
|
|
"previously stored lineage share a node name but have ". |
248
|
|
|
|
|
|
|
"different ancestries for that node. Can't cope!"); |
249
|
|
|
|
|
|
|
} |
250
|
2679
|
|
|
|
|
3126
|
$self->{ancestors}->{$node_id} = $my_ancestor_id; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
3010
|
|
|
|
|
2096
|
$my_ancestor_id = $node_id; |
254
|
3010
|
|
|
|
|
3573
|
push @node_ids, $node_id; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Go through the lineage in reverse so we can remember the children |
258
|
331
|
|
|
|
|
976
|
for (my $i = $names_idx - 1; $i >= 0; $i--) { |
259
|
2679
|
|
|
|
|
5875
|
$self->{children}->{$node_ids[$i]}->{$node_ids[$i+1]} = undef; |
260
|
|
|
|
|
|
|
} |
261
|
331
|
|
|
|
|
1365
|
return 1; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=head2 Bio::DB::Taxonomy Interface implementation |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 get_num_taxa |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Title : get_num_taxa |
270
|
|
|
|
|
|
|
Usage : my $num = $db->get_num_taxa(); |
271
|
|
|
|
|
|
|
Function: Get the number of taxa stored in the database. |
272
|
|
|
|
|
|
|
Returns : A number |
273
|
|
|
|
|
|
|
Args : None |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=cut |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub get_num_taxa { |
278
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
279
|
0
|
|
0
|
|
|
0
|
return $self->{node_ids} || 0; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 get_taxon |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Title : get_taxon |
286
|
|
|
|
|
|
|
Usage : my $taxon = $db->get_taxon(-taxonid => $taxonid) |
287
|
|
|
|
|
|
|
Function: Get a Bio::Taxon object from the database. |
288
|
|
|
|
|
|
|
Returns : Bio::Taxon object |
289
|
|
|
|
|
|
|
Args : A single value which is the ID of the taxon to retrieve |
290
|
|
|
|
|
|
|
OR named args, as follows: |
291
|
|
|
|
|
|
|
-taxonid => Taxonomy ID (NB: these are not NCBI taxonomy ids but |
292
|
|
|
|
|
|
|
'list' pre-fixed ids unique to the list database). |
293
|
|
|
|
|
|
|
OR |
294
|
|
|
|
|
|
|
-name => String (to query by a taxonomy name). A given taxon name |
295
|
|
|
|
|
|
|
can match different taxonomy objects. When that is the |
296
|
|
|
|
|
|
|
case, a warning is displayed and the first matching taxon |
297
|
|
|
|
|
|
|
is reported. See get_taxonids() to get all matching taxon |
298
|
|
|
|
|
|
|
IDs. |
299
|
|
|
|
|
|
|
OR |
300
|
|
|
|
|
|
|
-names => Array ref of lineage names, like in add_lineage(). To |
301
|
|
|
|
|
|
|
overcome the limitations of -name, you can use -names to |
302
|
|
|
|
|
|
|
provide the full lineage of the taxon you want and get a |
303
|
|
|
|
|
|
|
unique, unambiguous taxon object. |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=cut |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub get_taxon { |
308
|
6877
|
|
|
6877
|
1
|
10434
|
my ($self, @args) = @_; |
309
|
|
|
|
|
|
|
|
310
|
6877
|
|
|
|
|
5107
|
my $taxonid; |
311
|
6877
|
100
|
|
|
|
10567
|
if (scalar @args == 1) { |
312
|
|
|
|
|
|
|
# Argument is a taxon ID |
313
|
2145
|
|
|
|
|
2142
|
$taxonid = $args[0]; |
314
|
|
|
|
|
|
|
} else { |
315
|
|
|
|
|
|
|
# Got named arguments |
316
|
4732
|
|
|
|
|
3570
|
my ($name, $names); |
317
|
4732
|
|
|
|
|
12648
|
($taxonid, $name, $names) = $self->_rearrange([qw(TAXONID NAME NAMES)], @args); |
318
|
4732
|
100
|
|
|
|
9658
|
if ($name) { |
319
|
2
|
|
|
|
|
3
|
$names = [$name]; |
320
|
|
|
|
|
|
|
} |
321
|
4732
|
100
|
|
|
|
7016
|
if ($names) { |
322
|
2
|
|
|
|
|
3
|
$name = $names->[-1]; |
323
|
|
|
|
|
|
|
|
324
|
2
|
|
|
|
|
6
|
my @taxonids = $self->get_taxonids($name); |
325
|
2
|
|
|
|
|
2
|
$taxonid = $taxonids[0]; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Use provided lineage to find correct ID amongst several matching IDs |
328
|
2
|
50
|
33
|
|
|
7
|
if ( (scalar @taxonids > 1) && (scalar @$names > 1) ) { |
329
|
0
|
|
|
|
|
0
|
for my $query_taxonid (@taxonids) { |
330
|
0
|
|
|
|
|
0
|
my $matched = 1; |
331
|
0
|
|
|
|
|
0
|
my $db_ancestor = $self->get_taxon($query_taxonid); |
332
|
0
|
|
|
|
|
0
|
for (my $i = $#$names-1; $i >= 0; $i--) { |
333
|
0
|
|
|
|
|
0
|
my $query_ancestor_name = $names->[$i]; |
334
|
0
|
|
|
|
|
0
|
$db_ancestor = $db_ancestor->ancestor; |
335
|
0
|
|
|
|
|
0
|
my $db_ancestor_name = ''; |
336
|
0
|
0
|
|
|
|
0
|
if ($db_ancestor) { |
337
|
0
|
|
|
|
|
0
|
$db_ancestor_name = $db_ancestor->node_name; |
338
|
|
|
|
|
|
|
} |
339
|
0
|
0
|
|
|
|
0
|
if (not ($query_ancestor_name eq $db_ancestor_name) ) { |
340
|
0
|
|
|
|
|
0
|
$matched = 0; |
341
|
0
|
|
|
|
|
0
|
last; # done testing this taxonid |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} |
344
|
0
|
0
|
|
|
|
0
|
if ($matched == 1) { |
345
|
0
|
|
|
|
|
0
|
@taxonids = [$query_taxonid]; |
346
|
0
|
|
|
|
|
0
|
$taxonid = $query_taxonid; |
347
|
0
|
|
|
|
|
0
|
last; # done testing all taxonids |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# Warn if several taxon IDs matched |
353
|
2
|
50
|
|
|
|
7
|
if (scalar @taxonids > 1) { |
354
|
0
|
0
|
|
|
|
0
|
$self->warn("There were multiple ids (@taxonids) matching '$name',". |
355
|
|
|
|
|
|
|
" using '$taxonid'") if scalar @taxonids > 1; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Now that we have the taxon ID, retrieve the corresponding Taxon object |
362
|
6877
|
|
|
|
|
5522
|
my $taxon; |
363
|
6877
|
|
|
|
|
8662
|
my $node = $self->{node_data}->{$taxonid}; |
364
|
6877
|
100
|
|
|
|
9796
|
if ($node) { |
365
|
6873
|
|
|
|
|
7541
|
my ($sci_name, $rank) = @$node; |
366
|
6873
|
|
|
|
|
18886
|
$taxon = Bio::Taxon->new( |
367
|
|
|
|
|
|
|
-name => $sci_name, |
368
|
|
|
|
|
|
|
-object_id => $taxonid, # not an ncbi taxid, simply an object id |
369
|
|
|
|
|
|
|
); |
370
|
|
|
|
|
|
|
|
371
|
6873
|
100
|
|
|
|
10748
|
if ($rank) { |
372
|
53
|
|
|
|
|
93
|
$taxon->rank($rank); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# we can't use -dbh or the db_handle() method ourselves or we'll go |
376
|
|
|
|
|
|
|
# infinite on the merge attempt |
377
|
6873
|
|
|
|
|
7678
|
$taxon->{'db_handle'} = $self; |
378
|
|
|
|
|
|
|
|
379
|
6873
|
|
|
|
|
13874
|
$self->_handle_internal_id($taxon, 1); |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
6877
|
|
|
|
|
14629
|
return $taxon; |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
*get_Taxonomy_Node = \&get_taxon; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 get_taxonids |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Title : get_taxonids |
391
|
|
|
|
|
|
|
Usage : my @taxonids = $db->get_taxonids('Homo sapiens'); |
392
|
|
|
|
|
|
|
Function: Searches for a taxonid (generated by the list module) based on a |
393
|
|
|
|
|
|
|
query string. Note that multiple taxonids can match to the same |
394
|
|
|
|
|
|
|
supplied name. |
395
|
|
|
|
|
|
|
Returns : array of integer ids in list context, one of these in scalar context |
396
|
|
|
|
|
|
|
Args : string representing taxon's name |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=cut |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
sub get_taxonids { |
401
|
257
|
|
|
257
|
1
|
1286
|
my ($self, $name) = @_; |
402
|
257
|
100
|
|
|
|
609
|
return wantarray() ? @{$self->{name_to_id}->{$name} || []} : $self->{name_to_id}->{$name}->[0]; |
|
257
|
50
|
|
|
|
1017
|
|
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
*get_taxonid = \&get_taxonids; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head2 ancestor |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Title : ancestor |
411
|
|
|
|
|
|
|
Usage : my $ancestor_taxon = $db->ancestor($taxon) |
412
|
|
|
|
|
|
|
Function: Retrieve the full ancestor taxon of a supplied Taxon from the |
413
|
|
|
|
|
|
|
database. |
414
|
|
|
|
|
|
|
Returns : Bio::Taxon |
415
|
|
|
|
|
|
|
Args : Bio::Taxon (that was retrieved from this database) |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=cut |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub ancestor { |
420
|
4476
|
|
|
4476
|
1
|
4261
|
my ($self, $taxon) = @_; |
421
|
4476
|
50
|
|
|
|
6311
|
$taxon || return; # for bug 2092, or something similar to it at least: shouldn't need this! |
422
|
4476
|
50
|
33
|
|
|
18255
|
$self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon'); |
423
|
4476
|
50
|
33
|
|
|
6102
|
$self->throw("The supplied Taxon must belong to this database") |
424
|
|
|
|
|
|
|
unless $taxon->db_handle && $taxon->db_handle eq $self; |
425
|
4476
|
|
33
|
|
|
6478
|
my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!"); |
426
|
|
|
|
|
|
|
|
427
|
4476
|
|
100
|
|
|
13895
|
my $ancestor_id = $self->{ancestors}->{$id} || return; |
428
|
2120
|
|
|
|
|
3110
|
return $self->get_taxon($ancestor_id); |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=head2 each_Descendent |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Title : each_Descendent |
435
|
|
|
|
|
|
|
Usage : my @taxa = $db->each_Descendent($taxon); |
436
|
|
|
|
|
|
|
Function: Get all the descendents of the supplied Taxon (but not their |
437
|
|
|
|
|
|
|
descendents, ie. not a recursive fetchall). |
438
|
|
|
|
|
|
|
Returns : Array of Bio::Taxon objects |
439
|
|
|
|
|
|
|
Args : Bio::Taxon (that was retrieved from this database) |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=cut |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub each_Descendent { |
444
|
18
|
|
|
18
|
1
|
26
|
my ($self, $taxon) = @_; |
445
|
18
|
50
|
33
|
|
|
79
|
$self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon'); |
446
|
18
|
50
|
33
|
|
|
26
|
$self->throw("The supplied Taxon must belong to this database") |
447
|
|
|
|
|
|
|
unless $taxon->db_handle && $taxon->db_handle eq $self; |
448
|
18
|
|
33
|
|
|
25
|
my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!"); |
449
|
|
|
|
|
|
|
|
450
|
18
|
|
|
|
|
16
|
my @children; |
451
|
18
|
|
|
|
|
16
|
while ( my ($child_id, undef) = each %{$self->{children}->{$id}} ) { |
|
38
|
|
|
|
|
110
|
|
452
|
20
|
|
50
|
|
|
30
|
push @children, ($self->get_taxon($child_id) || next); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
18
|
|
|
|
|
50
|
return @children; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
1; |