line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Species |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by James Gilbert |
7
|
|
|
|
|
|
|
# Reimplemented by Sendu Bala |
8
|
|
|
|
|
|
|
# Re-reimplemented by Chris Fields |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Species - Generic species object. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
$species = Bio::Species->new(-classification => [@classification]); |
21
|
|
|
|
|
|
|
# Can also pass classification |
22
|
|
|
|
|
|
|
# array to new as below |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$species->classification(qw( sapiens Homo Hominidae |
25
|
|
|
|
|
|
|
Catarrhini Primates Eutheria |
26
|
|
|
|
|
|
|
Mammalia Vertebrata Chordata |
27
|
|
|
|
|
|
|
Metazoa Eukaryota )); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$genus = $species->genus(); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$bi = $species->binomial(); # $bi is now "Homo sapiens" |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# For storing common name |
34
|
|
|
|
|
|
|
$species->common_name("human"); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# For storing subspecies |
37
|
|
|
|
|
|
|
$species->sub_species("accountant"); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
B
|
42
|
|
|
|
|
|
|
Please use that class instead.> |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Provides a very simple object for storing phylogenetic information. The |
45
|
|
|
|
|
|
|
classification is stored in an array, which is a list of nodes in a phylogenetic |
46
|
|
|
|
|
|
|
tree. Access to getting and setting species and genus is provided, but not to |
47
|
|
|
|
|
|
|
any of the other node types (eg: "phylum", "class", "order", "family"). There's |
48
|
|
|
|
|
|
|
plenty of scope for making the model more sophisticated, if this is ever needed. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
A methods are also provided for storing common names, and subspecies. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 FEEDBACK |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 Mailing Lists |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
57
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
58
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
61
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Support |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
I |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
70
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
71
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
72
|
|
|
|
|
|
|
with code and data examples if at all possible. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Reporting Bugs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
77
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
78
|
|
|
|
|
|
|
web: |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
James Gilbert email B |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Sendu Bala, bix@sendu.me.uk |
89
|
|
|
|
|
|
|
Chris Fields, cjfields at bioperl dot org |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head1 APPENDIX |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
94
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=cut |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
#' Let the code begin... |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
package Bio::Species; |
101
|
42
|
|
|
42
|
|
1796
|
use strict; |
|
42
|
|
|
|
|
77
|
|
|
42
|
|
|
|
|
1087
|
|
102
|
42
|
|
|
42
|
|
188
|
use warnings; |
|
42
|
|
|
|
|
60
|
|
|
42
|
|
|
|
|
1188
|
|
103
|
|
|
|
|
|
|
|
104
|
42
|
|
|
42
|
|
11473
|
use Bio::DB::Taxonomy; |
|
42
|
|
|
|
|
95
|
|
|
42
|
|
|
|
|
1172
|
|
105
|
42
|
|
|
42
|
|
253
|
use Bio::Tree::Tree; |
|
42
|
|
|
|
|
71
|
|
|
42
|
|
|
|
|
688
|
|
106
|
42
|
|
|
42
|
|
15228
|
use Bio::Taxon; |
|
42
|
|
|
|
|
88
|
|
|
42
|
|
|
|
|
1291
|
|
107
|
42
|
|
|
42
|
|
284
|
use base qw(Bio::Root::Root Bio::Tree::NodeI); |
|
42
|
|
|
|
|
72
|
|
|
42
|
|
|
|
|
75994
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 new |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Title : new |
112
|
|
|
|
|
|
|
Usage : my $obj = Bio::Species->new(-classification => \@class) |
113
|
|
|
|
|
|
|
Function: Build a new Species object |
114
|
|
|
|
|
|
|
Returns : Bio::Species object |
115
|
|
|
|
|
|
|
Args : -ncbi_taxid => NCBI taxonomic ID (optional) |
116
|
|
|
|
|
|
|
-classification => arrayref of classification |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub new { |
121
|
251
|
|
|
251
|
1
|
61315
|
my($class, @args) = @_; |
122
|
|
|
|
|
|
|
|
123
|
251
|
|
|
|
|
1100
|
my $self = $class->SUPER::new(@args); |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Bio::Species is now just a proxy object that just observes the NodeI |
126
|
|
|
|
|
|
|
# interface methods but delegates them to the proper classes (Bio::Taxon and |
127
|
|
|
|
|
|
|
# Bio::Tree::Tree). This will be surplanted by the much simpler |
128
|
|
|
|
|
|
|
# Bio::Taxon/Bio::DB::Taxonomy modules in the future. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Using a proxy allows proper GC w/o using weaken(). This just wraps the |
131
|
|
|
|
|
|
|
# older instances, which have no reciprocal refs (thus no circular refs). |
132
|
|
|
|
|
|
|
# This can then run proper cleanup |
133
|
|
|
|
|
|
|
|
134
|
251
|
|
|
|
|
1847
|
$self->taxon(Bio::Taxon->new(@args)); |
135
|
|
|
|
|
|
|
|
136
|
251
|
|
|
|
|
1291
|
my ($org, $sp, $var, $classification) = |
137
|
|
|
|
|
|
|
$self->_rearrange([qw(ORGANELLE |
138
|
|
|
|
|
|
|
SUB_SPECIES |
139
|
|
|
|
|
|
|
VARIANT |
140
|
|
|
|
|
|
|
CLASSIFICATION)], @args); |
141
|
|
|
|
|
|
|
|
142
|
251
|
100
|
66
|
|
|
1283
|
if (defined $classification && ref($classification) eq "ARRAY" && @{$classification}) { |
|
16
|
|
66
|
|
|
58
|
|
143
|
16
|
|
|
|
|
56
|
$self->classification(@$classification); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
else { |
146
|
235
|
|
|
|
|
1836
|
$self->tree(Bio::Tree::Tree->new()); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
251
|
50
|
|
|
|
673
|
defined $org && $self->organelle($org); |
150
|
251
|
100
|
|
|
|
680
|
defined $sp && $self->sub_species($sp); |
151
|
251
|
50
|
|
|
|
675
|
defined $var && $self->variant($var); |
152
|
|
|
|
|
|
|
|
153
|
251
|
|
|
|
|
686
|
return $self; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 classification |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Title : classification |
159
|
|
|
|
|
|
|
Usage : $self->classification(@class_array); |
160
|
|
|
|
|
|
|
@classification = $self->classification(); |
161
|
|
|
|
|
|
|
Function: Get/set the lineage of this species. The array provided must be in |
162
|
|
|
|
|
|
|
the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc. |
163
|
|
|
|
|
|
|
Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae |
164
|
|
|
|
|
|
|
Catarrhini Primates Eutheria Mammalia Vertebrata |
165
|
|
|
|
|
|
|
Chordata Metazoa Eukaryota)); |
166
|
|
|
|
|
|
|
Returns : Classification array |
167
|
|
|
|
|
|
|
Args : Classification array |
168
|
|
|
|
|
|
|
OR |
169
|
|
|
|
|
|
|
A reference to the classification array. In the latter case |
170
|
|
|
|
|
|
|
if there is a second argument and it evaluates to true, |
171
|
|
|
|
|
|
|
names will not be validated. NB: in any case, names are never |
172
|
|
|
|
|
|
|
validated anyway. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=cut |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub classification { |
177
|
305
|
|
|
305
|
1
|
1206
|
my ($self, @vals) = @_; |
178
|
|
|
|
|
|
|
|
179
|
305
|
|
|
|
|
818
|
my $taxon = $self->taxon; |
180
|
|
|
|
|
|
|
|
181
|
305
|
100
|
|
|
|
865
|
if (@vals) { |
182
|
252
|
100
|
|
|
|
902
|
if (ref($vals[0]) eq 'ARRAY') { |
183
|
2
|
|
|
|
|
4
|
@vals = @{$vals[0]}; |
|
2
|
|
|
|
|
8
|
|
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
252
|
|
100
|
|
|
672
|
$vals[1] ||= ''; |
187
|
|
|
|
|
|
|
# make sure the lineage contains us as first or second element |
188
|
|
|
|
|
|
|
# (lineage may have subspecies, species, genus ...) |
189
|
252
|
|
|
|
|
770
|
my $name = $taxon->node_name; |
190
|
252
|
|
|
|
|
1072
|
my ($genus, $species) = (quotemeta($vals[1]), quotemeta($vals[0])); |
191
|
252
|
50
|
33
|
|
|
3894
|
if ($name && |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
192
|
|
|
|
|
|
|
($name !~ m{$species}i && $name !~ m{$genus}i) && |
193
|
|
|
|
|
|
|
$name !~ m{$genus $species}i) { |
194
|
0
|
0
|
|
|
|
0
|
if ($name =~ /^$genus $species\s*(.+)/) { |
195
|
|
|
|
|
|
|
# just assume the problem is someone tried to make a Bio::Species starting at subspecies |
196
|
|
|
|
|
|
|
#*** no idea if this is appropriate! just a possible fix related to bug 2092 |
197
|
0
|
|
|
|
|
0
|
$self->sub_species($1); |
198
|
0
|
|
|
|
|
0
|
$name = $taxon->node_name("$vals[1] $vals[0]"); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
else { |
201
|
0
|
|
|
|
|
0
|
$self->warn("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')"); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# create a lineage for ourselves |
206
|
252
|
|
|
|
|
2910
|
my $db = Bio::DB::Taxonomy->new(-source => 'list', -names => [reverse @vals]); |
207
|
252
|
100
|
|
|
|
1173
|
unless ($taxon->scientific_name) { |
208
|
|
|
|
|
|
|
# assume we're supposed to be the leaf of the supplied lineage |
209
|
42
|
|
|
|
|
104
|
$self->taxon->scientific_name($vals[0]); |
210
|
|
|
|
|
|
|
} |
211
|
252
|
100
|
|
|
|
1023
|
unless ($taxon->rank) { |
212
|
|
|
|
|
|
|
# and that we are rank species |
213
|
250
|
|
|
|
|
613
|
$taxon->rank('species'); |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
252
|
|
|
|
|
1077
|
$taxon->db_handle($db); |
217
|
|
|
|
|
|
|
|
218
|
252
|
|
|
|
|
1309
|
$self->tree(Bio::Tree::Tree->new(-node => $taxon)); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
305
|
|
|
|
|
1377
|
@vals = (); |
222
|
305
|
|
|
|
|
851
|
foreach my $node ($self->tree->get_lineage_nodes($taxon), $taxon) { |
223
|
2864
|
|
100
|
|
|
4575
|
unshift(@vals, $node->scientific_name || next); |
224
|
|
|
|
|
|
|
} |
225
|
305
|
|
|
|
|
1479
|
return @vals; |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=head2 ncbi_taxid |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Title : ncbi_taxid |
231
|
|
|
|
|
|
|
Usage : $obj->ncbi_taxid($newval) |
232
|
|
|
|
|
|
|
Function: Get/set the NCBI Taxon ID |
233
|
|
|
|
|
|
|
Returns : the NCBI Taxon ID as a string |
234
|
|
|
|
|
|
|
Args : newvalue to set or undef to unset (optional) |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=cut |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 common_name |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Title : common_name |
241
|
|
|
|
|
|
|
Usage : $self->common_name( $common_name ); |
242
|
|
|
|
|
|
|
$common_name = $self->common_name(); |
243
|
|
|
|
|
|
|
Function: Get or set the common name of the species |
244
|
|
|
|
|
|
|
Example : $self->common_name('human') |
245
|
|
|
|
|
|
|
Returns : The common name in a string |
246
|
|
|
|
|
|
|
Args : String, which is the common name (optional) |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=cut |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=head2 division |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Title : division |
253
|
|
|
|
|
|
|
Usage : $obj->division($newval) |
254
|
|
|
|
|
|
|
Function: Genbank Division for a species |
255
|
|
|
|
|
|
|
Returns : value of division (a scalar) |
256
|
|
|
|
|
|
|
Args : value of division (a scalar) |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 species |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : species |
263
|
|
|
|
|
|
|
Usage : $self->species( $species ); |
264
|
|
|
|
|
|
|
$species = $self->species(); |
265
|
|
|
|
|
|
|
Function: Get or set the species name. |
266
|
|
|
|
|
|
|
Note that this is NOT genus and species |
267
|
|
|
|
|
|
|
-- use $self->binomial() for that. |
268
|
|
|
|
|
|
|
Example : $self->species('sapiens'); |
269
|
|
|
|
|
|
|
Returns : species name as string (NOT genus and species) |
270
|
|
|
|
|
|
|
Args : species name as string (NOT genus and species) |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub species { |
275
|
130
|
|
|
130
|
1
|
309
|
my ($self, $species) = @_; |
276
|
|
|
|
|
|
|
|
277
|
130
|
100
|
|
|
|
310
|
if ($species) { |
278
|
11
|
|
|
|
|
47
|
$self->{_species} = $species; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
130
|
100
|
|
|
|
351
|
unless (defined $self->{_species}) { |
282
|
|
|
|
|
|
|
# work it out from our nodes |
283
|
61
|
|
|
|
|
194
|
my $species_taxon = $self->tree->find_node(-rank => 'species'); |
284
|
61
|
50
|
|
|
|
181
|
unless ($species_taxon) { |
285
|
|
|
|
|
|
|
# just assume we are rank species |
286
|
0
|
|
|
|
|
0
|
$species_taxon = $self->taxon; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
61
|
|
|
|
|
368
|
$species = $species_taxon->scientific_name; |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# |
292
|
|
|
|
|
|
|
# munge it like the Bio::SeqIO modules used to do |
293
|
|
|
|
|
|
|
# (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments |
294
|
|
|
|
|
|
|
# referring to 'ORGANISM' etc.) |
295
|
|
|
|
|
|
|
# |
296
|
|
|
|
|
|
|
|
297
|
61
|
|
|
|
|
182
|
my $root = $self->tree->get_root_node; |
298
|
61
|
50
|
|
|
|
202
|
unless ($root) { |
299
|
0
|
|
|
|
|
0
|
$self->tree(Bio::Tree::Tree->new(-node => $species_taxon)); |
300
|
0
|
|
|
|
|
0
|
$root = $self->tree->get_root_node; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
61
|
|
|
|
|
241
|
my @spflds = split(' ', $species); |
304
|
61
|
100
|
66
|
|
|
276
|
if (@spflds > 1 && $root->node_name ne 'Viruses') { |
305
|
43
|
|
|
|
|
83
|
$species = undef; |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# does the next term start with uppercase? |
308
|
|
|
|
|
|
|
# yes: valid genus; no then unconventional |
309
|
|
|
|
|
|
|
# e.g. leaf litter basidiomycete sp. Collb2-39 |
310
|
43
|
|
|
|
|
77
|
my $genus; |
311
|
43
|
50
|
|
|
|
237
|
if ($spflds[0] =~ m/^[A-Z]/) { |
312
|
43
|
|
|
|
|
97
|
$genus = shift(@spflds); |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
else { |
315
|
0
|
|
|
|
|
0
|
undef $genus; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
43
|
|
|
|
|
67
|
my $sub_species; |
319
|
43
|
50
|
|
|
|
150
|
if (@spflds) { |
320
|
43
|
|
|
|
|
170
|
while (my $fld = shift @spflds) { |
321
|
45
|
|
|
|
|
144
|
$species .= "$fld "; |
322
|
|
|
|
|
|
|
# does it have subspecies or varieties? |
323
|
45
|
100
|
|
|
|
314
|
last if ($fld =~ m/(sp\.|var\.)/); |
324
|
|
|
|
|
|
|
} |
325
|
43
|
|
|
|
|
119
|
chop $species; # last space |
326
|
43
|
100
|
|
|
|
123
|
$sub_species = join ' ',@spflds if(@spflds); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
else { |
329
|
0
|
|
|
|
|
0
|
$species = 'sp.'; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# does ORGANISM start with any words which make its genus undefined? |
333
|
|
|
|
|
|
|
# these are in @unkn_genus |
334
|
|
|
|
|
|
|
# this in case species starts with uppercase so isn't caught above. |
335
|
|
|
|
|
|
|
# alter common name if required |
336
|
43
|
|
|
|
|
88
|
my $unconv = 0; # is it unconventional species name? |
337
|
43
|
|
|
|
|
147
|
my @unkn_genus = ('unknown','unclassified','uncultured','unidentified'); |
338
|
43
|
|
|
|
|
115
|
foreach (@unkn_genus) { |
339
|
172
|
50
|
33
|
|
|
2204
|
if ($genus && $genus =~ m/$_/i) { |
|
|
50
|
|
|
|
|
|
340
|
0
|
|
|
|
|
0
|
$species = $genus . " " . $species; |
341
|
0
|
|
|
|
|
0
|
undef $genus; |
342
|
0
|
|
|
|
|
0
|
$unconv = 1; |
343
|
0
|
|
|
|
|
0
|
last; |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
elsif ($species =~ m/$_/i) { |
346
|
0
|
|
|
|
|
0
|
$unconv = 1; |
347
|
0
|
|
|
|
|
0
|
last; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
43
|
50
|
66
|
|
|
451
|
if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/) { |
|
|
|
66
|
|
|
|
|
351
|
|
|
|
|
|
|
# need to extract subspecies from conventional ORGANISM format. |
352
|
|
|
|
|
|
|
# Will the 'word' in a two element species name |
353
|
|
|
|
|
|
|
# e.g. $species = 'thummi thummi' => $species='thummi' & |
354
|
|
|
|
|
|
|
# $sub_species='thummi' |
355
|
0
|
|
|
|
|
0
|
$sub_species = $2; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
43
|
50
|
|
|
|
242
|
$self->genus($genus) if $genus; |
359
|
43
|
100
|
|
|
|
132
|
$self->sub_species($sub_species) if $sub_species; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
61
|
|
|
|
|
191
|
$self->{_species} = $species; |
363
|
|
|
|
|
|
|
} |
364
|
130
|
|
|
|
|
387
|
return $self->{_species}; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=head2 genus |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Title : genus |
370
|
|
|
|
|
|
|
Usage : $self->genus( $genus ); |
371
|
|
|
|
|
|
|
$genus = $self->genus(); |
372
|
|
|
|
|
|
|
Function: Get or set the scientific genus name. |
373
|
|
|
|
|
|
|
Example : $self->genus('Homo'); |
374
|
|
|
|
|
|
|
Returns : Scientific genus name as string |
375
|
|
|
|
|
|
|
Args : Scientific genus name as string |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=cut |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub genus { |
380
|
190
|
|
|
190
|
1
|
393
|
my ($self, $genus) = @_; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# TODO: instead of caching the raw name, cache the actual node instance. |
383
|
190
|
100
|
|
|
|
428
|
if ($genus) { |
384
|
64
|
|
|
|
|
175
|
$self->{_genus} = $genus; |
385
|
|
|
|
|
|
|
} |
386
|
190
|
100
|
|
|
|
459
|
unless (defined $self->{_genus}) { |
387
|
18
|
|
|
|
|
55
|
my $genus_taxon = $self->tree->find_node(-rank => 'genus'); |
388
|
18
|
50
|
|
|
|
61
|
unless ($genus_taxon) { |
389
|
|
|
|
|
|
|
# just assume our ancestor is rank genus |
390
|
18
|
|
|
|
|
60
|
$genus_taxon = $self->taxon->ancestor; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
18
|
50
|
|
|
|
136
|
$self->{_genus} = $genus_taxon->scientific_name if $genus_taxon; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
190
|
|
|
|
|
435
|
return $self->{_genus}; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=head2 sub_species |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Title : sub_species |
402
|
|
|
|
|
|
|
Usage : $obj->sub_species($newval) |
403
|
|
|
|
|
|
|
Function: Get or set the scientific subspecies name. |
404
|
|
|
|
|
|
|
Returns : value of sub_species |
405
|
|
|
|
|
|
|
Args : newvalue (optional) |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=cut |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub sub_species { |
410
|
10
|
|
|
10
|
1
|
22
|
my ($self, $sub) = @_; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# TODO: instead of caching the raw name, cache the actual node instance. |
413
|
10
|
100
|
|
|
|
29
|
if (!defined $self->{'_sub_species'}) { |
414
|
6
|
|
|
|
|
17
|
my $ss_taxon = $self->tree->find_node(-rank => 'subspecies'); |
415
|
6
|
50
|
|
|
|
25
|
if ($ss_taxon) { |
416
|
0
|
0
|
|
|
|
0
|
if ($sub) { |
417
|
0
|
|
|
|
|
0
|
$ss_taxon->scientific_name($sub); |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# *** weakening ref to our root node in species() to solve a |
420
|
|
|
|
|
|
|
# memory leak means that we have a subspecies taxon to set |
421
|
|
|
|
|
|
|
# during the first call to species(), but it has vanished by |
422
|
|
|
|
|
|
|
# the time a user subsequently calls sub_species() to get the |
423
|
|
|
|
|
|
|
# value. So we 'cheat' and just store the subspecies name in |
424
|
|
|
|
|
|
|
# our self hash, instead of the tree. Is this a problem for |
425
|
|
|
|
|
|
|
# a Species object? Can't decide --sendu |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# This can now be changed to deal with this information on the |
428
|
|
|
|
|
|
|
# fly. For now, the caching remains, but maybe we should just |
429
|
|
|
|
|
|
|
# let these things deal with mutable data as needed? -- cjfields |
430
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
0
|
$self->{'_sub_species'} = $sub; |
432
|
|
|
|
|
|
|
} |
433
|
0
|
|
|
|
|
0
|
return $ss_taxon->scientific_name; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
else { |
436
|
|
|
|
|
|
|
# should we create a node here to be added to the tree? |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# fall back to direct storage on self |
441
|
10
|
100
|
|
|
|
30
|
$self->{'_sub_species'} = $sub if $sub; |
442
|
10
|
|
|
|
|
22
|
return $self->{'_sub_species'}; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head2 variant |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Title : variant |
448
|
|
|
|
|
|
|
Usage : $obj->variant($newval) |
449
|
|
|
|
|
|
|
Function: Get/set variant information for this species object (strain, |
450
|
|
|
|
|
|
|
isolate, etc). |
451
|
|
|
|
|
|
|
Example : |
452
|
|
|
|
|
|
|
Returns : value of variant (a scalar) |
453
|
|
|
|
|
|
|
Args : new value (a scalar or undef, optional) |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=cut |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
sub variant{ |
458
|
3
|
|
|
3
|
1
|
10
|
my ($self, $var) = @_; |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# TODO: instead of caching the raw name, cache the actual node instance. |
461
|
3
|
50
|
|
|
|
15
|
if (!defined $self->{'_variant'}) { |
462
|
3
|
|
|
|
|
12
|
my $var_taxon = $self->tree->find_node(-rank => 'variant'); |
463
|
3
|
50
|
|
|
|
14
|
if ($var_taxon) { |
464
|
0
|
0
|
|
|
|
0
|
if ($var) { |
465
|
0
|
|
|
|
|
0
|
$var_taxon->scientific_name($var); |
466
|
|
|
|
|
|
|
} |
467
|
0
|
|
|
|
|
0
|
return $var_taxon->scientific_name; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
else { |
470
|
|
|
|
|
|
|
# should we create a node here to be added to the tree? |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# fall back to direct storage on self |
475
|
3
|
50
|
|
|
|
13
|
$self->{'_variant'} = $var if $var; |
476
|
3
|
|
|
|
|
16
|
return $self->{'_variant'}; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=head2 binomial |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
Title : binomial |
482
|
|
|
|
|
|
|
Usage : $binomial = $self->binomial(); |
483
|
|
|
|
|
|
|
$binomial = $self->binomial('FULL'); |
484
|
|
|
|
|
|
|
Function: Returns a string "Genus species", or "Genus species subspecies", |
485
|
|
|
|
|
|
|
if the first argument is 'FULL' (and the species has a subspecies). |
486
|
|
|
|
|
|
|
Args : Optionally the string 'FULL' to get the full name including |
487
|
|
|
|
|
|
|
the subspecies. |
488
|
|
|
|
|
|
|
Note : This is just munged from the taxon() name |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub binomial { |
493
|
108
|
|
|
108
|
1
|
282
|
my ($self, $full) = @_; |
494
|
108
|
|
50
|
|
|
351
|
my $rank = $self->taxon->rank || 'no rank'; |
495
|
|
|
|
|
|
|
|
496
|
108
|
|
|
|
|
393
|
my ($species, $genus) = ($self->species, $self->genus); |
497
|
108
|
50
|
|
|
|
280
|
unless (defined $species) { |
498
|
0
|
|
|
|
|
0
|
$species = 'sp.'; |
499
|
0
|
|
|
|
|
0
|
$self->warn("requested binomial but classification was not set"); |
500
|
|
|
|
|
|
|
} |
501
|
108
|
50
|
|
|
|
232
|
$genus = '' unless( defined $genus); |
502
|
|
|
|
|
|
|
|
503
|
108
|
|
|
|
|
1166
|
$species =~ s/$genus\s+//; |
504
|
|
|
|
|
|
|
|
505
|
108
|
|
|
|
|
314
|
my $bi = "$genus $species"; |
506
|
108
|
100
|
66
|
|
|
336
|
if (defined($full) && $full =~ /full/i) { |
507
|
3
|
|
|
|
|
7
|
my $ssp = $self->sub_species; |
508
|
3
|
50
|
|
|
|
9
|
if ($ssp) { |
509
|
3
|
|
|
|
|
35
|
$ssp =~ s/$bi\s+//; |
510
|
3
|
|
|
|
|
29
|
$ssp =~ s/$species\s+//; |
511
|
3
|
|
|
|
|
10
|
$bi .= " $ssp"; |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
} |
514
|
108
|
|
|
|
|
475
|
return $bi; |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head2 validate_species_name |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
Title : validate_species_name |
520
|
|
|
|
|
|
|
Usage : $result = $self->validate_species_name($string); |
521
|
|
|
|
|
|
|
Function: Validate the species portion of the binomial |
522
|
|
|
|
|
|
|
Args : string |
523
|
|
|
|
|
|
|
Notes : The string following the "genus name" in the NCBI binomial is so |
524
|
|
|
|
|
|
|
variable that it's not clear that this is a useful function. Consider |
525
|
|
|
|
|
|
|
the binomials "Simian 11 rotavirus (serotype 3 / strain |
526
|
|
|
|
|
|
|
SA11-Patton)", or "St. Thomas 3 rotavirus", straight from GenBank. |
527
|
|
|
|
|
|
|
This is particularly problematic in microbes and viruses. As such, |
528
|
|
|
|
|
|
|
this isn't actually used automatically by any Bio::Species method. |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=cut |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub validate_species_name { |
533
|
0
|
|
|
0
|
1
|
0
|
my( $self, $string ) = @_; |
534
|
|
|
|
|
|
|
|
535
|
0
|
0
|
|
|
|
0
|
return 1 if $string eq "sp."; |
536
|
0
|
0
|
|
|
|
0
|
return 1 if $string =~ /strain/; |
537
|
0
|
0
|
|
|
|
0
|
return 1 if $string =~ /^[a-z][\w\s-]+$/i; |
538
|
0
|
|
|
|
|
0
|
$self->throw("Invalid species name '$string'"); |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub validate_name { |
542
|
0
|
|
|
0
|
0
|
0
|
return 1; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head2 organelle |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Title : organelle |
548
|
|
|
|
|
|
|
Usage : $self->organelle( $organelle ); |
549
|
|
|
|
|
|
|
$organelle = $self->organelle(); |
550
|
|
|
|
|
|
|
Function: Get or set the organelle name |
551
|
|
|
|
|
|
|
Example : $self->organelle('Chloroplast') |
552
|
|
|
|
|
|
|
Returns : The organelle name in a string |
553
|
|
|
|
|
|
|
Args : String, which is the organelle name |
554
|
|
|
|
|
|
|
Note : TODO: We currently do not know where the organelle definition will |
555
|
|
|
|
|
|
|
eventually go. This is stored in the source seqfeature, though, |
556
|
|
|
|
|
|
|
so the information isn't lost. |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=cut |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
sub organelle { |
561
|
42
|
|
|
42
|
1
|
111
|
my($self) = shift; |
562
|
42
|
100
|
|
|
|
152
|
return $self->{'_organelle'} = shift if @_; |
563
|
31
|
|
|
|
|
128
|
return $self->{'_organelle'}; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 Delegation |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
The following methods delegate to the internal Bio::Taxon instance. This is |
569
|
|
|
|
|
|
|
mainly to allow code continue using older methods, with the mind to migrate to |
570
|
|
|
|
|
|
|
using Bio::Taxon and related methods when this class is deprecated. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=cut |
573
|
|
|
|
|
|
|
|
574
|
0
|
|
|
0
|
0
|
0
|
sub node_name {shift->taxon->node_name(@_)} |
575
|
268
|
|
|
268
|
0
|
673
|
sub scientific_name {shift->taxon->node_name(@_)} |
576
|
|
|
|
|
|
|
|
577
|
0
|
|
|
0
|
1
|
0
|
sub id {shift->taxon->id(@_)} |
578
|
0
|
|
|
0
|
0
|
0
|
sub object_id {shift->taxon->id(@_)} |
579
|
361
|
|
|
361
|
1
|
2075
|
sub ncbi_taxid {shift->taxon->ncbi_taxid(@_)} |
580
|
1
|
|
|
1
|
0
|
3
|
sub rank {shift->taxon->rank(@_)} |
581
|
0
|
|
|
0
|
1
|
0
|
sub division {shift->taxon->division(@_)} |
582
|
|
|
|
|
|
|
|
583
|
0
|
|
|
0
|
0
|
0
|
sub common_names {shift->taxon->common_names(@_)} |
584
|
128
|
|
|
128
|
1
|
518
|
sub common_name {shift->taxon->common_names(@_)} |
585
|
|
|
|
|
|
|
|
586
|
0
|
|
|
0
|
0
|
0
|
sub genetic_code {shift->taxon->genetic_code(@_)} |
587
|
0
|
|
|
0
|
0
|
0
|
sub mitochondrial_genetic_code {shift->taxon->mitochondrial_genetic_code(@_)} |
588
|
|
|
|
|
|
|
|
589
|
0
|
|
|
0
|
0
|
0
|
sub create_date { shift->taxon->create_date(@_)} |
590
|
0
|
|
|
0
|
0
|
0
|
sub pub_date { shift->taxon->pub_date(@_)} |
591
|
0
|
|
|
0
|
0
|
0
|
sub update_date { shift->taxon->update_date(@_)} |
592
|
|
|
|
|
|
|
|
593
|
0
|
|
|
0
|
0
|
0
|
sub db_handle { shift->taxon->db_handle(@_)} |
594
|
|
|
|
|
|
|
|
595
|
0
|
|
|
0
|
0
|
0
|
sub parent_id { shift->taxon->parent_id(@_)} |
596
|
0
|
|
|
0
|
0
|
0
|
sub parent_taxon_id { shift->taxon->parent_id(@_)} |
597
|
|
|
|
|
|
|
|
598
|
0
|
|
|
0
|
0
|
0
|
sub version { shift->taxon->version(@_)} |
599
|
0
|
|
|
0
|
0
|
0
|
sub authority { shift->taxon->authority(@_)} |
600
|
0
|
|
|
0
|
0
|
0
|
sub namespace { shift->taxon->namespace(@_)} |
601
|
|
|
|
|
|
|
|
602
|
0
|
|
|
0
|
1
|
0
|
sub ancestor { shift->taxon->ancestor(@_)} |
603
|
0
|
|
|
0
|
0
|
0
|
sub get_Parent_Node { shift->taxon->get_Parent_Node(@_)} |
604
|
0
|
|
|
0
|
1
|
0
|
sub each_Descendent { shift->taxon->each_Descendent(@_)} |
605
|
0
|
|
|
0
|
0
|
0
|
sub get_Children_Nodes { shift->taxon->get_Children_Nodes(@_)} |
606
|
0
|
|
|
0
|
0
|
0
|
sub remove_Descendant { shift->taxon->remove_Descendant(@_)} |
607
|
|
|
|
|
|
|
|
608
|
213
|
|
|
213
|
0
|
635
|
sub name { shift->taxon->name(@_)} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=head2 taxon |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
Title : taxon |
613
|
|
|
|
|
|
|
Usage : $obj->taxon |
614
|
|
|
|
|
|
|
Function : retrieve the internal Bio::Taxon instance |
615
|
|
|
|
|
|
|
Returns : A Bio::Taxon. If one is not previously set, |
616
|
|
|
|
|
|
|
an instance is created lazily |
617
|
|
|
|
|
|
|
Args : Bio::Taxon (optional) |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=cut |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
sub taxon { |
622
|
1984
|
|
|
1984
|
1
|
3292
|
my ($self, $taxon) = @_; |
623
|
1984
|
100
|
66
|
|
|
8503
|
if (!$self->{taxon} || $taxon) { |
624
|
251
|
|
33
|
|
|
651
|
$taxon ||= Bio::Taxon->new(); |
625
|
251
|
|
|
|
|
644
|
$self->{taxon} = $taxon; |
626
|
|
|
|
|
|
|
} |
627
|
1984
|
|
|
|
|
5823
|
$self->{taxon}; |
628
|
|
|
|
|
|
|
} |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=head2 tree |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
Title : tree |
633
|
|
|
|
|
|
|
Usage : $obj->tree |
634
|
|
|
|
|
|
|
Function : Returns a Bio::Tree::Tree object |
635
|
|
|
|
|
|
|
Returns : A Bio::Tree::Tree. If one is not previously set, |
636
|
|
|
|
|
|
|
an instance is created lazily |
637
|
|
|
|
|
|
|
Args : Bio::Tree::Tree (optional) |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=cut |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
sub tree { |
642
|
1230
|
|
|
1230
|
1
|
2476
|
my ($self, $tree) = @_; |
643
|
1230
|
100
|
100
|
|
|
5627
|
if (!$self->{tree} || $tree) { |
644
|
487
|
|
33
|
|
|
1175
|
$tree ||= Bio::Tree::Tree->new(); |
645
|
487
|
|
|
|
|
1110
|
delete $tree->{_root_cleanup_methods}; |
646
|
487
|
|
|
|
|
1351
|
$self->{tree} = $tree; |
647
|
|
|
|
|
|
|
} |
648
|
1230
|
|
|
|
|
4733
|
$self->{tree}; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
sub DESTROY { |
652
|
289
|
|
|
289
|
|
18299
|
my $self = shift; |
653
|
289
|
|
|
|
|
1409
|
$self->tree->cleanup_tree; |
654
|
289
|
|
|
|
|
1460
|
delete $self->{tree}; |
655
|
289
|
|
|
|
|
1022
|
$self->taxon->node_cleanup; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
1; |