File Coverage

Bio/DB/Taxonomy/silva.pm

Criterion	Covered	Total	%
statement	31	31	100.0
branch	8	10	80.0
condition			n/a
subroutine	5	5	100.0
pod	1	1	100.0
total	45	47	95.7

line	stmt	bran	sub	pod	time	code
1						#
2						# BioPerl module for Bio::DB::Taxonomy::silva
3						#
4						# Please direct questions and support issues to <bioperl-l@bioperl.org>
5						#
6						# Copyright Florent Angly
7						#
8						# You may distribute this module under the same terms as perl itself
9
10
11						=head1 NAME
12
13						Bio::DB::Taxonomy::silva - Use the Silva taxonomy
14
15						=head1 SYNOPSIS
16
17						use Bio::DB::Taxonomy;
18
19						my $db = Bio::DB::Taxonomy->new(
20						-source => 'silva',
21						-taxofile => 'SSURef_108_tax_silva_trunc.fasta',
22						);
23
24						=head1 DESCRIPTION
25
26						This is an implementation of Bio::DB::Taxonomy which stores and accesses the
27						Silva taxonomy. Internally, Bio::DB::Taxonomy::silva keeps the taxonomy
28						in memory by using Bio::DB::Taxonomy::list. As a consequence, note that the
29						IDs assigned to the taxonomy nodes, e.g. sv72, are arbitrary, contrary to the
30						pre-defined IDs that NCBI assigns to taxons. Note also that no rank names or
31						common names are assigned to the taxa of Bio::DB::Taxonomy::silva.
32
33						The latest Silva taxonomy (2011) contains about 126,000 taxa and occupies
34						about 124 MB of memory once parsed into a Bio::DB::Taxonomy::silva object.
35						Obviously, it can take a little while to load.
36
37						The taxonomy file SSURef_108_tax_silva_trunc.fasta that this module uses is
38						available from the SILVA website, L<https://www.arb-silva.de/>.
39
40						=head1 FEEDBACK
41
42						=head2 Mailing Lists
43
44						User feedback is an integral part of the evolution of this and other
45						Bioperl modules. Send your comments and suggestions preferably to
46						the Bioperl mailing list. Your participation is much appreciated.
47
48						bioperl-l@bioperl.org - General discussion
49						http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50
51						=head2 Support
52
53						Please direct usage questions or support issues to the mailing list:
54
55						I<bioperl-l@bioperl.org>
56
57						rather than to the module maintainer directly. Many experienced and
58						responsive experts will be able to look at the problem and quickly
59						address it. Please include a thorough description of the problem
60						with code and data examples if at all possible.
61
62						=head2 Reporting Bugs
63
64						Report bugs to the Bioperl bug tracking system to help us keep track
65						of the bugs and their resolution. Bug reports can be submitted via
66						the web:
67
68						https://github.com/bioperl/bioperl-live/issues
69
70						=head1 AUTHOR - Florent Angly
71
72						florent.angly@gmail.com
73
74						=head1 APPENDIX
75
76						The rest of the documentation details each of the object methods.
77						Internal methods are usually preceded with a _
78
79						=cut
80
81
82						package Bio::DB::Taxonomy::silva;
83
84	1		1		3	use strict;
	1				1
	1				24
85	1		1		325	use Bio::SeqIO;
	1				2
	1				26
86
87	1		1		4	use base qw(Bio::DB::Taxonomy Bio::DB::Taxonomy::list);
	1				1
	1				478
88
89						$Bio::DB::Taxonomy::list::prefix = 'sv';
90
91
92						=head2 new
93
94						Title : new
95						Usage : my $obj = Bio::DB::Taxonomy::silva->new();
96						Function: Builds a new Bio::DB::Taxonomy::silva object
97						Returns : an instance of Bio::DB::Taxonomy::silva
98						Args : -taxofile => name of the FASTA file containing the taxonomic information,
99						typically 'SSURef_108_tax_silva_trunc.fasta' (mandatory)
100
101						=cut
102
103						sub new {
104						# Override Bio::DB::Taxonomy
105	2		2	1	4	my($class, @args) = @_;
106	2				8	my $self = $class->SUPER::new(@args);
107	2				9	my ($taxofile) = $self->_rearrange([qw(TAXOFILE)], @args);
108
109	2	100			5	if ( $taxofile ) {
110	1				2	$self = $self->_build_taxonomy($taxofile);
111						}
112
113	2				12	return $self;
114						}
115
116
117						sub _build_taxonomy {
118	1		1		1	my ($self, $taxofile) = @_;
119
120	1				2	my $taxonomy = Bio::DB::Taxonomy::list->new();
121	1				1	my %taxas;
122	1				4	my $desc_re = qr/^>\S+?(?:\s+(.+))?$/;
123
124						# One could open the file using Bio::SeqIO::fasta, but it is slower and we
125						# only need the sequence descriptions
126
127	1	50			37	open my $in, '<', $taxofile or $self->throw("Could not read file '$taxofile': $!");
128
129						# Populate taxonomy with taxonomy obtained from sequence description
130	1				19	while (my $line = <$in>) {
131
132	162	100			656	next if $line !~ $desc_re;
133	57				85	my $taxo_string = $1;
134	57	50			65	next if not $taxo_string;
135
136						# Example of taxonomy string:
137						# 1/ Bacteria;Firmicutes;Bacilli;Lactobacillales;Enterococcaceae;Enterococcus;Enterococcus faecium DO
138						# 2/ Eukaryota;Metazoa;Chordata;Craniata;Vertebrata;Euteleostomi;Mammalia;Eutheria;Euarchontoglires;Glires;
139						# Rodentia;Sciurognathi;Muroidea;Muridae;Murinae;Rattus;;Rattus norvegicus (Norway rat)
140
141						# Skip already seen taxas
142	57	100			124	next if exists $taxas{$taxo_string};
143	42				71	$taxas{$taxo_string} = undef;
144
145						# Strip the common name (could save it if Bio::DB::Taxonomy::list supported it)
146	42				55	$taxo_string =~ s/ \(.*\)$//;
147
148						# Save lineage
149						# Unfortunately, we cannot easily add ranks since they vary from 2 to 23 for every entry
150	42				145	my @names = split /;/, $taxo_string;
151	42				93	$taxonomy->add_lineage(
152						-names => \@names,
153						);
154
155						}
156
157	1				21	close $in;
158
159	1				11	return $taxonomy;
160						}
161
162
163						1;