| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::Palantir; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: core classes and utilities for Bio::Palantir |
|
3
|
|
|
|
|
|
|
# CONTRIBUTOR: Denis BAURAIN <denis.baurain@uliege.be> |
|
4
|
|
|
|
|
|
|
$Bio::Palantir::VERSION = '0.200700'; |
|
5
|
1
|
|
|
1
|
|
216779
|
use strict; use warnings; |
|
|
1
|
|
|
1
|
|
10
|
|
|
|
1
|
|
|
|
|
23
|
|
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
20
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
695
|
use Bio::Palantir::Parser; use Bio::Palantir::Refiner; use |
|
|
1
|
|
|
1
|
|
3
|
|
|
|
1
|
|
|
|
|
30
|
|
|
|
1
|
|
|
|
|
444
|
|
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
49
|
|
|
8
|
1
|
|
|
1
|
|
460
|
Bio::Palantir::Explorer; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
56
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
1; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
__END__ |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=pod |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Bio::Palantir - core classes and utilities for Bio::Palantir |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 VERSION |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
version 0.200700 |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use Bio::Palantir; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# open and parse biosynML.xml or regions.js antiSMASH report |
|
29
|
|
|
|
|
|
|
my $infile = 'biosynML.xml'; |
|
30
|
|
|
|
|
|
|
my $report = Bio::Palantir::Parser->new( file => $infile ); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# get main container |
|
33
|
|
|
|
|
|
|
my $root = $report->root; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# explore Biosynthetic Gene Clusters (BGCs) content |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Bio::Palantir::Parser |
|
38
|
|
|
|
|
|
|
for my $cluster ($root->all_clusters) { # returns all clusters |
|
39
|
|
|
|
|
|
|
say $cluster->type; # returns the cluster type (e.g., nrps) |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
for my $gene ($cluster->all_genes) { # returns all genes |
|
42
|
|
|
|
|
|
|
say $gene->name; # for instance, returns the gene name
say $gene->genomic_coordinates; # returns DNA gene coordinates (relative to the genome) |
|
43
|
|
|
|
|
|
|
say $gene->coordinates; # returns protein gene coordinates (also relative to the genome) |
|
44
|
|
|
|
|
|
|
say $gene->protein_sequence; # returns the gene protein sequence |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# if the BGC possesses domains (i.e., NRPS/PKS) |
|
47
|
|
|
|
|
|
|
for my $domain ($gene->all_domains) { # returns all domains |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
say $domain->rank; # for instance, returns the rank of the domain in the gene |
|
50
|
|
|
|
|
|
|
say $domain->function; # returns the domain function (e.g., condensation) |
|
51
|
|
|
|
|
|
|
say join '-', $domain->coordinates; # returns the coordinates (which are relative to the gene ones) |
|
52
|
|
|
|
|
|
|
say $domain->protein_sequence; # returns the domain protein sequence |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# lowest level is Motifs (for antiSMASH 3 and 4) |
|
55
|
|
|
|
|
|
|
for my $motif ($domain->all_motifs) { |
|
56
|
|
|
|
|
|
|
#... |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# same way for looping into Module objects |
|
61
|
|
|
|
|
|
|
for my $module ($cluster->all_modules) { |
|
62
|
|
|
|
|
|
|
# ... |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Bio::Palantir::Refiner |
|
68
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Refiner'; |
|
69
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Refiner::ClusterPlus'; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# it is possible to create Bio::Palantir::Refiner objects from already existing Bio::Palantir::Parser ones |
|
72
|
|
|
|
|
|
|
my @cluster_plus; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
for my $cluster ($root->all_clusters) { |
|
75
|
|
|
|
|
|
|
push @cluster_plus, ClusterPlus->new( _cluster => $cluster ); |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# but if you intend to use the Refiner part, it is more convenient to create the Refiner object directly from a file |
|
79
|
|
|
|
|
|
|
my $report = Refiner->new( file => 'biosynML.xml' ); |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
for my $cluster_plus ($report->all_clusters) { |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
say $cluster_plus->type; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
for my $gene_plus ($cluster_plus->all_genes) { |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
say $gene_plus->name; |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
for my $domain_plus ($gene_plus->all_domains) { |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
say 'Palantir version:'; |
|
92
|
|
|
|
|
|
|
say $domain_plus->function; |
|
93
|
|
|
|
|
|
|
say $domain_plus->coordinates; |
|
94
|
|
|
|
|
|
|
say $domain_plus->evalue; |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# compare with antiSMASH results |
|
97
|
|
|
|
|
|
|
say 'antiSMASH version:'; say $domain_plus->_domain->function; |
|
98
|
|
|
|
|
|
|
say $domain_plus->_domain->coordinates; |
|
99
|
|
|
|
|
|
|
# say $domain_plus->evalue; # only available for Palantir part |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Bio::Palantir::Explorer |
|
109
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Explorer::ClusterFasta'; |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# from a Bio::Palantir::Refiner object |
|
112
|
|
|
|
|
|
|
for my $cluster_plus ($report->all_clusters) { |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
for my $gene_plus ($cluster_plus->all_genes) { |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
for my $domain_exp ($gene_plus->all_exp_domains) { |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
say $domain_exp->function; |
|
119
|
|
|
|
|
|
|
say $domain_exp->coordinates; |
|
120
|
|
|
|
|
|
|
say $domain_exp->evalue; |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# from a FASTA file (containing ONLY one BGC, each sequence being interpreted as a gene from the cluster) |
|
129
|
|
|
|
|
|
|
my $cluster_exp = ClusterFasta->new( fasta => 'nrps_bgc.fasta' ); |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
for my $gene_exp ($cluster_exp->all_genes) { |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
for my $domain_exp ($gene_exp->all_domains) { |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
say $domain_exp->function; |
|
136
|
|
|
|
|
|
|
say $domain_exp->coordinates; |
|
137
|
|
|
|
|
|
|
say $domain_exp->evalue; |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
This distribution is the base of the C<Bio::Palantir> module collection designed |
|
146
|
|
|
|
|
|
|
as a toolbox for handling the post-processing of antiSMASH report data |
|
147
|
|
|
|
|
|
|
(L<https://antismash.secondarymetabolites.org>) and improving in some aspects |
|
148
|
|
|
|
|
|
|
its annotation of NRPS/PKS Biosynthetic Gene Clusters (BGCs), aiming then to |
|
149
|
|
|
|
|
|
|
support small and large-scale genome mining projects. |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
The B<Palantir libraries> are organized as follows: |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
C<Bio::Palantir::Parser> contains classes for hierarchically storing the |
|
154
|
|
|
|
|
|
|
information of antiSMASH gene clusters. |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
C<Bio::Palantir::Refiner> consists of classes (parallel to those of |
|
157
|
|
|
|
|
|
|
C<Bio::Palantir::Parser>) dedicated to improving the annotation of NRPS/PKS |
|
158
|
|
|
|
|
|
|
gene clusters. |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
C<Bio::Palantir::Explorer> contains classes (also parallel to Parser) giving |
|
161
|
|
|
|
|
|
|
access to an exploratory version of detected domains. |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
More information on their internal structure can be found in their respective |
|
164
|
|
|
|
|
|
|
files. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Here is the list of functionalities offered by Palantir libraries and bins: |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Refinement of NRPS/PKS BGC annotations |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
- B<Dynamic elongation of the coordinates of core domains>: enrich the |
|
171
|
|
|
|
|
|
|
information contained in the sequences (application examples: improved |
|
172
|
|
|
|
|
|
|
similarity searches and evolutionary approaches) |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
- B<Filling the gaps in BGC annotation>: retrieve missed domains from exceptions |
|
175
|
|
|
|
|
|
|
in the detection rules (application example: resolution of ambiguous or |
|
176
|
|
|
|
|
|
|
incoherent BGC annotation) |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
- B<Module delimitation>: apply biological rules to group domains in modules |
|
179
|
|
|
|
|
|
|
(application example: analyses at module scale) |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
- B<BGC visualization>: visualize and compare antiSMASH and Palantir annotations |
|
182
|
|
|
|
|
|
|
[bin/draw_clusters.pl] |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
- B<Exploratory mode visualization>: visualize and design the domain |
|
185
|
|
|
|
|
|
|
architecture consensus from a raw view of all detected signatures (application |
|
186
|
|
|
|
|
|
|
example: manual curation of the domain architecture consensus) |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
BGC data manipulation |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
- B<Generation of PDF/Word reports>: export customizable reports of refined BGC |
|
191
|
|
|
|
|
|
|
data (application example: manual reading of numerous (filtered) BGC data) |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
- B<Extraction of sequences>: export Fasta files from BGC data at different |
|
194
|
|
|
|
|
|
|
scales: cluster, gene, module, domain (application example: data formatting for |
|
195
|
|
|
|
|
|
|
downstream analyses) |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
- B<Generation of SQL tables>: export SQL tables containing BGC data details |
|
198
|
|
|
|
|
|
|
(application example: large-scale queries and statistics) |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 AUTHOR |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Loic MEUNIER <lmeunier@uliege.be> |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head1 CONTRIBUTOR |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=for stopwords Denis BAURAIN |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
215
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |