line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Palantir; |
2
|
|
|
|
|
|
|
# ABSTRACT: core classes and utilities for Bio::Palantir |
3
|
|
|
|
|
|
|
# CONTRIBUTOR: Denis BAURAIN <denis.baurain@uliege.be> |
4
|
|
|
|
|
|
|
$Bio::Palantir::VERSION = '0.200700'; |
5
|
1
|
|
|
1
|
|
216779
|
use strict; use warnings; |
|
1
|
|
|
1
|
|
10
|
|
|
1
|
|
|
|
|
23
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
695
|
use Bio::Palantir::Parser; use Bio::Palantir::Refiner; use |
|
1
|
|
|
1
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
|
1
|
|
|
|
|
444
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
49
|
|
8
|
1
|
|
|
1
|
|
460
|
Bio::Palantir::Explorer; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
56
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
1; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
__END__ |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=pod |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Bio::Palantir - core classes and utilities for Bio::Palantir |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 VERSION |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
version 0.200700 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use Bio::Palantir; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# open and parse biosynML.xml or regions.js antiSMASH report |
29
|
|
|
|
|
|
|
my $infile = 'biosynML.xml'; |
30
|
|
|
|
|
|
|
my $report = Bio::Palantir::Parser->new( file => $infile ); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# get main container |
33
|
|
|
|
|
|
|
my $root = $report->root; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# explore Biosynthetic Gene Clusters (BGCs) content |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Bio::Palantir::Parser |
38
|
|
|
|
|
|
|
for my $cluster ($root->all_clusters) { # returns all clusters |
39
|
|
|
|
|
|
|
say $cluster->type; # returns the cluster type (e.g., nrps) |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
for my $gene ($cluster->all_genes) { # returns all genes |
42
|
|
|
|
|
|
|
say $gene->name; # for instance, returns the gene name |
say $gene->genomic_coordinates; # returns DNA gene coordinates (relative to the genome) |
43
|
|
|
|
|
|
|
say $gene->coordinates; # returns protein gene coordinates (also relative to the genome) |
44
|
|
|
|
|
|
|
say $gene->protein_sequence; # returns the gene protein sequence |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# if the BGC possesses domains (i.e., NRPS/PKS) |
47
|
|
|
|
|
|
|
for my $domain ($gene->all_domains) { # returns all domains |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
say $domain->rank; # for instance, returns the rank of the domain in the gene |
50
|
|
|
|
|
|
|
say $domain->function; # returns the domain function (e.g., condensation) |
51
|
|
|
|
|
|
|
say join '-', $domain->coordinates; # returns the coordinates (which are relative to the gene ones) |
52
|
|
|
|
|
|
|
say $domain->protein_sequence; # returns the domain protein sequence |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# lowest level is Motifs (for antiSMASH 3 and 4) |
55
|
|
|
|
|
|
|
for my $motif ($domain->all_motifs) { |
56
|
|
|
|
|
|
|
#... |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# same way for looping into Module objects |
61
|
|
|
|
|
|
|
for my $module ($cluster->all_modules) { |
62
|
|
|
|
|
|
|
# ... |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Bio::Palantir::Refiner |
68
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Refiner'; |
69
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Refiner::ClusterPlus'; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# it is possible to create Bio::Palantir::Refiner objects from already existing Bio::Palantir::Parser ones |
72
|
|
|
|
|
|
|
my @cluster_plus; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
for my $cluster ($root->all_clusters) { |
75
|
|
|
|
|
|
|
push @cluster_plus, ClusterPlus->new( _cluster => $cluster ); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# but if you intend to use the Refiner part, it is more convenient to create the Refiner object directly from a file |
79
|
|
|
|
|
|
|
my $report = Refiner->new( file => 'biosynML.xml' ); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
for my $cluster_plus ($report->all_clusters) { |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
say $cluster_plus->type; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
for my $gene_plus ($cluster_plus->all_genes) { |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
say $gene_plus->name; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
for my $domain_plus ($gene_plus->all_domains) { |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
say 'Palantir version:'; |
92
|
|
|
|
|
|
|
say $domain_plus->function; |
93
|
|
|
|
|
|
|
say $domain_plus->coordinates; |
94
|
|
|
|
|
|
|
say $domain_plus->evalue; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# compare with antiSMASH results |
97
|
|
|
|
|
|
|
say 'antiSMASH version:'; say $domain_plus->_domain->function; |
98
|
|
|
|
|
|
|
say $domain_plus->_domain->coordinates; |
99
|
|
|
|
|
|
|
# say $domain_plus->evalue; # only available for Palantir part |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Bio::Palantir::Explorer |
109
|
|
|
|
|
|
|
use aliased 'Bio::Palantir::Explorer::ClusterFasta'; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# from a Bio::Palantir::Refiner object |
112
|
|
|
|
|
|
|
for my $cluster_plus ($report->all_clusters) { |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
for my $gene_plus ($cluster_plus->all_genes) { |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
for my $domain_exp ($gene_plus->all_exp_domains) { |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
say $domain_exp->function; |
119
|
|
|
|
|
|
|
say $domain_exp->coordinates; |
120
|
|
|
|
|
|
|
say $domain_exp->evalue; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# from a FASTA file (containing ONLY one BGC, each sequence being interpreted as a gene from the cluster) |
129
|
|
|
|
|
|
|
my $cluster_exp = ClusterFasta->new( fasta => 'nrps_bgc.fasta' ); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
for my $gene_exp ($cluster_exp->all_genes) { |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
for my $domain_exp ($gene_exp->all_domains) { |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
say $domain_exp->function; |
136
|
|
|
|
|
|
|
say $domain_exp->coordinates; |
137
|
|
|
|
|
|
|
say $domain_exp->evalue; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 DESCRIPTION |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
This distribution is the base of the C<Bio::Palantir> module collection designed |
146
|
|
|
|
|
|
|
as a toolbox for handling the post-processing of antiSMASH report data |
147
|
|
|
|
|
|
|
(L<https://antismash.secondarymetabolites.org>) and improving in some aspects |
148
|
|
|
|
|
|
|
its annotation of NRPS/PKS Biosynthetic Gene Clusters (BGCs), aiming then to |
149
|
|
|
|
|
|
|
support small and large-scale genome mining projects. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
The B<Palantir libraries> are organized as follows: |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
C<Bio::Palantir::Parser> contains classes for hierarchically storing the |
154
|
|
|
|
|
|
|
information of antiSMASH gene clusters. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
C<Bio::Palantir::Refiner> consists of classes (parallel to Parser) dedicated to |
157
|
|
|
|
|
|
|
the improvement of NRPS/PKS gene cluster annotation, as parallel classes to |
158
|
|
|
|
|
|
|
Bio::Palantir::Parser. |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
C<Bio::Palantir::Explorer> contains classes (also parallel to Parser) giving |
161
|
|
|
|
|
|
|
access to an exploratory version of detected domains. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
More information on their internal structure can be found in their respective |
164
|
|
|
|
|
|
|
files. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Here is the list of functionalities offered by Palantir libraries and bins: |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Refinement of NRPS/PKS BGC annotations |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
- B<Dynamic elongation of the coordinates of core domains>: enrich the |
171
|
|
|
|
|
|
|
information contained in the sequences (application examples: improved |
172
|
|
|
|
|
|
|
similarity searches and evolutionary approaches) |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
- B<Filling the gaps in BGC annotation>: retrieve missed domains from exceptions |
175
|
|
|
|
|
|
|
in the rules detection (application example: resolution of ambiguous or |
176
|
|
|
|
|
|
|
incoherent BGC annotation) |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
- B<Module delimitation>: apply biological rules to group domains in modules |
179
|
|
|
|
|
|
|
(application example: analyses at module scale) |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
- B<BGC visualization>: visualize and compare antiSMASH and Palantir annotations |
182
|
|
|
|
|
|
|
[bin/draw_clusters.pl] |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
- B<Exploratory mode visualization>: visualize and design the domain |
185
|
|
|
|
|
|
|
architecture consensus from a raw view of all detected signatures (application |
186
|
|
|
|
|
|
|
example: manual curation of the domain architecture consensus) |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
BGC data manipulation |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
- B<Generation of PDF/Word reports>: export customizable reports of refined BGC |
191
|
|
|
|
|
|
|
data (application example: manual reading of numerous (filtered) BGC data) |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
- B<Extraction of sequences>: export Fasta files from BGC data at different |
194
|
|
|
|
|
|
|
scales: cluster, gene, module, domain (application example: data formatting for |
195
|
|
|
|
|
|
|
downstream analyses) |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
- B<Generation of SQL tables>: export SQL tables containing BGC data details |
198
|
|
|
|
|
|
|
(application example: large-scale queries and statistics) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 AUTHOR |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Loic MEUNIER <lmeunier@uliege.be> |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head1 CONTRIBUTOR |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=for stopwords Denis BAURAIN |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
215
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |