| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package NBI::Launcher::Kraken2; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ============================================================================= |
|
4
|
|
|
|
|
|
|
# NBI::Launcher::Kraken2 - Taxonomic classification using Kraken2 |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Reference implementation of the NBI::Launcher subclass pattern. |
|
7
|
|
|
|
|
|
|
# make_command() and build() are overridden - the base class handles everything else. |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# Tool: Kraken2 https://ccb.jhu.edu/software/kraken2/ |
|
10
|
|
|
|
|
|
|
# Mode: single-end or paired-end (auto-detected from --r2 presence) |
|
11
|
|
|
|
|
|
|
# Notes: --threads is auto-synced from --cpus (slurm_sync). |
|
12
|
|
|
|
|
|
|
# Database path defaults to $KRAKEN2_DB env var, then the hardcoded path. |
|
13
|
|
|
|
|
|
|
# ============================================================================= |
|
14
|
|
|
|
|
|
|
|
|
15
|
1
|
|
|
1
|
|
265878
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
34
|
|
|
16
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
57
|
|
|
17
|
1
|
|
|
1
|
|
5
|
use parent 'NBI::Launcher'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
7
|
|
|
18
|
1
|
|
|
1
|
|
56
|
use POSIX qw(ceil); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
3
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub new {
    my $class = shift;

    # ── Inputs ───────────────────────────────────────────────────────────
    # r1 is mandatory; r2 is optional and its presence selects paired-end
    # mode in make_command().
    my @inputs = (
        {
            name     => 'r1',
            flag     => '-1',
            type     => 'file',
            required => 1,
            help     => 'Forward reads (or single-end FASTQ)',
        },
        {
            name     => 'r2',
            flag     => '-2',
            type     => 'file',
            required => 0,
            help     => 'Reverse reads - omit for single-end mode',
        },
    );

    # ── Parameters ───────────────────────────────────────────────────────
    my @params = (
        {
            name        => 'db',
            flag        => '--db',
            type        => 'dir',
            required    => 1,
            default     => '/qib/databases/kraken2/standard',
            default_env => 'KRAKEN2_DB',
            help        => 'Kraken2 database directory',
        },
        {
            name    => 'confidence',
            flag    => '--confidence',
            type    => 'float',
            default => 0.0,
            help    => 'Confidence score threshold (0.0–1.0)',
        },
        # slurm_sync: not shown in --help; value comes from --cpus
        {
            name       => 'threads',
            flag       => '--threads',
            type       => 'int',
            slurm_sync => 'threads',
        },
    );

    # ── Outputs ──────────────────────────────────────────────────────────
    # File name patterns use the {sample} placeholder.
    my @outputs = (
        {
            name     => 'report',
            flag     => '--report',
            pattern  => '{sample}.k2report',
            required => 1,
            help     => 'Per-taxon classification report',
        },
        {
            name     => 'output',
            flag     => '--output',
            pattern  => '{sample}.k2out',
            required => 0,
            help     => 'Per-read classification output',
        },
    );

    # ── Slurm defaults ───────────────────────────────────────────────────
    # build() replaces the memory request with ceil(db_size_gb * 1.4) + 100
    # when the database directory can be measured; the figure below is the
    # fallback for when it cannot (e.g. dry-run without a real db).
    my %slurm_defaults = (
        queue   => 'qib-short',
        threads => 8,
        memory  => 64,            # GB fallback - overridden by db-size calc
        runtime => '24:00:00',
    );

    return $class->SUPER::new(
        name        => 'kraken2',
        description => 'Taxonomic classification of sequencing reads',
        version     => '2.0.8',

        # ── Activation ───────────────────────────────────────────────────
        # Using HPC module here. To switch to singularity, replace with:
        #   activate => { singularity => '/path/to/kraken2.sif' },
        activate => { module => 'kraken2/2.0.8' },

        slurm_defaults => {%slurm_defaults},
        inputs         => \@inputs,
        params         => \@params,
        outputs        => \@outputs,

        outdir  => { flag => '--outdir', short => '-o', required => 1 },
        scratch => { use_tmpdir => 1, cleanup_on_failure => 1 },
    );
}
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# ── make_command(%args) ───────────────────────────────────────────────────────
# Builds the kraken2 invocation as one string, with each option on its own
# backslash-continued line. Handles single-end and paired-end modes.
#
# %args keys used here:
#   r1, r2      - input FASTQ paths (r2 undef for single-end)
#   db          - database directory
#   confidence  - confidence threshold (flag omitted when undef)
#   threads     - from slurm_sync      (flag omitted when undef)
#   sample      - derived sample name
#
# Returns: the command string.
#
# Notes:
#   * Kraken2 takes its read files as positional arguments; paired-end mode is
#     selected with --paired followed by the two files. The -1/-2 flags
#     declared in new() are therefore not forwarded to kraken2.
#   * Paths and the sample name are placed inside double-quoted shell words
#     with \ " $ and ` escaped, so unusual characters cannot break the
#     quoting. Ordinary paths are emitted unchanged.
#   * Output paths reference $SCRATCH (shell variable, not a Perl variable),
#     which is deliberately left unescaped so the shell expands it.
sub make_command {
    my ($self, %args) = @_;

    # Escape what remains special inside a double-quoted shell word.
    my $escape = sub {
        my $text = shift // '';
        $text =~ s/([\\"\$`])/\\$1/g;
        return $text;
    };
    my $quote = sub { return '"' . $escape->(shift) . '"' };

    my $sample = $escape->($args{sample});

    my @parts = ('kraken2');

    # A flag without its value would swallow the next token, so skip it and
    # let kraken2 fall back to its own default.
    push @parts, "--threads $args{threads}" if defined $args{threads};
    push @parts, '--db ' . $quote->($args{db});
    push @parts, "--confidence $args{confidence}" if defined $args{confidence};

    push @parts, '--report "$SCRATCH/' . $sample . '.k2report"';
    push @parts, '--output "$SCRATCH/' . $sample . '.k2out"';

    # Read files go last, as positional arguments.
    push @parts,
        defined $args{r2}
        ? join(' ', '--paired', $quote->($args{r1}), $quote->($args{r2}))
        : $quote->($args{r1});

    return join(" \\\n ", @parts);
}
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# ── build(%args) ─────────────────────────────────────────────────────────────
# Wraps the base-class build() to size the memory request from the Kraken2
# database before delegating.
#
# When slurm_memory is absent (or undef) in %args, the database directory is
# resolved as: db argument → $KRAKEN2_DB → $self->{slurm_defaults}{db} →
# the hardcoded standard path. If that directory exists and its measured size
# is positive, slurm_memory is set to ceil(size_gb * 1.4) + 100 and a notice
# is written to STDERR. In every other case %args is passed on untouched.
#
# Returns: whatever SUPER::build(%args) returns.
sub build {
    my ($self, %args) = @_;

    if (!defined $args{slurm_memory}) {
        # NOTE(review): the constructor in this file defines no 'db' key under
        # slurm_defaults, so the third fallback looks like it is always undef
        # here - confirm whether the param default was intended instead.
        my $db_dir = $args{db}
                  // $ENV{KRAKEN2_DB}
                  // $self->{slurm_defaults}{db}
                  // '/qib/databases/kraken2/standard';

        my $db_gb = -d $db_dir ? _folder_size_gb($db_dir) : 0;

        if ($db_gb > 0) {
            # 40% contingency + 100 GB fixed overhead, rounded up
            my $request_gb = ceil($db_gb * 1.4) + 100;
            $args{slurm_memory} = $request_gb;
            warn "[nbilaunch] kraken2: db is ${db_gb}GB, requesting ${request_gb}GB RAM\n";
        }
    }

    return $self->SUPER::build(%args);
}
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# ── _folder_size_gb($dir) ─────────────────────────────────────────────────────
# Returns the total disk usage of $dir in GB, measured with 'du -sk'.
#
# Parameters:
#   $dir - directory path; may contain spaces or single quotes and may start
#          with '-'.
#
# Returns: size in GB as a (fractional) number, or 0 when $dir is undefined
# or empty, when du produces no leading numeric total, or when the path is
# inaccessible.
sub _folder_size_gb {
    my ($dir) = @_;
    return 0 unless defined $dir && length $dir;

    # The path goes inside a single-quoted shell word, where only ' itself
    # needs escaping (close quote, quoted ', reopen quote).
    (my $quoted = $dir) =~ s/'/'"'"'/g;

    # du -sk: POSIX-portable, output in kilobytes.
    # '--' ends option parsing so a path beginning with '-' is not read as
    # a du option.
    my $report = `du -sk -- '$quoted' 2>/dev/null`;

    my ($kilobytes) = ($report // '') =~ /\A(\d+)/;
    return 0 unless defined $kilobytes;

    return $kilobytes / (1024 * 1024);    # KB → GB
}
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
__END__ |