File Coverage

bin/runjob
Criterion Covered Total %
statement 212 312 67.9
branch 82 182 45.0
condition 28 82 34.1
subroutine 19 24 79.1
pod n/a
total 341 600 56.8


line stmt bran cond sub pod time code
1             #!/usr/bin/env perl
2             #ABSTRACT: Run a job in the cluster using NBI::Slurm
3             #PODNAME: runjob
4              
5 4     4   18906 use v5.12;
  4         13  
6 4     4   24 use warnings;
  4         6  
  4         256  
7 4     4   2761 use Getopt::Long;
  4         59249  
  4         24  
8 4     4   2029 use FindBin qw($RealBin);
  4         4479  
  4         426  
9 4     4   2476 use Data::Dumper;
  4         29590  
  4         264  
10 4     4   26 use File::Basename;
  4         23  
  4         180  
11 4     4   16 use File::Spec;
  4         5  
  4         73  
12 4     4   2053 use Term::ANSIColor qw(:constants);
  4         36073  
  4         3246  
13 4     4   35 use Cwd qw(abs_path getcwd);
  4         7  
  4         543  
14              
15 4         291189 my $BIN = basename($0);
16 4         13 $Data::Dumper::Sortkeys = 1;
17              
18 4 50       155 if (-e "$RealBin/../dist.ini") {
  0         0  
19 4 50       20 say STDERR "[dev mode] Using local lib" if ($ENV{"DEBUG"});
20 4     4   1605 use lib "$RealBin/../lib";
  4         2478  
  4         23  
21             }
22              
23 4     4   1955 use NBI::Slurm;
  4         14  
  4         466  
24 4     4   1666 use NBI::EcoScheduler;
  4         9  
  4         163  
25 4     4   23 use Cwd;
  4         4  
  4         264102  
26              
27 4         15 my $user_home_dir = $ENV{HOME};
28 4         40 my @afterok;
29 4         100 my $user_current_dir = getcwd();
30 4         10 my $username = $ENV{USER};
31 4         59 my $config = NBI::Slurm::load_config("$user_home_dir/.nbislurm.config");
32 4         10 my $version = $NBI::Slurm::VERSION;
33 4   50     22 my $queue = $config->{'queue'} // 'qib-short';
34 4   50     17 my $gpuqueue = $config->{'gpuqueue'} // 'qib-gpu';
35 4   50     21 my $threads = $config->{'threads'} // 1;
36 4   50     19 my $memory = $config->{'memory'} // 8000;
37 4   50     20 my $time = $config->{'time'} // "2h";
38 4   50     16 my $tmpdir = $config->{'tmpdir'} // "/tmp";
39 4         7 my $name;
40 4   50     17 my $email_address = $config->{'email'} // undef;
41 4   50     15 my $mail_type = $config->{'email_type'} // "none";
42 4   50     18 my $opt_placeholder = $config->{'placeholder'} // "#FILE#";
43 4         32 my $params_array_file;
44             my $command;
45 4         0 my $verbose;
46 4         0 my $debug;
47 4         0 my $run;
48 4         0 my $gpu;
49 4         0 my $opt_eco;
50 4         0 my $opt_no_eco;
51 4         0 my @slurm_options;
52             GetOptions(
53             'm|memory=s' => \$memory,
54             'c|cores|threads=i' => \$threads,
55             'q|queue=s' => \$queue,
56             'T|time=s' => \$time,
57             'w|tmpdir=s' => \$tmpdir,
58             'n|name=s' => \$name,
59             'r|run' => \$run,
60             'a|email-address=s' => \$email_address,
61             'e|mail-type=s' => \$mail_type, # 'BEGIN,END,FAIL,REQUEUE,ALL'
62             'after=s' => \@afterok,
63             'f|files=s' => \my @files,
64             'p|params-array=s' => \$params_array_file,
65             'placeholder=s' => \$opt_placeholder,
66             'gpu' => \$gpu,
67             'option=s' => \@slurm_options,
68             'eco' => \$opt_eco,
69             'no-eco' => \$opt_no_eco,
70             'verbose' => \$verbose,
71 0     0   0 'version' => sub { say "runjob v", $NBI::Slurm::VERSION; exit },
  0         0  
72             'debug' => \$debug,
73 0     0   0 'help' => sub { usage(1) },
74 4 50       54 ) or usage(1);
75              
76 4 50 33     6606 if ($opt_eco && $opt_no_eco) {
77 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " --eco and --no-eco are mutually exclusive";
78 0         0 exit 1;
79             }
80              
81 4 50       13 $debug = 1 if ($ENV{"DEBUG"});
82 4 50       13 $verbose = 1 if ($debug);
83              
84             # Update queue based on time
85 4         22 my $time_hours = NBI::Opts::_time_to_hour($time);
86 4         34 $queue = update_queue($queue, $time_hours);
87 4         23 $command = join(" ", @ARGV);
88 4 50       10 usage(1) unless ($command);
89              
90 4 100 66     15 if (@files and defined $params_array_file) {
91 1         10 say STDERR RED, BOLD, "ERROR:", RESET, " --files and --params-array are mutually exclusive";
92 1         824 exit 1;
93             }
94              
95 3         14 my @abs_files = get_abs_files(@files);
96 3         9 my ($params_array_abs, $params_rows) = get_params_array_info($params_array_file, $command);
97 2 50       7 my $total_array_tasks = @abs_files ? scalar(@abs_files) : $params_rows;
98 2         6 my $array_task_limit = get_array_task_limit($config);
99 2         63 my @array_chunks = build_array_chunks($total_array_tasks, $array_task_limit);
100              
101              
102 2 50       10 if ($verbose) {
103 0         0 say STDERR YELLOW, BOLD, "PARAMETERS:",RESET;
104 0   0     0 say STDERR YELLOW, "Queue: ", RESET, $queue // "(not set)";
105 0   0     0 say STDERR YELLOW, "Memory: ", RESET, $memory // "(not set)";
106 0   0     0 say STDERR YELLOW, "Threads: ", RESET, $threads // "(not set)";
107 0   0     0 say STDERR YELLOW, "Time: ", RESET, $time // "(not set)";
108 0   0     0 say STDERR YELLOW, "Tmpdir: ", RESET, $tmpdir // "(not set)";
109 0   0     0 say STDERR YELLOW, "Name: ", RESET, $name // "(not set)";
110 0   0     0 say STDERR YELLOW, "Command: ", RESET, $command // "(not set)";
111 0 0       0 say STDERR YELLOW, "Params TSV: ", RESET, $params_array_abs if defined $params_array_abs;
112 0 0       0 say STDERR YELLOW, "Array size: ", RESET, $total_array_tasks if $total_array_tasks;
113 0 0       0 say STDERR YELLOW, "Array cap: ", RESET, $array_task_limit if defined $array_task_limit;
114             }
115             # Check threadshours
116 2 50       98 if (-d $time) {
117 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " did you mean \"--tmpdir $time\" instead of $threads threads?";
118 0         0 usage();
119             }
120              
121             # Sanitize tempdir
122 2 50       40 if (-d $tmpdir) {
123 2         129 $tmpdir = File::Spec->rel2abs($tmpdir);
124 2 50       10 say STDERR "[DEBUG] Using $tmpdir as temporary directory" if ($debug);
125             } else {
126             # Check if it was meant to be a TIME instead
127 0 0 0     0 if ($tmpdir =~/ / or $tmpdir =~/(\d+)[dh]/) {
128 0         0 say STDERR RED, BOLD, "WARNING:", RESET, " $tmpdir looks like a time string, not a directory!";
129             }
130 0         0 eval {
131 0 0       0 mkdir($tmpdir) or die "Cannot create $tmpdir: $!";
132             };
133 0 0 0     0 if ($@) {
    0          
134 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " Cannot create $tmpdir: $@";
135 0         0 exit 1;
136             } elsif ($verbose or $debug) {
137 0         0 say STDERR "[INFO] Created $tmpdir";
138             }
139             }
140              
141             # Sanitize memory
142 2 50 33     89 if ($memory =~/^(\d+)$/ and $1 < 200) {
143 0         0 say STDERR RED, BOLD, "WARNING:", RESET, " $memory is in Mb but <200, autoscaling to $memory GB";
144 0         0 $memory = $1 * 1000;
145             }
146 2 100       22 $name = autoname($command) unless (defined $name);
147 2 50       6 say STDERR "[DEBUG] Using $name as job name" if ($debug);
148              
149 2 50       7 my $afterok_string = defined $afterok[0] ? "-d afterok:" . join(":", @afterok) : undef;
150              
151             # Prepare additional SLURM options
152 2         5 my @additional_opts = ();
153 2 50       5 push @additional_opts, $afterok_string if defined $afterok_string;
154 2 50       7 push @additional_opts, "--gres=gpu:1" if $gpu;
155              
156 2 50       9 push @additional_opts, @slurm_options if @slurm_options;
157              
158 2 50       5 $queue = $gpuqueue if $gpu;
159              
160             #
161 2 100       31 if (not NBI::Slurm::valid_queue($queue)) {
162 1         12 my @valid_queues = NBI::Slurm::queues('CAN FAIL');
163 1 50       23 if (scalar @valid_queues) {
164 0         0 say STDERR RED, BOLD, "WARNING:", RESET, " $queue is not a valid queue: ", join(", ", @valid_queues);
165             } else {
166 1 50       13 if ($run) {
167 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " You might be outside a slurm cluster. Run without --run to see the script", join(", ", @valid_queues);
168 0         0 exit 1;
169             } else {
170 1         41 say STDERR RED, BOLD, "WARNING:", RESET, " You might be outside a slurm cluster", join(", ", @valid_queues);
171             }
172            
173            
174             }
175            
176             }
177              
178             # Eco scheduling
179             # eco_default from config defaults to 1 (on) if not explicitly set
180 2 50       1525 my $eco_default = defined $config->{eco_default} ? $config->{eco_default} : $NBI::EcoScheduler::DEFAULTS{eco_default};
181 2 50       15 my $use_eco = $opt_no_eco ? 0
    100          
182             : $opt_eco ? 1
183             : $eco_default;
184              
185 2 100       13 if ($use_eco) {
186 1 50 33     19 if (defined $config->{start_time} || defined $config->{start_date}) {
187 0         0 say STDERR YELLOW, "[eco] --begin already set explicitly, ignoring eco scheduling", RESET;
188             }
189             }
190              
191             sub xdump {
192 0     0   0 my ($hash_of_hashes, $indent) = @_;
193 0   0     0 $indent //= ' ';
194 0         0 while (my ($key, $value) = each %$hash_of_hashes) {
195 0 0       0 if (ref $value eq 'HASH') {
196 0         0 print STDERR BOLD, "$indent$key :\n", RESET;
197 0         0 xdump($value, "$indent ");
198 0         0 print "$indent\n";
199             } else {
200 0         0 print STDERR "$indent$key = $value\n";
201             }
202             }
203             }
204              
205              
206 2 100       12 if (@array_chunks > 1) {
207 1         10 say STDERR YELLOW, "[array] Splitting $total_array_tasks tasks into ", scalar(@array_chunks), " jobs", RESET;
208             }
209              
210 2         543 my @jobs;
211 2 100       27 my $name_width = scalar(@array_chunks) > 1 ? length(scalar(@array_chunks)) : 0;
212 2         13 for my $chunk_idx (0 .. $#array_chunks) {
213 4         6 my ($array_offset, $chunk_tasks) = @{$array_chunks[$chunk_idx]};
  4         40  
214 4         8 my @job_files = @abs_files;
215 4         6 my $job_params_array = $params_array_abs;
216 4         6 my $job_params_rows = $params_rows;
217 4         9 my $job_array_offset = 0;
218              
219 4 50 33     31 if (@abs_files && @array_chunks > 1) {
    50          
220 0         0 @job_files = @abs_files[$array_offset .. ($array_offset + $chunk_tasks - 1)];
221             } elsif (defined $params_array_abs) {
222 4         8 $job_array_offset = $array_offset;
223             }
224              
225 4         5 my $job_name = $name;
226 4 100       14 if (@array_chunks > 1) {
227 3         11 $job_name = sprintf("%s.part%0*d", $name, $name_width, $chunk_idx + 1);
228             }
229              
230 4         78 my $opts = NBI::Opts->new(
231             -queue => $queue,
232             -threads => $threads,
233             -memory => $memory,
234             -time => $time,
235             -tmpdir => $tmpdir,
236             -email_address => $email_address,
237             -email_type => $mail_type,
238             -opts => \@additional_opts,
239             -files => \@job_files,
240             -params_array => $job_params_array,
241             -params_rows => $job_params_rows,
242             -placeholder => $opt_placeholder,
243             -array_offset => $job_array_offset,
244             -array_tasks => $chunk_tasks,
245             );
246              
247 4 100       15 if ($use_eco) {
248 1         5 my ($begin_epoch, $tier) = NBI::EcoScheduler::find_eco_begin($opts->hours, $config);
249 1 50       4 if (defined $begin_epoch) {
250 1         4 my $begin_str = NBI::EcoScheduler::epoch_to_slurm($begin_epoch);
251 1         14 my $delay = NBI::EcoScheduler::format_delay($begin_epoch);
252 1         6 ($opts->{start_date}, $opts->{start_time}) = split /T/, $begin_str;
253 1 50       15 my $tier_warn = $tier == 2 ? " (job may overrun eco window)"
    50          
254             : $tier == 3 ? " (job may overlap peak hours — best available)"
255             : "";
256 1 50       13 say STDERR GREEN, "[eco] Job scheduled for $begin_str (in $delay)$tier_warn", RESET if $chunk_idx == 0;
257             } else {
258 0 0       0 say STDERR YELLOW, "[eco] No eco slot found in lookahead period, submitting immediately", RESET if $chunk_idx == 0;
259             }
260             }
261              
262 4 50       456 if ($verbose) {
263 0 0       0 say STDERR GREEN, "CONFIG:\n", RESET if $chunk_idx == 0;
264 0 0       0 xdump($config) if $chunk_idx == 0;
265 0 0       0 print STDERR "\n" if $chunk_idx == 0;
266 0         0 say STDERR $opts->view();
267             }
268              
269 4         45 my $job = NBI::Job->new(
270             -name => $job_name,
271             -command => "cd \"$user_current_dir\"",
272             -opts => $opts,
273             );
274 4 50       9 if ($debug) {
275 0         0 say STDERR YELLOW, "JOB:\n", RESET, Dumper($job);
276             }
277 4         19 $job->append_command($command);
278 4         19 push @jobs, $job;
279             }
280              
281              
282 2         4 my $err = 0;
283 2 100       11 if ($run) {
284 1         2 my @job_ids;
285 1         2 for my $job (@jobs) {
286 3         20 my $j = $job->run();
287 3 50       46 if ($j) {
288 3 50       16 if ($verbose) {
289 0         0 say STDERR $job->view();
290             }
291 3         44 push @job_ids, $j;
292             } else {
293 0         0 say STDERR RED, "Job not submitted: $j", RESET;
294 0 0       0 if ($verbose) {
295 0         0 say STDERR $job->view();
296             }
297 0         0 $err = 1;
298 0         0 last;
299             }
300             }
301 1 50       26 if (!$err) {
302 1         17 say $_ for @job_ids;
303             }
304             } else {
305 1         4 for my $idx (0 .. $#jobs) {
306 1 50       31 say STDERR GREEN, "JOB SCRIPT\n", RESET if @jobs == 1;
307 1 50       66 say STDERR GREEN, "JOB SCRIPT ", ($idx + 1), "/", scalar(@jobs), "\n", RESET if @jobs > 1;
308 1         5 say $jobs[$idx]->script();
309             }
310             }
311              
312              
313 2 50       186 if ($err) {
    100          
314 0         0 exit 1;
315             } elsif ($run) {
316 1         8 my @submitted = map { $_->jobid } @jobs;
  3         25  
317 1         96 say STDERR GREEN, "Jobs submitted: ", join(",", @submitted), RESET;
318             }
319             sub usage {
320 0     0   0 say STDERR <
321              
322             Options:
323             -n, --name Job name [optional]
324             -q, --queue Queue name [default: nbi-short]
325             -m, --memory Memory to use [default: 8Gb]
326             -c, --cores Number of threads [default: 1]
327             -T, --time Time string [default: "0d 8h"]
328             --after INT Job ID to wait for [can be used multiple times]
329             -w, --tmpdir Temporary directory [default: /tmp]
330             -r, --run Run the job (otherwise, just print the script)
331             -f, --files Input files for array (either specify -f as many times as needed or
332             use a _quoted_ pattern like "*.fasta" to include multiple files)
333             -p, --params-array
334             TSV file for array jobs; refer to columns as ##1##, ##2##, ...
335             Mutually exclusive with --files
336             --placeholder Placeholder for array input files [default: #FILE#]
337             Ignored when --params-array is used
338             --gpu Request GPU resources (adds --gres=gpu:1)
339             --option VALUE Additional SLURM options (can be used multiple times)
340             --eco Schedule job in next low-energy window (overrides eco_default=0)
341             --no-eco Submit immediately, ignoring eco scheduling (overrides eco_default=1)
342             --verbose Verbose output
343             --help This help message
344             ----------------------------------------------------------
345             END
346 0 0       0 exit() if ($_[0]);
347             }
348              
349             sub autoname {
350 1     1   4 my $string = shift;
351 1         9 my @parts = split(/\s+/, $string);
352 1         13 my @ints = ("bash", "perl", "python", "python3", "R", "Rscript", "sh", "zsh");
353 1         9 my @subs = ("bwa", "samtools", "seqfu", "seqkit", "bedtools", "taxonkit", "kmcp", "seqtk", "usearch", "vsearch");
354            
355             # From each @parts, replace it with the first [A-Za-z0-9]+ part
356 1         9 for my $i (0..$#parts) {
357 4         19 $parts[$i] =~ /^([A-Za-z0-9]+)/;
358 4         14 $parts[$i] = $1;
359             }
360             # check if $fisrt is part of the @int array of strings
361 1 50 33     12 if (grep {$_ eq $parts[0]} @ints and defined $parts[1]) {
  8 50 33     44  
362 0         0 return $parts[1];
363 10         25 } elsif (grep {$_ eq $parts[0]} @subs and defined $parts[1]) {
364 0         0 return $parts[0] . "-" . $parts[1];
365             } else {
366 1         20 return $parts[0];
367             }
368              
369             }
370              
371              
372             sub update_queue {
373 4     4   10 my ($queue, $time) = @_;
374             # If the queue has a star, it can be updated
375 4 50       14 if ($queue !~ /\*/) {
376 4         16 return $queue;
377             }
378             # Get time in hours
379 0         0 my $mock_opt = NBI::Opts->new(-time => $time);
380 0         0 my $time_h = $mock_opt->hours;
381            
382 0 0       0 if ($time <= 2 ) {
    0          
383             # Replace * with short
384 0         0 $queue =~ s/\*/short/g;
385             } elsif ($time_h <= 48) {
386             # Replace * with medium
387 0         0 $queue =~ s/\*/medium/g;
388             } else {
389             # Replace * with long
390 0         0 $queue =~ s/\*/long/g;
391             }
392 0         0 return $queue;
393             }
394              
395              
396             sub expand_pattern_to_abs_paths {
397 0     0   0 my ($pattern) = @_;
398            
399             # Get the current working directory
400 0         0 my $current_dir = getcwd();
401              
402             # Expand the pattern
403 0         0 my @files = glob($pattern);
404              
405             # Convert to absolute paths
406 0         0 my @abs_paths = map { File::Spec->rel2abs($_, $current_dir) } @files;
  0         0  
407              
408 0         0 return @abs_paths;
409             }
410              
411             sub get_abs_files {
412 3     3   6 my @files = @_;
413 3         5 for my $file (@files) {
414            
415 0 0       0 if (-e $file) {
416 0         0 push @abs_files, abs_path($file);
417 0         0 next;
418             } else {
419 0         0 my @tmpfiles = ();
420 0         0 push @tmpfiles, expand_pattern_to_abs_paths($file);
421 0 0       0 if (scalar @tmpfiles) {
422 0         0 push @abs_files, @tmpfiles;
423             } else {
424 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " $file does not exist!";
425 0         0 exit 1;
426             }
427             }
428            
429             }
430 3         6 return @abs_files;
431             }
432              
433             sub get_params_array_info {
434 3     3   7 my ($file, $command) = @_;
435 3 50       5 return (undef, 0) unless defined $file;
436              
437 3 50       63 unless (-e $file) {
438 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " $file does not exist!";
439 0         0 exit 1;
440             }
441              
442 3         55 my $abs_file = abs_path($file);
443 3 50       134 open(my $fh, "<", $abs_file) or do {
444 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " Cannot read $abs_file: $!";
445 0         0 exit 1;
446             };
447              
448 3         7 my %placeholders;
449 3         22 while ($command =~ /##(\d+)##/g) {
450 5         53 $placeholders{$1} = 1;
451             }
452 3         6 my $max_placeholder = 0;
453 3         11 for my $n (keys %placeholders) {
454 5 100       17 $max_placeholder = $n if $n > $max_placeholder;
455             }
456 3 50       11 if ($max_placeholder == 0) {
457 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " --params-array requires placeholders like ##1## in the command";
458 0         0 exit 1;
459             }
460              
461 3         5 my $rows = 0;
462 3         118 while (my $line = <$fh>) {
463 12         18 chomp $line;
464 12         17 $line =~ s/\r$//;
465 12 100       28 next if $line =~ /^\s*$/;
466 11 100       18 next if $line =~ /^\s*#/;
467 10         17 my @fields = split /\t/, $line, -1;
468 10 100       17 if (scalar(@fields) < $max_placeholder) {
469 1         11 say STDERR RED, BOLD, "ERROR:", RESET, " $abs_file row ", ($rows + 1), " has ", scalar(@fields), " columns but the command requires ##$max_placeholder##";
470 1         803 exit 1;
471             }
472 9         22 $rows++;
473             }
474 2         19 close $fh;
475              
476 2 50       6 if ($rows == 0) {
477 0         0 say STDERR RED, BOLD, "ERROR:", RESET, " $abs_file does not contain any usable TSV rows";
478 0         0 exit 1;
479             }
480              
481 2         14 return ($abs_file, $rows);
482             }
483              
484             sub build_array_chunks {
485 2     2   12 my ($total_tasks, $task_limit) = @_;
486 2 50       20 return ([0, 0]) unless $total_tasks;
487 2 50 66     28 return ([0, $total_tasks]) unless defined $task_limit && $task_limit > 0 && $total_tasks > $task_limit;
      66        
488              
489 1         1 my @chunks;
490 1         3 for (my $offset = 0; $offset < $total_tasks; $offset += $task_limit) {
491 3         4 my $chunk_tasks = $task_limit;
492 3         3 my $remaining = $total_tasks - $offset;
493 3 100       5 $chunk_tasks = $remaining if $remaining < $chunk_tasks;
494 3         6 push @chunks, [$offset, $chunk_tasks];
495             }
496 1         15 return @chunks;
497             }
498              
499             sub get_array_task_limit {
500 2     2   4 my ($config) = @_;
501              
502 2 50 66     14 if (defined $ENV{NBI_MAX_ARRAY_SIZE} && $ENV{NBI_MAX_ARRAY_SIZE} =~ /^\d+$/ && $ENV{NBI_MAX_ARRAY_SIZE} > 0) {
      66        
503 1         2 return $ENV{NBI_MAX_ARRAY_SIZE};
504             }
505 1 0 33     4 if (defined $config->{max_array_size} && $config->{max_array_size} =~ /^\d+$/ && $config->{max_array_size} > 0) {
      33        
506 0         0 return $config->{max_array_size};
507             }
508              
509 1         6475 my $output = `scontrol show config 2>/dev/null`;
510 1 50 33     81 return undef if $? != 0 || !$output;
511              
512 0           my @limits;
513 0 0         if ($output =~ /^MaxArraySize\s*=\s*(\d+)/m) {
514 0 0         push @limits, $1 if $1 > 0;
515             }
516 0 0         if ($output =~ /max_array_tasks=(\d+)/) {
517 0 0         push @limits, $1 if $1 > 0;
518             }
519 0 0         return undef unless @limits;
520              
521 0           my $limit = shift @limits;
522 0           for my $candidate (@limits) {
523 0 0         $limit = $candidate if $candidate < $limit;
524             }
525 0           return $limit;
526             }
527              
528             __END__