| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#ABSTRACT: NBI Slurm module |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# NBI::Slurm - Main entry-point module for the NBI::Slurm package. |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# DESCRIPTION: |
|
6
|
|
|
|
|
|
|
# Provides utility functions and constants used across the package: |
|
7
|
|
|
|
|
|
|
# - %FORMAT_STRINGS : squeue format codes (jobid, user, memory, etc.) |
|
8
|
|
|
|
|
|
|
# - load_config() : reads ~/.nbislurm.config key=value settings |
|
9
|
|
|
|
|
|
|
# - has_squeue() : checks whether the squeue binary is available |
|
10
|
|
|
|
|
|
|
# - queues() : lists available SLURM partitions via sinfo |
|
11
|
|
|
|
|
|
|
# - valid_queue() : validates a queue name against the cluster |
|
12
|
|
|
|
|
|
|
# - execute_command(): runs a shell command and captures stdout/stderr/exit |
|
13
|
|
|
|
|
|
|
# - timelog() : returns a formatted timestamp string for logging |
|
14
|
|
|
|
|
|
|
# - days_since_update(): returns days since a file was last modified |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# RELATIONSHIPS: |
|
17
|
|
|
|
|
|
|
# - Loads and re-exports NBI::Job and NBI::Opts so callers only need |
|
18
|
|
|
|
|
|
|
# "use NBI::Slurm". |
|
19
|
|
|
|
|
|
|
# - NBI::Queue and NBI::QueuedJob both import %FORMAT_STRINGS and |
|
20
|
|
|
|
|
|
|
# $NBI::Slurm::VERSION from this module. |
|
21
|
|
|
|
|
|
|
# - NBI::EcoScheduler uses $NBI::Slurm::VERSION and is called by |
|
22
|
|
|
|
|
|
|
# bin/runjob when eco scheduling is active. |
|
23
|
|
|
|
|
|
|
# |
|
24
|
15
|
|
|
15
|
|
2739574
|
use strict; |
|
|
15
|
|
|
|
|
33
|
|
|
|
15
|
|
|
|
|
632
|
|
|
25
|
15
|
|
|
15
|
|
75
|
use warnings; |
|
|
15
|
|
|
|
|
31
|
|
|
|
15
|
|
|
|
|
1132
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
package NBI::Slurm; |
|
28
|
15
|
|
|
15
|
|
8930
|
use NBI::Job; |
|
|
15
|
|
|
|
|
51
|
|
|
|
15
|
|
|
|
|
953
|
|
|
29
|
15
|
|
|
15
|
|
9238
|
use NBI::Opts; |
|
|
15
|
|
|
|
|
65
|
|
|
|
15
|
|
|
|
|
1244
|
|
|
30
|
15
|
|
|
15
|
|
125
|
use base qw(Exporter); |
|
|
15
|
|
|
|
|
27
|
|
|
|
15
|
|
|
|
|
4053
|
|
|
31
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
32
|
|
|
|
|
|
|
our @EXPORT = qw(Job Opts load_config has_squeue timelog execute_command %FORMAT_STRINGS); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
$NBI::Slurm::VERSION = '0.17.2'; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our %FORMAT_STRINGS = ( |
|
40
|
|
|
|
|
|
|
'account' => '%a', |
|
41
|
|
|
|
|
|
|
'jobid' => '%A', |
|
42
|
|
|
|
|
|
|
'jobname' => '%j', |
|
43
|
|
|
|
|
|
|
'cpus' => '%C', |
|
44
|
|
|
|
|
|
|
'end_time' => '%E', |
|
45
|
|
|
|
|
|
|
'start_time' => '%S', |
|
46
|
|
|
|
|
|
|
'total_time' => '%l', |
|
47
|
|
|
|
|
|
|
'time_left' => '%L', |
|
48
|
|
|
|
|
|
|
'memory' => '%m', |
|
49
|
|
|
|
|
|
|
'command' => '%o', |
|
50
|
|
|
|
|
|
|
'queue' => '%P', |
|
51
|
|
|
|
|
|
|
'reason' => '%r', |
|
52
|
|
|
|
|
|
|
'status' => '%T', # short: %t |
|
53
|
|
|
|
|
|
|
'workdir' => '%Z', |
|
54
|
|
|
|
|
|
|
'user' => '%u', |
|
55
|
|
|
|
|
|
|
); |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub execute_command { |
|
58
|
0
|
|
|
0
|
1
|
0
|
my ($command) = @_; |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Use File::Temp functionality available in core Perl |
|
61
|
15
|
|
|
15
|
|
15642
|
use File::Temp qw(tempfile); |
|
|
15
|
|
|
|
|
399790
|
|
|
|
15
|
|
|
|
|
23114
|
|
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Create temporary files for capturing output |
|
64
|
0
|
|
|
|
|
0
|
my ($stdout_fh, $stdout_file) = tempfile(UNLINK => 1); |
|
65
|
0
|
|
|
|
|
0
|
my ($stderr_fh, $stderr_file) = tempfile(UNLINK => 1); |
|
66
|
0
|
|
|
|
|
0
|
close($stdout_fh); # Close handles so system() can write to files |
|
67
|
0
|
|
|
|
|
0
|
close($stderr_fh); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Execute command with output redirection |
|
70
|
0
|
|
|
|
|
0
|
my $full_command = "$command >$stdout_file 2>$stderr_file"; |
|
71
|
0
|
|
|
|
|
0
|
system($full_command); |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Capture exit code (system() returns exit code << 8) |
|
74
|
0
|
|
|
|
|
0
|
my $exit_code = $? >> 8; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Read stdout |
|
77
|
0
|
|
|
|
|
0
|
my $stdout = ''; |
|
78
|
0
|
0
|
|
|
|
0
|
if (open(my $stdout_read_fh, '<', $stdout_file)) { |
|
79
|
0
|
|
|
|
|
0
|
$stdout = do { local $/; <$stdout_read_fh> }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
80
|
0
|
|
|
|
|
0
|
close($stdout_read_fh); |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Read stderr |
|
84
|
0
|
|
|
|
|
0
|
my $stderr = ''; |
|
85
|
0
|
0
|
|
|
|
0
|
if (open(my $stderr_read_fh, '<', $stderr_file)) { |
|
86
|
0
|
|
|
|
|
0
|
$stderr = do { local $/; <$stderr_read_fh> }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
87
|
0
|
|
|
|
|
0
|
close($stderr_read_fh); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Files are automatically cleaned up due to UNLINK => 1 |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
return { |
|
93
|
0
|
|
|
|
|
0
|
stdout => $stdout, |
|
94
|
|
|
|
|
|
|
stderr => $stderr, |
|
95
|
|
|
|
|
|
|
exit_code => $exit_code |
|
96
|
|
|
|
|
|
|
}; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub load_config { |
|
100
|
1
|
|
|
1
|
1
|
3
|
my $filename = shift; |
|
101
|
1
|
50
|
|
|
|
4
|
if (! $filename) { |
|
102
|
0
|
|
|
|
|
0
|
$filename = "$ENV{HOME}/.nbislurm.config"; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
1
|
|
|
|
|
3
|
my $config = {}; |
|
105
|
1
|
50
|
|
|
|
183
|
if (! -e "$filename") { |
|
106
|
1
|
50
|
|
|
|
7
|
say STDERR "# Config file not found: $filename" if ($ENV{"DEBUG"}); |
|
107
|
1
|
|
|
|
|
4
|
return $config; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
0
|
0
|
|
|
|
0
|
open(my $fh, "<", $filename) or die "Cannot open $filename: $!"; |
|
110
|
0
|
|
|
|
|
0
|
while (<$fh>) { |
|
111
|
0
|
|
|
|
|
0
|
chomp; |
|
112
|
0
|
0
|
|
|
|
0
|
next if (/^\s*$/); |
|
113
|
0
|
0
|
|
|
|
0
|
next if (/^#/); |
|
114
|
0
|
0
|
|
|
|
0
|
next if (/^;/); |
|
115
|
0
|
|
|
|
|
0
|
my ($key, $value) = split(/=/, $_); |
|
116
|
|
|
|
|
|
|
# discard keys with spaces |
|
117
|
0
|
0
|
|
|
|
0
|
next if ($key =~ /\s/); |
|
118
|
0
|
|
|
|
|
0
|
$config->{$key} = $value; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
0
|
|
|
|
|
0
|
close $fh; |
|
121
|
0
|
|
|
|
|
0
|
return $config; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub has_squeue { |
|
126
|
1
|
|
|
1
|
1
|
259203
|
my $cmd = "squeue --version"; |
|
127
|
1
|
|
|
|
|
5249
|
my $output = `$cmd 2>&1`; |
|
128
|
1
|
50
|
|
|
|
91
|
if ($? == 0) { |
|
129
|
0
|
|
|
|
|
0
|
return 1; |
|
130
|
|
|
|
|
|
|
} else { |
|
131
|
1
|
|
|
|
|
35
|
return 0; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub queues { |
|
136
|
0
|
|
|
0
|
1
|
|
my $can_fail = shift; |
|
137
|
|
|
|
|
|
|
# Retrieve queues from SLURM |
|
138
|
0
|
|
|
|
|
|
my $has_sinfo = undef; |
|
139
|
0
|
|
|
|
|
|
eval { |
|
140
|
0
|
|
|
|
|
|
$has_sinfo = `sinfo --version > /dev/null 2>&1`; |
|
141
|
|
|
|
|
|
|
}; |
|
142
|
|
|
|
|
|
|
|
|
143
|
0
|
0
|
|
|
|
|
chomp($has_sinfo) if defined $has_sinfo; |
|
144
|
0
|
0
|
0
|
|
|
|
if (not defined $has_sinfo and ! $can_fail) { |
|
145
|
0
|
|
|
|
|
|
Carp::croak "ERROR NBI::Slurm: sinfo failed. Are you in a SLURM cluster?\n"; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
0
|
|
|
|
|
|
my $cmd = "timeout 5s sinfo --format '%P' --noheader"; |
|
148
|
0
|
|
|
|
|
|
my @output = `$cmd 2>/dev/null`; |
|
149
|
0
|
0
|
0
|
|
|
|
if ($? != 0 and ! $can_fail) { |
|
150
|
0
|
|
|
|
|
|
Carp::croak "ERROR NBI::Slurm: sinfo did not find queues. Are you in a SLURM cluster?\n"; |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
chomp @output; |
|
154
|
0
|
|
|
|
|
|
return @output; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub valid_queue { |
|
158
|
0
|
0
|
|
0
|
1
|
|
if ($ENV{'SKIP_SLURM_CHECK'}) { |
|
159
|
0
|
|
|
|
|
|
return 1; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
0
|
|
|
|
|
|
my $queue = shift; |
|
162
|
0
|
|
|
|
|
|
my @queues = queues('CAN_FAIL'); |
|
163
|
0
|
|
|
|
|
|
my @input_queues = split(/,/, $queue); |
|
164
|
|
|
|
|
|
|
|
|
165
|
0
|
0
|
|
|
|
|
if (scalar(@input_queues) == 0) { |
|
166
|
|
|
|
|
|
|
# Let's assume it exists... TODO CHECK |
|
167
|
0
|
|
|
|
|
|
return 1; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
0
|
|
|
|
|
|
foreach my $input_queue (@input_queues) { |
|
170
|
0
|
0
|
|
|
|
|
if (! grep { $_ eq $input_queue } @queues) { |
|
|
0
|
|
|
|
|
|
|
|
171
|
0
|
|
|
|
|
|
return 0; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
} |
|
174
|
0
|
|
|
|
|
|
return 1; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub days_since_update { |
|
179
|
0
|
|
|
0
|
1
|
|
my $file_path = shift; |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Check if the required modules can be loaded |
|
182
|
0
|
|
|
|
|
|
eval { |
|
183
|
0
|
|
|
|
|
|
require File::Spec; |
|
184
|
0
|
|
|
|
|
|
require Time::Piece; |
|
185
|
0
|
|
|
|
|
|
require Time::Seconds; |
|
186
|
|
|
|
|
|
|
}; |
|
187
|
0
|
0
|
|
|
|
|
if ($@) { |
|
188
|
0
|
|
|
|
|
|
return -1; # Failed to load required module(s) |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# Check if the file exists |
|
192
|
0
|
0
|
|
|
|
|
unless (-e $file_path) { |
|
193
|
0
|
|
|
|
|
|
return -1; # File not found |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# Get the file's last modification time |
|
197
|
0
|
|
|
|
|
|
my $last_modified = (stat($file_path))[9]; |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Calculate the number of days since the last modification |
|
200
|
0
|
|
|
|
|
|
my $current_time = time(); |
|
201
|
0
|
|
|
|
|
|
my $days_since_update = int(($current_time - $last_modified) / (24 * 60 * 60)); |
|
202
|
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
return $days_since_update; |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub timelog { |
|
207
|
0
|
|
|
0
|
1
|
|
my $tag = shift; |
|
208
|
0
|
0
|
|
|
|
|
$tag = "nbi-slurm" unless defined $tag; |
|
209
|
0
|
|
|
|
|
|
my ($sec, $min, $hour, $day, $month, $year) = localtime(time); |
|
210
|
0
|
|
|
|
|
|
$year += 1900; # Adjust year (localtime returns years since 1900) |
|
211
|
0
|
|
|
|
|
|
$month += 1; # Adjust month (localtime returns 0-11) |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# Format with leading zeros |
|
214
|
0
|
|
|
|
|
|
return sprintf("[%s:%04d-%02d-%02d %02d:%02d:%02d]\t", |
|
215
|
|
|
|
|
|
|
$tag, $year, $month, $day, $hour, $min, $sec); |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
1; |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
__END__ |