File Coverage

blib/lib/SimpleFlow.pm
Criterion Covered Total %
statement 202 219 92.2
branch 52 70 74.2
condition 25 34 73.5
subroutine 20 20 100.0
pod 2 2 100.0
total 301 345 87.2


line stmt bran cond sub pod time code
1             # ABSTRACT: SimpleFlow - easy, simple workflow manager (and logger); for keeping track of and debugging large and complex shell command workflows
2 1     1   134749 use strict;
  1         1  
  1         32  
3 1     1   3 use warnings FATAL => 'all';
  1         1  
  1         62  
4             require 5.010;
5 1     1   4 use feature 'say';
  1         2  
  1         95  
6 1     1   4 use DDP {output => 'STDOUT', array_max => 10, show_memsize => 1};
  1         2  
  1         7  
7 1     1   652 use Devel::Confess 'color';
  1         6057  
  1         3  
8 1     1   86 use Cwd 'getcwd';
  1         1  
  1         52  
9             package SimpleFlow;
10             our $VERSION = 0.13;
11 1     1   5 use Time::HiRes;
  1         1  
  1         7  
12 1     1   727 use Term::ANSIColor;
  1         7196  
  1         95  
13             # Windows portability: the legacy Windows console (cmd.exe) prints raw ANSI
14             # escape sequences as garbage. Disable colouring there unless a terminal that
15             # understands ANSI is in use (Windows Terminal, ConEmu, ANSICON). Unix and
16             # modern Windows terminals are left untouched.
17             BEGIN {
18             $ENV{ANSI_COLORS_DISABLED} = 1
19             if $^O eq 'MSWin32'
20             && !$ENV{WT_SESSION} # Windows Terminal
21             && !$ENV{ConEmuANSI} # ConEmu
22 1 0 33 1   24 && !$ENV{ANSICON}; # ANSICON
      33        
      0        
23             }
24 1     1   5 use Scalar::Util 'openhandle';
  1         2  
  1         47  
25 1     1   4 use DDP {output => 'STDOUT', array_max => 10, show_memsize => 1};
  1         1  
  1         7  
26 1     1   84 use Devel::Confess 'color';
  1         2  
  1         4  
27 1     1   74 use Cwd 'getcwd';
  1         1  
  1         60  
28 1     1   4 use warnings FATAL => 'all';
  1         1  
  1         44  
29 1     1   485 use Capture::Tiny 'capture';
  1         4082  
  1         59  
30 1     1   5 use List::Util 'max';
  1         1  
  1         37  
31 1     1   3 use Exporter 'import';
  1         2  
  1         2212  
32             our @EXPORT = qw(say2 task);
33             our @EXPORT_OK = @EXPORT;
34              
35             sub say2 { # say to both command line and log file
36 1     1 1 18 my ($msg, $fh) = @_;
37 1         12 my $current_sub = (split(/::/,(caller(0))[3]))[-1]; # https://stackoverflow.com/questions/2559792/how-can-i-get-the-name-of-the-current-subroutine-in-perl
38 1         75 my @c = caller;
39 1 50       18 if (not openhandle($fh)) {
40 0         0 die "the filehandle given to $current_sub with \"$msg\" from $c[1] line $c[2] isn't actually a filehandle";
41             }
42 1         3 $msg = "\@ $c[1] line $c[2] $msg";
43 1         8 say $msg;
44 1         2 say $fh $msg;
45 1         3 return $msg;
46             }
47              
48             sub task {
49 26     26 1 1225904 my ($args) = @_;
50 26         297 my $current_sub = (split(/::/,(caller(0))[3]))[-1];
51 26 50       1054 unless (ref $args eq 'HASH') {
52 0         0 die "args must be given as a hash ref, e.g. \"$current_sub({ data => \@blah })\"";
53             }
54 26         61 my @c = caller;
55 26         450 my @reqd_args = (
56             'cmd', # the shell command
57             );
58 26         57 my @undef_args = grep { !defined $args->{$_}} @reqd_args;
  26         112  
59 26 100       74 if (scalar @undef_args > 0) {
60 1         9 p @undef_args;
61 1         3798 die 'the above args are necessary, but were not defined.';
62             }
63 25         88 my @defined_args = ( @reqd_args,
64             'die', # die if not successful; 0 or 1
65             'dry.run', # dry run or not
66             'input.files', # check for input files; SCALAR or ARRAY
67             'log.fh',
68             'note', # a note for the log
69             'overwrite', #
70             'output.files' # product files that need to be checked; can be scalar or array
71             );
72 25         91 my @bad_args = grep { my $key = $_; not grep {$_ eq $key} @defined_args} keys %{ $args };
  52         63  
  52         80  
  416         604  
  25         63  
73 25 100       87 if (scalar @bad_args > 0) {
74 1         7 p @bad_args, array_max => scalar @bad_args;
75 1         3511 say "the above arguments are not recognized by $current_sub";
76 1         9 p @defined_args, array_max => scalar @defined_args;
77 1         5339 die "The above args are accepted by $current_sub";
78             }
79 24 100 100     178 if (
80             (defined $args->{'log.fh'}) &&
81             (not openhandle($args->{'log.fh'}))
82             ) {
83 1         5 p $args;
84 1         4037 die "the filehandle given to $current_sub isn't actually a filehandle";
85             }
86 23         42 my (%input_file_size, @existing_files, @output_files, @empty_filenames);
87 23 100       70 if (defined $args->{'input.files'}) {
88 4         15 my $ref = ref $args->{'input.files'};
89 4         8 my @missing_files;
90 4 100       25 if ($ref eq 'ARRAY') {
    50          
91 1         7 @missing_files = grep {not -f -r $_ } @{ $args->{'input.files'} };
  2         32  
  1         2  
92 1         4 %input_file_size = map { $_ => -s $_ } @{ $args->{'input.files'} };
  2         13  
  1         2  
93 1         2 @empty_filenames = grep {length $_ == 0} @{ $args->{'input.files'} };
  2         4  
  1         6  
94             } elsif ($ref eq '') { # scalar
95 3         9 @missing_files = grep {not -f -r $_ } ($args->{'input.files'});
  3         68  
96 3         8 %input_file_size = map { $_ => -s $_ } ($args->{'input.files'} );
  3         23  
97 3 50       6 @empty_filenames = grep {(defined $_) && (length $_ == 0)} ($args->{'input.files'});
  3         30  
98             } else {
99 0         0 p $args;
100 0         0 die 'ref type "' . $ref . '" is not allowed for "input.files"';
101             }
102 4 100       14 if (scalar @missing_files > 0) {
103 2         33 say STDERR 'this list of arguments:';
104 2         13 p $args;
105 2         8209 say STDERR 'Cannot run because these files are either missing or unreadable in: ' . getcwd();
106 2         8 p @missing_files;
107 2         6963 die 'the above files are missing or are not readable';
108             }
109             }
110 21 50       61 if (scalar @empty_filenames > 0) {
111 0         0 p $args;
112 0         0 die '0-length filenames are not allowed (found in "input.files")';
113             }
114 21         309 my $msg = "\@ $c[1] line $c[2] The command is:\n" . colored(['blue on_bright_red'], $args->{cmd});
115 21         2043 say $msg;
116 21 100       75 say {$args->{'log.fh'}} "\@ $c[1] line $c[2] The command is:\n $args->{cmd})" if defined $args->{'log.fh'};
  1         8  
117 21 100       73 if (defined $args->{'output.files'}) { # avoid "uninitialized value" warning
118 6         15 my $ref = ref $args->{'output.files'};
119 6 50       25 if ($ref eq 'ARRAY') {
    50          
120 0         0 @output_files = @{ $args->{'output.files'} };
  0         0  
121             } elsif ($ref eq '') { # a scalar
122 6         17 @output_files = $args->{'output.files'};
123             } else {
124 0         0 p $args;
125 0         0 die "$ref isn't allowed for \"output.files\"";
126             }
127             }
128 21         45 @empty_filenames = grep {length $_ == 0} @output_files; # 0-length filenames aren't allowed
  6         20  
129 21 100       66 if (scalar @empty_filenames > 0) {
130 1         5 p $args;
131 1         4083 die '0-length filenames are not allowed (found in "output.files"';
132             }
133 20 100       50 if (scalar @output_files > 0) {
134 5         10 @existing_files = grep {-f $_} @output_files;
  5         91  
135             }
136             my %r = (
137             cmd => $args->{cmd},
138 20         355 dir => getcwd(),
139             'source.file' => $c[1],
140             'source.line' => $c[2],
141             'output.files' => [@output_files],
142             );
143 20   100     206 $r{'die'} = $args->{'die'} // 1; # by default, true
144 20   100     199 $r{'dry.run'} = $args->{'dry.run'} // 0; # by default, false
145 20   100     71 $r{note} = $args->{note} // '';# by default, false
146 20   100     114 $r{overwrite} = $args->{overwrite} // 0; # by default, false
147 20         39 $r{'will.do'} = 'yes';
148 20 100       54 $r{'will.do'} = 'no: dry run' if $args->{'dry.run'};
149 20         37 my $string_max = 0;
150 20 100       48 if (defined $args->{'input.files'}) {
151 2         13 $r{'input.files'} = $args->{'input.files'};
152 2         7 $r{'input.file.size'} = \%input_file_size;
153             }
154 20         40 my %output_file_size = map {$_ => -s $_} @output_files;
  5         82  
155 20         75 foreach my $val (grep {ref $r{$_} eq ''} keys %r) {
  204         323  
156 181         368 $string_max = max($string_max, length $r{$val});
157             }
158 20 100 100     168 if ((!$args->{overwrite}) && (scalar @output_files > 0) && (scalar @existing_files == scalar @output_files)) { # this has been done before
      100        
159 1         4 $r{done} = 'before';
160 1         2 $r{'will.do'} = 'no';
161 1         7 say colored(['black on_green'], "\"$args->{cmd}\"\n") . ' has been done before';
162 1         39 $r{'output.file.size'} = \%output_file_size;
163 1         2 $r{duration} = 0;
164 1 50       3 p(%r, output => $args->{'log.fh'}, string_max => $string_max) if defined $args->{'log.fh'};
165 1         5 p %r, string_max => $string_max;
166 1         8365 return \%r;
167             } else {
168 19         52 $r{done} = 'not yet';
169             }
170 19 100       46 if ($r{'dry.run'}) {
171 1         5 say "\@ $c[1] line $c[2] in $r{dir} the command was going to be:";
172 1         4 say colored(['red on_black'], "\"$args->{cmd}\"");
173 1         36 say 'But this is a dry run';
174 1         2 say '-------------';
175 1         4 $r{duration} = 0;
176 1         7 return \%r;
177             }
178 18         68 my $t0 = Time::HiRes::time();
179 18         22 my $status;
180             ($r{stdout}, $r{stderr}, $status) = capture {
181 18     18   6675814 system( $args->{cmd} );
182 18         639 };
183 18         24960 my $t1 = Time::HiRes::time();
184 18         119 $r{duration} = $t1-$t0;
185             # Decode the raw wait status. On Unix the low 7 bits hold the death
186             # signal and the high byte holds the exit code. The signal MUST be read
187             # from the raw status *before* shifting -- the old code shifted first and
188             # then did ($exit & 127), so $r{signal} was always 0 and could never
189             # detect a kill by signal 9/15. Windows has no POSIX signals, and a -1
190             # return from system() means the command never launched.
191 18 50 33     361 if (!defined $status || $status == -1) {
    50          
192 0         0 $r{'exit'} = -1;
193 0         0 $r{signal} = 0;
194             } elsif ($^O eq 'MSWin32') {
195 0         0 $r{signal} = 0;
196 0         0 $r{'exit'} = $status >> 8;
197             } else {
198 18         132 $r{signal} = $status & 127; # FIX: taken from raw status, not from $exit
199 18         102 $r{'exit'} = $status >> 8;
200             }
201 18         86 foreach my $std ('stderr', 'stdout') {
202 36         154 $r{$std} =~ s/\s+$//; # remove trailing whitespace/newline
203 36         170 $string_max = max($string_max, length $r{$std});
204             }
205 18         164 $r{done} = 'now';
206 18         111 $r{'will.do'} = 'done';
207 18         86 my @missing_output_files = grep {not -f -r $_} @output_files;
  4         85  
208 18 100       125 if (scalar @missing_output_files > 0) {
209 1         7 $r{'will.do'} = 'FAILED';
210 1         31 say STDERR "this input to $current_sub:";
211 1         20 p $args;
212 1 50       6103 say {$args->{'log.fh'}} "this input to $current_sub:" if defined $args->{'log.fh'};
  0         0  
213 1 50       9 p($args, output => $args->{'log.fh'}, string_max => $string_max) if defined $args->{'log.fh'};
214 1         8 say STDERR 'has these output files missing:';
215 1 50       3 say {$args->{'log.fh'}} 'has these output files missing:' if defined $args->{'log.fh'};
  0         0  
216 1         9 p @missing_output_files;
217 1 50       3752 p(@missing_output_files, output => $args->{'log.fh'}, string_max => $string_max) if defined $args->{'log.fh'};
218 1         8 p %r, string_max => $string_max;
219 1 50       9280 p(%r, output => $args->{'log.fh'}, string_max => $string_max) if defined $args->{'log.fh'};
220 1 50       4 if ($args->{'die'}) {
221 0         0 die 'those above files should have been made but are missing';
222             } else {
223 1         5 say STDERR 'those above files should have been made but are missing';
224             }
225             }
226 18         46 %output_file_size = map {$_ => -s $_} @output_files;
  4         116  
227 18         150 $r{'output.file.size'} = \%output_file_size;
228             # p %output_file_size;
229 18   100     64 my @files_with_zero_size = grep { ($output_file_size{$_} // 0) == 0 } @output_files;
  4         44  
230 18 100       79 if (scalar @files_with_zero_size > 0) {
231 1         15 p @files_with_zero_size;
232 1         3191 warn 'the above output files have 0 size.';
233             }
234 18 100       950 p(%r, output => $args->{'log.fh'}) if defined $args->{'log.fh'};
235 18 100 100     10316 if (($r{'die'}) && ($r{'exit'} != 0)) {
236 1         11 $r{'will.do'} = 'FAILED';
237 1         26 p %r, string_max => $string_max;
238 1         10883 die "\"$args->{cmd}\" failed from $c[1] line $c[2]"
239             }
240 17         237 p %r, string_max => $string_max;
241 17         203045 return \%r;
242             }
243             1;
244              
245             =encoding utf8
246              
247             A tiny workflow manager and logger for Perl, like SnakeMake or NextFlow, but in pure Perl and aimed at making long, error-prone shell pipelines easy to B and B.
248              
249             Every step is a single C call. SimpleFlow checks the inputs before a
250             command runs and the outputs after, times the command, captures its C,
251             C, exit code and signal, optionally logs a full structured record, and
252             skips work that has already been done.
253              
254             Two subroutines are exported by default: L and L.
255              
256             =head1 Install
257              
258             With a CPAN client:
259              
260             cpanm SimpleFlow
261              
262             Or from a checkout:
263              
264             perl Makefile.PL
265             make
266             make test
267             make install
268              
269             =head1 Synopsis
270              
271             The simplest useful case: run a command and confirm it produced its output:
272              
273             use SimpleFlow qw(task say2);
274            
275             my $t = task({
276             cmd => 'which ls',
277             'output.files' => '/tmp/AFK3mnEK8L.log',
278             });
279              
280             C returns a hash reference describing exactly what happened:
281              
282             {
283             cmd "which ls",
284             die 1,
285             dir "/home/con/Scripts/SimpleFlow",
286             done "now",
287             dry.run 0,
288             duration 0.00191903114318848,
289             exit 0,
290             note "",
291             output.files [
292             [0] "/tmp/AFK3mnEK8L.log"
293             ],
294             overwrite 1,
295             signal 0,
296             source.file "t/01.t",
297             source.line 29,
298             stderr "",
299             stdout "/usr/bin/ls",
300             will.do "done"
301             }
302              
303             > B SimpleFlow runs whatever shell command you give it via
304             > C, so the I are your responsibility to keep
305             > cross-platform (e.g. C is Unix-only). SimpleFlow's own behaviour
306             > exit/signal decoding and coloured output is cross-platform; see the
307             > L.
308              
309             =head1 C
310              
311             my $result = task(\%args);
312              
313             Runs one shell command with checking, timing, capture and logging. Takes a
314             B; the only required key is C.
315              
316             =head2 Arguments
317              
318              
319              
320             =begin html
321              
322            
323            
324            
325             Key
326             Type
327             Default
328             Description
329            
330            
331            
332            
333             cmd
334             scalar
335             undef
336             Required. The shell command to run.
337            
338            
339             die
340             bool (0/1)
341             1
342             Die if the command fails (non-zero exit) or an output file is missing. Set to 0 to warn and continue instead.
343            
344            
345             dry.run
346             bool
347             0
348             Print the command (and log it) but do not execute it.
349            
350            
351             input.files
352             scalar or array
353             undef
354             File(s) that must exist and be readable before running; otherwise task dies.
355            
356            
357             output.files
358             scalar or array
359             undef
360             File(s) expected to exist after running; used both for the missing-output check and for skip detection.
361            
362            
363             log.fh
364             open filehandle
365             undef
366             If given, the full result record is also written here. Must be a real, open filehandle.
367            
368            
369             note
370             scalar
371             ''
372             Free-text note copied into the result and the log.
373            
374            
375             overwrite
376             bool
377             0
378             If false and all output.files already exist, the command is skipped. Set true to always run.
379            
380            
381            
382              
383             =end html
384              
385              
386              
387             Passing an unrecognised key, an empty filename, or a non-filehandle C
388             causes C to die: these are usually mistakes worth catching early.
389              
390             =head2 Return value
391              
392             C always returns a hash reference. The fields below are present after a
393             normal run; the L and L paths
394             omit the execution-only fields (C, C, C, C).
395              
396              
397              
398             =begin html
399              
400            
401            
402            
403             Field
404             Meaning
405            
406            
407            
408            
409             cmd
410             The command that was run.
411            
412            
413             dir
414             Working directory at execution time.
415            
416            
417             done
418             "now" (just ran), "before" (skipped, outputs already existed), or "not yet" (dry run).
419            
420            
421             will.do
422             "done", "no" (skipped), "no: dry run", or "FAILED".
423            
424            
425             duration
426             Wall-clock seconds the command took (0 for skips/dry runs).
427            
428            
429             exit
430             Exit code of the command (-1 if it could not be launched).
431            
432            
433             signal
434             Signal number if the command process was killed by a signal, else 0. Always 0 on Windows (no POSIX signals).
435            
436            
437             stdout, stderr
438             Captured output, with trailing whitespace stripped.
439            
440            
441             die, dry.run, overwrite, note
442             The (defaulted) argument values used.
443            
444            
445             output.files
446             Array ref of the output files (a scalar argument is normalised to a one-element array).
447            
448            
449             output.file.size
450             Hash of filename => size in bytes for the outputs.
451            
452            
453             input.files
454             The input argument, as given (present only if you passed input.files).
455            
456            
457             input.file.size
458             Hash of filename => size in bytes for the inputs (present only if you passed input.files).
459            
460            
461             source.file, source.line
462             Where in your code the task was called: handy when debugging a long pipeline.
463            
464            
465            
466              
467             =end html
468              
469              
470              
471             =head2 Skipping completed work
472              
473             If C is false (the default) and every file in C already
474             exists, C does B re-run the command. This makes pipelines
475             restartable: re-running the script picks up where it left off.
476              
477             open my $log, '>', 'logfile.txt';
478             my $t = task({
479             cmd => 'gmx grompp -f em.mdp -c box.gro -p topol.top -o em.tpr',
480             'input.files' => ['em.mdp', 'box.gro', 'topol.top'],
481             'output.files' => 'em.tpr',
482             'log.fh' => $log,
483             });
484             close $log;
485              
486             On the first run C is C<"now">; on a re-run (with C present) C
487             is C<"before"> and C is C<"no">. Pass C<< overwrite =E 1 >> to force it.
488              
489             =head2 Dry runs
490              
491             Useful for inspecting a pipeline without executing anything expensive:
492              
493             my $t = task({
494             cmd => 'a long-running, time-consuming command',
495             'dry.run' => 1,
496             'log.fh' => $fh,
497             });
498              
499             The command is printed (and logged) but not run; C is C<"no: dry run">.
500              
501             =head2 Failure behaviour
502              
503             By default (C<< die =E 1 >>) C dies if the command exits non-zero or if any
504             declared C are missing afterwards, so a broken step stops the
505             pipeline immediately. With C<< die =E 0 >>, C instead warns and returns its
506             result hash (with C<< will.do =E "FAILED" >>), letting you decide what to do.
507              
508             =head2 C
509              
510             say2($message, $filehandle);
511              
512             "Say to two places": prints C<$message> to standard output B to the given
513             log filehandle, prefixed with the calling file and line number so log entries
514             are traceable. The filehandle must be open, or C dies.
515              
516             open my $log, '>', 'run.log';
517             say2('starting equilibration', $log); # -> STDOUT and run.log
518             close $log;
519              
520             =head1 Dependencies
521              
522             Core/runtime modules used by SimpleFlow:
523              
524             =over
525              
526             =item * L captures C/C
527              
528             =item * L (C) pretty result/record printing
529              
530             =item * L better backtraces on death
531              
532             =item * L coloured terminal output
533              
534             =item * C, C, C, C core utilities
535              
536             =back
537              
538             The test suite additionally uses C and
539             L.
540              
541             =head1 Change log
542              
543             =head2 0.13 (2026-06-11)
544              
545             =head3 Fixed (Claude Opus 4.8 helped)
546              
547             =over
548              
549             =item * B C previously
550             computed the exit code (C<< $status EE 8 >>) and I derived the signal as
551             C<$exit & 127>. Because the signal lives in the low byte of the raw wait
552             status, which C<< EE 8 >> discards the C field was always wrong: a clean
553             C was reported as C, and a process actually killed by a
554             signal reported C. The signal is now read from the raw status before
555             shifting, so C and C are independent and accurate.
556              
557             =item * B<< No longer dies on a missing output file when C<< die =E 0 >>. >> The zero-size
558             check did C<(-s $file) == 0>, which is C when a declared output file
559             is absent. Under C<< use warnings FATAL =E 'all' >> that "uninitialized value"
560             warning was fatal, so a task that was meant to I about missing output
561             (with C<< die =E 0 >>) crashed instead. Missing sizes are now treated as C<0>, so
562             the task warns and returns its result hash as intended.
563              
564             =item * B<< The "already done" result is now logged with its C. >> In the
565             short-circuit path (output files already exist), C was set I
566             the record was written to the log, so the logged hash was missing it; the
567             duplicate C<< done =E 'before' >> assignment was also removed.
568              
569             =back
570              
571             =head3 Changed / Windows support
572              
573             =over
574              
575             =item * B Decoding now branches on C<$^O>: Windows has
576             no POSIX signals (C is reported as C<0> there), and a C that
577             fails to launch the command (C<-1>) yields C<< exit =E -1 >> instead of a garbage
578             value from shifting C<-1>.
579              
580             =item * B C
581             output is suppressed on C unless an ANSI-capable terminal is detected
582             (Windows Terminal, ConEmu, or ANSICON), so C no longer prints raw
583             escape sequences and redirected logs stay clean. Unix and modern Windows
584             terminals are unaffected.
585              
586             =back
587              
588             =head3 Tests
589              
590             =over
591              
592             =item * Rewrote C to be cross-platform: shell commands now invoke the running
593             Perl interpreter (C<"$^X" -e ...>) instead of Unix-only tools (C, C,
594             C, C), and temp files use the system temp directory instead of a
595             hard-coded C.
596              
597             =item * Added regression tests for both fixed bugs (exit/signal decoding; surviving a
598             missing output file with C<< die =E 0 >>).
599              
600             =item * Added coverage for the C field, the C / C
601             hashes, scalar-vs-array normalisation of C / C, the
602             C / C / C metadata, captured C / C
603             (including trailing-whitespace stripping), and argument validation (missing
604             C, unknown keys, bad C, missing input files).
605              
606             =back
607              
608             =head2 0.12
609              
610             exit code now matches what shell would show it as; signal now appears
611              
612             =head2 0.11
613              
614             max string length now corresponds to max of output strings, no more truncated output
615             added List::Util dependency for string length maxes
616             memory size now shows when output
617             directory is now output during dry runs