File Coverage

blib/lib/HPCI.pm

Criterion	Covered	Total	%
statement	16	18	88.8
branch			n/a
condition			n/a
subroutine	6	6	100.0
pod			n/a
total	22	24	91.6

line	stmt	sub	time	code
1				package HPCI;
2				### HPCI.pm ###################################################################
3
4				### INCLUDES ##################################################################
5
6				# safe Perl
7	18	18	4168585	use warnings;
	18		49
	18		717
8	18	18	108	use strict;
	18		45
	18		443
9	18	18	132	use Carp;
	18		49
	18		1027
10	18	18	8603	use Module::Load;
	18		18182
	18		115
11	18	18	10299	use Module::Load::Conditional qw(can_load);
	18		398939
	18		1299
12	18	18	22511	use List::MoreUtils qw(uniq);
	0
	0
13
14				our @extra_roles;
15
16				sub add_extra_role { # class method: record one extra-role entry in @extra_roles
17				# the next line documents the expected arguments (after the class name)
18				# my ($cluster, $level, $role) = @_;
19				shift; # get rid of HPCI class name
20				push @extra_roles, [ @_ ]; # store a copy of the remaining args as one arrayref; get_extra_roles unpacks it as (cluster, level, roles)
21				}
22
23				sub get_extra_roles { # return the roles registered via add_extra_role for one cluster and level
24				my ($target_cluster, $target_level) = @_; # NOTE(review): no class name is shifted off here, unlike add_extra_role - presumably called as a plain function; confirm against callers
25				my @roles; # matching roles, in registration order
26				for my $role_bunch (@extra_roles) {
27				my ($cluster, $level, $roles) = @$role_bunch;
28				next unless $cluster eq 'ALL' \|\| $cluster eq $target_cluster; # an entry registered for 'ALL' applies to every cluster
29				next unless $level eq $target_level; # the level must match exactly
30				push @roles, ref $roles ? @$roles : $roles; # a reference is expanded as an array of roles; a plain scalar is a single role
31				}
32				return @roles;
33				}
34
35				my $default_attrs = {};
36
37				sub add_default_attrs { # class method: merge attributes into the file-level $default_attrs hash
38				shift; # get rid of HPCI class name
39				my $newhash = ref($_[0]) eq 'HASH' ? shift : { @_ }; # accept either a single hashref or a flat key/value list
40				_merge_hash( $default_attrs, $newhash ); # recursive merge; for non-hash values the new value replaces the old one
41				}
42
43				sub _merge_hash { # recursively merge hash $new into hash $target, modifying $target in place
44				my( $target, $new, $path ) = @_; # $path: arrayref of the keys descended so far, used only in the error message
45				$path \|\|= []; # top-level callers omit $path
46				croak "not a hash when merging attribute hash{".join('}{',@$path)."}" # both sides must be plain (unblessed) hashrefs
47				unless ref($target) eq 'HASH' && ref($new) eq 'HASH';
48				while (my($k,$v) = each %$new) {
49				if (ref($v) eq 'HASH' \|\| (exists $target->{$k} && ref($target->{$k}) eq 'HASH')) { # recurse when either the new or the existing value is a hash
50				$target->{$k} //= {}; # create the nested target hash when it is missing or undef
51				_merge_hash( $target->{$k}, $v, [ @$path, $k ] ); # the check above croaks if the other side turns out not to be a hash
52				}
53				else {
54				$target->{$k} = $v; # non-hash values (including other kinds of reference) are assigned as-is, not copied
55				}
56				}
57				}
58
59				sub explist { # flatten the argument list into a single list
60				return (
61				map {
62				ref($_) eq 'ARRAY' ? @$_ # array references are expanded (one level only)
63				: defined($_) ? ( $_ ) # any other defined value is kept unchanged
64				: ( ) # undef arguments are dropped
65				} @_
66				);
67				}
68
69				# get the env_keys, in original order, but use LAST instance
70				# that retains the order specified in either default or args
71				# but lets the relative order in args take precedence for keys
72				# that are in both
73				#
74				# So, the order is:
75				# [ keys that are only in default in the order they were specified in default ]
76				# [ then keys that are in args in the order they were specified in args ]
77				# No complaint is made if the same key is specified twice in either default
78				# or args, the earlier one(s) are simply ignored.
79				sub keylist { # gather env_keys from each hashref argument, de-duplicated so that the last occurrence of a key wins
80				my @keys;
81				for my $arg (@_) {
82				my $keys = (delete $arg->{env_keys}) // []; # side effect: env_keys is removed from the caller's hash; a missing entry counts as empty
83				push @keys, @$keys;
84				}
85				return (reverse uniq reverse @keys); # uniq keeps first occurrences, so reversing before and after keeps the last ones in their original relative order
86				}
87
88				sub group { # class method: build the merged attribute hash and construct the cluster-specific group object
89				my $pkg = shift; # class name; not used again in this sub
90				my $args =
91				scalar(@_) == 1 && ref($_[0]) eq 'HASH' ? shift # a single hashref is used directly (not copied)
92				: scalar(@_) % 2 == 0 ? { @_ } # an even-length list is taken as key/value pairs
93				: croak("HPCI->group() requires a hashref or a hash in list form");
94				# copy the default attributes as a start
95				my $use_args = {};
96				_merge_hash( $use_args, $default_attrs );
97
98				# pull out the env_keys (if any)
99				my @keys = keylist( $use_args, $args ); # keylist also deletes env_keys from both hashes
100				my @key_specific = map { $_->{env_key_specific} // () } $use_args, $args; # env_key_specific hashes from defaults, then args; missing ones are skipped
101
102				# merge any specified env_key list that has a value available
103				for my $key (@keys) {
104				for my $key_spec (@key_specific) {
105				if (my $spec_args = $key_spec->{$key}) {
106				_merge_hash( $use_args, $spec_args ); # later keys / later sources overwrite earlier ones
107				}
108				}
109				}
110
111				my $cluster = $args->{cluster} // $use_args->{cluster} # an explicit cluster argument wins over defaults and env-key settings
112				// croak("HPCI->group() requires a cluster key in the argument hash");
113
114				for my $arg_set ($use_args, $args) {
115				if (my $spec_args = delete $arg_set->{cluster_specific}) { # removes cluster_specific from the hash (including a caller-supplied hashref)
116				_merge_hash( $use_args, $spec_args->{$cluster} // {} ); # only the entry for the selected cluster is applied
117				}
118				}
119				_merge_hash( $use_args, $args ); # NOTE(review): explicit args are merged last, so they override cluster_specific values; the POD for cluster_specific reads as the reverse - confirm intended precedence
120				my $clmod = "HPCD::${cluster}::Group"; # driver class name derived from the cluster attribute
121				load $clmod; # Module::Load::load; presumably dies if the driver module cannot be loaded - confirm
122				return $clmod->new($use_args);
123				}
124
125
126				sub _trigger_mkdir { # create a directory path and log each directory reported as created
127				my $self = shift; # an object with a log
128				my $dir = shift; # a Path::Class::Dir object
129				$self->info( "Created directory: $_" ) for $dir->mkpath; # presumably mkpath returns the list of directories it created - confirm against Path::Class::Dir
130				}
131
132				=head1 NAME
133
134				HPCI
135
136				=head1 VERSION
137
138				Version 0.53
139
140				=cut
141
142				our $VERSION = '0.53';
143
144				our $LocalConfigFound; # result of the optional HPCI::LocalConfig load attempt below
145
146				$LocalConfigFound = can_load( modules => { 'HPCI::LocalConfig' => undef }); # undef version: no minimum version is requested
147
148				if (!$LocalConfigFound) {
149				my $err = $Module::Load::Conditional::ERROR;
150				if (defined $err && $err !~ /^Could not find or check module /) { # stay silent when the module is simply absent; report any other load failure
151				print STDERR "Conditional load of HPCI::LocalConfig failed. Error is:\n";
152				print STDERR "$err\n";
153				}
154				}
155
156				=head1 SYNOPSIS
157
158				use HPCI;
159
160				my $group = HPCI->group(
161				cluster => ($ENV{HPCI_CLUSTER} // 'uni'),
162				...
163				);
164				$group->stage(
165				name => 'analysis_A',
166				command => '...'
167				);
168				$group->stage(
169				name => 'analysis_B',
170				command => '...'
171				);
172				$group->stage(
173				name => 'analysis_C',
174				command => '...'
175				);
176				$group->stage(
177				name => 'report',
178				command => '...'
179				);
180				$group->add_deps(
181				pre_reqs => [ qw(analysis_A analysis_B analysis_C) ],
182				dep => 'report'
183				);
184
185				my $status_info = $group->execute;
186
187				my $exit_status = 0;
188				for my $stage ( qw(analysis_A analysis_B analysis_C report) ) {
189				if (my $stat = $status_info->{$stage}[-1]{exit_status}) {
190				$exit_status \|\|= $stat;
191				print STDERR "Stage $stage failed, status $stat!\n";
192				}
193				}
194
195				exit($exit_status); # zero only if all stages completed without error
196
197				=head1 OVERVIEW
198
199				HPCI (High Performance Computing Interface) provides an interface to
200				a range of types of computer aggregations (clusters, clouds, ...).
201				(The rest of this document will use I<cluster> henceforth to refer
202				to any type of aggregation that is supported by HPCI.)
203
204				A cluster is defined as a software interface that allows running
205				multiple programs on separate compute elements (nodes).
206
207				HPCI uses an HPCD (High Performance Computing Driver) module
208				to translate its standard interface into the appropriate access
209				mechanisms for the type of cluster that is selected. (If you have
210				used the DBI/DBD modules for accessing databases, this will seem
211				very familiar.)
212
213				The goal of this HPCI/HPCD split is to allow users to write
214				programs that make use of cluster facilities in a portable manner.
215				If there is a reason to run the same program using a different
216				type of cluster, it should only require changing the cluster
217				definition attributes provided to one parent object creation; the
218				rest of code need not know or care about the changed cluster type.
219				Programs which are likely to be run on different cluster types will
220				usually be written to get the cluster attribute information from
221				a configuration file, or command line arguments - so the program
222				itself need not change at all.
223
224				Running a program on different types of clusters can happen for a
225				number of reasons. An organization might have access to multiple
226				types of cluster, such as an in-house cluster plus an external cloud.
227				Scholarly research often shares programs both to allow similar
228				research, or to validate existing research results.
229
230				HPCD modules can provide cluster-specific extensions. That can
231				either be a different kind of functionality, or it can be as simple
232				as allowing the terminology familiar to users of that cluster type
233				to be used in place of the generic terminology provided by HPCI.
234				However, using such extensions makes it harder to move to a
235				different cluster type. So, actually making use of such extensions
236				must be considered carefully.
237
238				=head1 The life cycle of a B<group>
239
240				A B<group> is the main mechanism for using HPCI. It is an object that
241				manages a group of computation steps (called B<stage>s), distributing them
242				across the cluster and keeping track of various housekeeping details like
243				when each stage can be run, checking for the result of each completed stage
244				run, deciding whether a failure should cause a stage to be retried or to
245				prevent other stages from being executed, and collecting the status for each
246				stage.
247
248				The life cycle of running a group of commands on a cluster is:
249
250				=over 4
251
252				=item create group
253
254				A B<group> object is created using the HPCI "class method" B<group>.
255				HPCI isn't really a class, it just appears to be one. Its B<group>
256				"class method" actually delegates creation of a group object to
257				the HPCD module that is indicated by the I<cluster> attribute
258				and it returns a cluster-specific group object that supports the
259				HPCI interface.
260
261				=item create stages
262
263				A B<stage> is created for each command that is to be executed on a
264				separate node of the cluster. This is created using the B<group>
265				object's method B<stage>.
266
267				=item define dependency ordering between the stages
268
269				An important reason for running a group of jobs on a cluster is the
270				ability to use multiple computers to run portions of the computation
271				at the same time, rather than having them compete for the resources
272				of a single computer. However, often some stages will depend
273				upon the output of other stages. Such a dependent stage cannot
274				start executing until all pre-requisite stages have completed.
275				Specifying such dependency requirements is done with the B<group>
276				method B<add_deps>.
277
278				=item execution
279
280				Finally, the B<group> method B<execute> will run the entire set
281				of stages. It does not return until all stages have completed (or
282				have been skipped). Each stage will normally be run once, however
283				it is possible for some stages to be retried under some
284				failure conditions.
285				A failure of one stage (after retry possibilities have been exhausted)
286				can be a trigger for
287				completely skipping the execution of other stages. Each separate
288				execution of a stage (original or retry) is managed with an internal object
289				called a job - but a user program won't see job objects directly.
290
291				As many stages as possible are run simultaneously. This is limited by
292				the specified dependencies, by cluster-specific driver limits, and by
293				user-specified limits on concurrent execution.
294
295				=back
296
297				The objects that calling code deals with directly are a group object to
298				manage a group of stages, and a stage object for each separately run job.
299				Internally, there are also job objects for each retry of a stage, and a
300				log object for logging the execution process (alternately, the user can
301				provide their own Log4Perl compatible log object for HPCI to use - this may be
302				of use if you wish to merge logging of multiple groups and/or of other
303				processing within your program together in a single log).
304
305				There are also some facilities to provide local customization of the standard
306				usage of HPCI (see "Local Configuration" below).
307
308				=head1 Output Tree Layout
309
310				There are a number of output files and directories created during a group execution.
311
312				The default layout of these is:
313
314				<base_dir> "."
315				<group_dir> <base_dir>/<name>-<YYYYMMDD-hhmmss>
316				<log> <group_dir>/<name>.log
317				<stage_dir> <group_dir>/<stage_name>
318				<script_file> <stage_dir>/script.sh
319				<job_dir> <stage_dir>/<retry_number>
320				stdout
321				stderr
322				final_retry symlink to final <job_dir>
323
324				Many of these files/directories can be re-assigned to different
325				location using group or stage attributes - shown above is the
326				default layout. Commonly, you will specifically use the I<base_dir>
327				attribute to choose a location other than the current directory for
328				placing the tree; or else use the I<group_dir> attribute if you want
329				to choose a location that does not create a sub-directory for you.
330				(If this is an already existing directory that is being re-used you
331				may end up with a mixture of old and new contents that are hard to
332				figure out.)
333
334				=over 4
335
336				=item base_dir
337
338				The top level of all the generated output. It defaults to ".",
339				but can be specified explicitly when the group is created with
340				the attribute B<base_dir>.
341
342				=item group_dir
343
344				By default, a new directory is created under B<base_dir>. Its name
345				is I<name>-I<YYYYMMDD>-I<hhmmss> - the name of the group along with
346				a timestamp of when the execution started. This can be over-ridden
347				when the group is created by providing the group attribute B<group_dir>.
348
349				=item log
350
351				The automatically provided log is written to the file I<"group.log">
352				directly under I<group_dir>. This logs information about the
353				execution of the entire group of stages. See B<Logging Attributes
354				of group object> below for ways of changing the default setting.
355
356				=item stage_dir
357
358				Each stage creates a sub-directory beneath I<group_dir> with the
359				same name as the stage. An alternate name can be used by providing
360				the B<dir> attribute when the stage object is created.
361
362				=item script_file
363
364				The script created to be executed on the cluster node. This wraps
365				the specified command with additional logic to pass on environment
366				and config info, and to set output redirection. It is called
367				"script.sh" and placed in I<stage_dir>.
368
369				=item job_dir
370
371				A sub-directory is created under I<stage_dir> for each attempt to
372				run the command. Usually, there will only be a single attempt.
373				However, if the cluster driver provides mechanisms for detecting
374				recoverable issues and then retries a command there can be more
375				than one attempt; or alternately, if a pre-requisite stage
376				fails there might be no attempt made (in that case, though,
377				the entire I<stage_dir> directory would not even get created).
378				These directories are simply named with the retry number ("0",
379				"1", ...).
380
381				=item stdout/stderr
382
383				Within each I<job_dir>, the files "stdout" and "stderr" collect
384				the standard output and standard error output from that (re)try
385				attempt to run the command.
386
387				=item final_retry
388
389				A symlink named "final_retry" is created within I<stage_dir> that
390				points to the I<job_dir> of the final (re)try. Since you often
391				don't care as much about the initial run tries as you do about the
392				last one, this symlink provides a consistent access path to that
393				final retry.
394
395				=back
396
397				=head1 HPCI "Class" Methods
398
399				You can pretend that B<HPCI> is a class with one primary class
400				method named B<group>.
401
402				There are a few other class methods used for localization purposes; they
403				are described below in "Local Configuration".
404
405				=head2 B<group> method
406
407				The B<group> method creates and returns a group object, which
408				you can treat like a B<HPCI::Group> object. (In fact, it really
409				returns an object of class B<HPCD::I<cluster>::Group>, but if you
410				ignore that fact then you can trivially have your program run on
411				some other cluster type.)
412
413				=head2 B<group> object
414
415				The description of attributes and methods for the B<group> object given here describe
416				the generic attributes and how they are treated for all cluster types.
417				Individual cluster drivers can modify this behaviour and can provide
418				additional attributes and methods for cluster-specific purposes.
419
420				=head3 Cluster-Related Attributes of B<group> object
421
422				The one necessary attribute is B<cluster>. For some specific
423				cluster types there may be additional attributes required for
424				connecting to the cluster software (authentication, usage
425				class info, etc.).
426
427				=over 4
428
429				=item cluster
430
431				The B<cluster> attribute specifies which type of cluster is to be used.
432				This is the only required attribute. (Some cluster types may have
433				additional attributes that are required for specifying connection
434				and authentication info.)
435
436				=item cluster_specific
437
438				The attribute B<cluster_specific> is optional. If provided, it should
439				contain a hashref of hashrefs. If the value specified for the I<cluster>
440				attribute is present as a key in the B<cluster_specific>
441				hash, the corresponding value will be used as a set of attribute values
442				when the group is created. Its elements will replace or augment any values
443				for the same attribute name provided to the group method. This will normally be
444				used if the program can be dynamically configured for different cluster
445				types, and there are different arg settings required for the different
446				types of cluster.
447
448				=back
449
450				=head3 Basic Attributes of B<group> object
451
452				=over 4
453
454				=item name
455
456				The B<name> you give to a group is used for creating the directory
457				where output is stored, and also in log messages. A default name
458				"default_group_name" is provided if you do not specify an explicit
459				name. Using the default name is adequate in simple programs which
460				only create one group, but for more complicated programs giving
461				separate names to each group is necessary to easily identify the
462				output of each group. The value of B<name> may also be used by
463				the cluster-specific driver to provide an identifier name (or the
464				basis of one) to the underlying cluster, if it needs one.
465
466				=item stage_defaults
467
468				The attribute B<stage_defaults> is optional. If provided, it should
469				contain a hashref. This hash will be used as default values for
470				every stage created by this group.
471
472				=back
473
474				=head3 Directory Layout Attributes of B<group> object
475
476				=over 4
477
478				=item base_dir
479
480				If none of the other directory layout attributes are used to
481				over-ride this, this attribute specifies the directory in which
482				all output directories and files will be created. This is
483				usually an existing directory; it defaults to the current
484				directory ".".
485
486				=item group_dir
487
488				This directory is usually created to contain the outputs of the
489				group execution. By default, it is directly under B<base_dir> with
490				a name that consists of the group name attribute and a timestamp
491				(e.g. "T_Definition-20150521-153256").
492
493				If you provide an explicit value for this parameter, then it
494				should not be an existing directory containing previous results.
495				(If it is, the log file will be appended to the previous one, but
496				the stage directories will over-write equivalently named directories
497				and files that are created in this run, while leaving unchanged any
498				that did not recur, so you'll have a mix of old and new contents.)
499				The names of files and directories created under B<group_dir> are
500				chosen to be consistent and easy to find automatically.
501
502				=back
503
504				=head3 Logging Attributes of B<group> object
505
506				An HPCI group logs its activities using a Log::Log4perl logger.
507				The logger can either be provided by the caller, or else HPCI will
508				create its own.
509
510				=over 4
511
512				=item log
513
514				This is a Log::Log4perl::Logger object. If it is provided as an
515				attribute to the B<group> creation call, it will be used as it is,
516				and the other logging attributes will be ignored.
517
518				If it is not provided by the user, a new Log::Log4perl::Logger
519				object will be created using the attributes below to define where
520				it is logged to. This created logger will send all log entries to
521				a file, as well as sending all info and higher log entries to stderr.
522
523				=item log_path
524
525				If this attribute is provided (and the B<log> attribute is not
526				provided) it will be used as the full pathname of a file where the
527				log will be written. If it is not provided, it will use the path
528				B<log_dir>/B<log_file> by default.
529
530				=item log_dir
531
532				If neither B<log> nor B<log_path> is provided, this attribute can
533				be used to specify the directory where the log file is to be written.
534				By default, it uses B<group_dir>.
535
536				=item log_file
537
538				If neither B<log> nor B<log_path> is provided, this attribute can
539				be used to specify the file name to be written in the log directory.
540				By default, it uses the constant name "group.log".
541
542				=item log_level
543
544				You can provide this attribute to change the default log level setting from "info" to any of I<debug info warn error fatal>.
545
546				=item log_no_stderr, log_no_file
547
548				Normally, the default log is written to both stderr and to the log file.
549				Either of those can be suppressed by setting the corresponding attribute to a true value.
550				These attributes have no effect if the user provides their own logger instead of using the default one.
551
552				=back
553
554				=head3 Operational Attributes of B<group> object
555
556				=over 4
557
558				=item max_concurrent
559
560				This attribute specifies the maximum number of stages that will
561				be executing at one time. The default setting of 0 allows as
562				many stages as possible (all those that are not waiting for a
563				pre-requisite stage to complete) to run at the same time.
564
565				=item status
566
567				This attribute is set internally while stages are executed.
568				It contains the final result status from each stage run that
569				has completed. The B<execute> method returns this value when
570				execution completes, so you will usually not need to access it
571				explicitly yourself.
572
573				This value is a hashref (indexed by stage name). The values are
574				arrayrefs (indexed by run number 0..n). For each run, there is
575				a hash. The key B<exit_status> contains the exit status of the run.
576				If the stage was never run, B<exit_status> instead contains a text
577				message listing the reason that it was skipped.
578
579				=back
580
581				=head3 Environment Passing Attributes of B<group> object
582
583				You can set up a set of environment variables that will be provided to
584				all stages. (You can also set variables that are only for individual
585				stages - if so, they will modify any set you provide in the group.)
586
587				See B<HPCI::Env> for a description of these.
588
589				=head3 Method B<stage> of B<group> object
590
591				The method B<stage> is used to create a new stage object.
592				Its characteristics are described below.
593
594				The B<group> object keeps track of all B<stage> objects created
595				within that group so that they can all be managed properly when the
596				B<execute> method is invoked.
597
598				=head3 Method B<add_deps> of B<group> object
599
600				The method B<add_deps> is used to specify pre-requisite/dependent
601				relationships. It takes either a hashref or a list containing
602				pairs. One of the keys must be either B<pre_req> or B<pre_reqs>,
603				another must be either B<dep> or B<deps>.
604
605				The value for each of these keys can be either a scalar, or an arrayref
606				of scalar values. A scalar value can be either a B<stage> object (a reference),
607				the exact name of a stage object (a string), or a pattern that matches
608				the name of zero or more stages (a regexp).
609
610				HPCI will ensure that the stage or all of the stages specified for pre_req
611				or pre_reqs have completed execution before any of the dep (or deps) stages
612				is allowed to start executing.
613
614				The plural forms are provided for convenience - often the output
615				file from one preparation stage is required by many others, or the
616				output from many processing stages is needed by a stage that merges
617				results into a summary report. Rather than having to loop over the
618				pre_reqs and deps and calling B<add_deps> individually for every
619				individual dependency, a single call will handle the entire combination.
620
621				Allowing a regexp to match no stages at all makes it possible to write
622				an add_deps call for stages that are optional - no dependency will be
623				added if the optional stage was not created this run.
624
625				While it is recommended for code readability that you use the singular
626				form (B<dep> or B<pre_req>) if you are providing a single stage, and the
627				plural form (B<deps> or B<pre_reqs>) if you are providing a list of
628				stages, either can be used.
629
630				The B<add_deps> method can be called multiple times. HPCI will
631				accumulate the dependencies appropriately.
632
633				It is an error to provide a sequence of dependencies that form
634				a cycle in which a stage directly or indirectly has itself as a
635				pre-requisite. (Such a stage could never run. HPCI will detect
636				when all remaining stages are blocked by pre-requisites and abort,
637				but that might be after numerous stages have already been executed.)
638
639				=head3 METHOD execute of B<group> object
640
641				The B<execute> method is the final goal of building the group.
642				It schedules the execution of individual stages. It waits for
643				pre-requisites before running a stage. It provides for re-running
644				a stage if a soft failure has occurred that allows a retry. If a
645				failure that cannot be retried occurs, it can skip scheduling dependent
646				stages, or even stop scheduling all new stages.
647
648				=head2 Stage Object
649
650				=head3 Attributes
651
652				=over 4
653
654				=item name
655
656				A unique B<name> attribute must be provided for stages. It is a string.
657				There is no default value provided.
658
659				=item command
660
661				The B<command> attribute must be provided before the group is
662				executed. It can either be provided as a string attribute when the
663				stage is created, or by using one of
664				the command-setting methods provided by the stage class.
665
666				See B<HPCI::Stage> for more details about the command setting
667				methods.
668
669				=item dir
670
671				The B<dir> attribute is optional. It specifies the directory
672				in which files related to the stage are placed. By default,
673				it is I<group_dir>/I<stage_name>. You will usually not need to
674				change this.
675
676				=item cluster
677
678				The B<cluster> attribute is automatically passed on from the B<group>
679				to each B<stage>. You are not likely to need this.
680
681				=item group
682
683				The B<group> that created a stage is automatically passed on (as a weak
684				reference) to the stage. You are not likely to need to use this attribute
685				in user code.
686
687				=item resources_required
688
689				=item retry_resources_required
690
691				The B<resources_required> and B<retry_resources_required> are used to
692				define resources that will be required by the stage when it executes.
693				These attributes are somewhat cluster specific - each cluster has
694				its own set of requirements for how a job submission must specify
695				the sort of resources that it will require.
696
697				The B<resources_required> attribute is a hash, specifying the
698				value for each resource that is to be considered.
699
700				The B<retry_resources_required> attribute is also a hash. For
701				each resource, you can specify an array of values. If the cluster
702				driver is able to detect that a run failed because the resource
703				was inadequate, it will retry the run with the next larger value
704				from this list.
705
706				See B<HPCI::Stage> for more details about resources.
707
708				=item force_retries
709
710				This attribute specifies an integer number of times to retry the
711				stage before concluding that it has actually failed. You might use
712				this if your cluster has some nodes that work differently from
713				others and a stage might fail on one type of node but succeed on
714				another.
715
716				These retries are done after any cluster-specific retry mechanisms
717				have been used.
718
719				The default value for this attribute is 0 (zero), giving no forced
720				retries unless you specifically ask for them.
721
722				=item failure_action ('abort_group', 'abort_deps'*, or 'ignore')
723
724				Specifies the action to take if this stage fails (terminates with
725				a non-zero status).
726
727				There are three string values that it can have:
728
729				=over 4
730
731				=item - abort_deps (default)
732
733				If the stage fails, then any stages which depend upon it
734				(recursively) are not run. The group continues executing until
735				all stages which are not dependent upon this stage (including those
736				that have not yet been initiated) complete execution.
737
738				=item - abort_group
739
740				If the stage fails, then no other stages are started. The group
741				simply waits until stages that have already been started complete
742				and then returns.
743
744				=item - ignore
745
746				Execution continues unchanged, any dependent stages will be run when they are
747				no longer blocked.
748
749				=back
750
751				=item abort_group_on_failure abort_deps_on_failure ignore_failure
752
753				As an alternative to providing a value to the failure_action attribute
754				when you create a stage, you can instead provide one of the pseudo-attributes
755				'abort_group_on_failure', 'abort_deps_on_failure', or 'ignore_failure' with
756				a true value to specify 'abort_group', 'abort_deps', or 'ignore' respectively.
757
758				=item state
759
760				The B<state> is mostly an internal attribute but after the group has
761				finished execution you can use this to check whether the stage was
762				run successfully. After execution, B<state> will either be 'pass' or
763				'fail'.
764
765				=item Environment passing attributes
766
767				You can set up a set of environment variables that will be provided to
768				this stage. It will use the set defined for the group as a basis (if such a set was
769				defined for the group), but that set can be changed for individual stages
770				or you can have no group default and only provide a set to specific stages
771				as needed. See B<HPCI::Env> for further details.
772
773				=back
774
775				=head3 Methods
776
777				=head4 command creation
778
779				There are a number of helper methods to assist in building different
780				types of commands to be provided for the B<command> attribute.
781				See B<HPCI::Stage> for details.
782
783				=head1 Local Configuration
784
785				TODO: write this section
786				- describe the HPCI::LocalConfig module
787				- describe the mechanism for adding extra roles to group, stage, etc.
788
789				=head1 Additional
790
791				This is an early public release of HPCI, and at present, there are
792				only two drivers available.
793
794				Only one cluster type is directly included within the HPCI package.
795				The cluster type B<HPCD::uni> runs on a "cluster" of only one
796				machine. It simply uses fork to submit individual stages and has
797				facility for retries and timeouts. This is the default cluster
798				type used for testing, as it will work natively on all types of
799				Unix systems. It is also possible to use this driver as a fallback,
800				in cases where the only available "real" cluster is not accessible
801				for some reason.
802
803				Additionally, there is the B<HPCD::SGE> driver available on CPAN.
804				It has seen heavy use within Boutros Lab.
805
806				Now that these packages have been released, it is likely new
807				cluster drivers will be written. People interested in developing
808				drivers for additional cluster types should contact the authors
809				of this package to co-ordinate releases, features needed, etc. at
810				B<mailto:BoutrosLabSoftware@oicr.on.ca>.
811
812				Additionally, you may wish to subscribe to the email list mentioned
813				at B<https://lists.oicr.on.ca/mailman/listinfo/hpci-discuss>.
814				This is expected to be a low volume discussion group, although the
815				future will tell what the actual volume will be.
816
817				As additional capabilities of new cluster types are addressed, and as
818				different control needs used at other organizations are identified,
819				this interface will surely change. As far as possible, such changes
820				will be done in an upwardly compatible manner, but until a few more
821				drivers have been integrated there is the possibility of changes
822				that are not fully backward compatible. Watch the release notes
823				for warnings of such issues. At some point there will be a 1.0.0
824				release, at which point this expectation of (limited) incompatible
825				future change will be dropped. After that point, incompatible
826				changes will only be made for critical reasons.
827
828				The reasons for separate distribution of cluster-specific HPCD
829				packages are fairly obvious:
830
831				=over 4
832
833				=item -
834
835				The maintainers of the HPCI package do not have access to every
836				possible cluster type, and it is unlikely that anyone will have access
837				to all supported cluster types from one location, so the driver
838				modules will need to be tested separately anyhow.
839
840				=item -
841
842				A user of HPCI is equally not going to have need to access every
843				type of cluster that exists, so they will probably prefer to only
844				download the driver modules that they actually need.
845
846				=back
847
848				=head1 SEE ALSO
849
850				=over 4
851
852				=item HPCI::Group
853
854				Describes the interface common to all B<HPCI Group>
855				objects, regardless of the particular type of cluster that
856				is actually being used to run the stages. In the future, the
857				common interface may change somewhat as support for additional
858				cluster types is added and a better understanding of the common
859				features is achieved.
860
861				=item HPCI::Stage
862
863				Describes the interface common to all
864				B<HPCI Stage> objects, regardless of the
865				particular type of cluster that is actually being used to
866				run the stages. The common interface may change somewhat
867				as support for additional cluster types is added and a better
868				understanding of the common features is achieved.
869
870				=item HPCI::Logger
871
872				Describes the logger parameters in more detail.
873
874				=item HPCI::Env
875
876				Describes the environment passing parameters in more detail.
877
878				=item HPCD::I<$cluster>::Group
879
880				Describes the group interface unique to a specific type of cluster,
881				including any limitations or extensions to the generic interface.
882
883				=item HPCD::I<$cluster>::Stage
884
885				Describes the stage interface unique to a specific type of cluster,
886				including any limitations or extensions to the generic interface.
887
888				=back
889
890				=head1 AUTHOR
891
892				Christopher Lalansingh - Boutros Lab
893
894				John Macdonald - Boutros Lab
895
896				=head1 ACKNOWLEDGEMENTS
897
898				Paul Boutros, PhD, PI - Boutros Lab
899
900				The Ontario Institute for Cancer Research
901
902				=cut
903
904				1;
905