File Coverage

lib/Data/Hopen/G/DAG.pm

Criterion	Covered	Total	%
statement	182	182	100.0
branch	58	60	96.6
condition	6	7	85.7
subroutine	32	32	100.0
pod	6	6	100.0
total	284	287	98.9

line	stmt	bran	cond	sub	pod	time	code
1							# Data::Hopen::G::DAG - hopen build graph
2							package Data::Hopen::G::DAG;
3	9			9		3484	use strict;
	9					23
	9					404
4	9			9		60	use Data::Hopen::Base;
	9					18
	9					101
5
6							our $VERSION = '0.000021';
7
8	9			9		2728	use parent 'Data::Hopen::G::Op';
	9					20
	9					102
9							use Class::Tiny {
10	14					171	goals => sub { [] },
11	9					144	default_goal => undef,
12							winner => undef,
13
14							# Private attributes with simple defaults
15							#_node_by_name => sub { +{} }, # map from node names to nodes in either
16							# # _init_graph or _graph
17
18							# Private attributes - initialized by BUILD()
19							_graph => undef, # L instance
20							_final => undef, # The graph sink - all goals have edges to this
21
22							#Initialization operations
23							_init_graph => undef, # L
24							# for initializations
25							_init_first => undef, # Graph node for initialization - the first
26							# init operation to be performed.
27
28							# TODO? also support fini to run operations after _graph runs?
29	9			9		895	};
	9					14
30
31	9			9		11011	use Data::Hopen qw(hlog getparameters *QUIET);
	9					20
	9					1046
32	9			9		4478	use Data::Hopen::G::Goal;
	9					25
	9					380
33	9			9		1455	use Data::Hopen::G::Link;
	9					25
	9					275
34	9			9		45	use Data::Hopen::G::Node;
	9					19
	9					229
35	9			9		4080	use Data::Hopen::G::CollectOp;
	9					55
	9					487
36	9			9		55	use Data::Hopen::Util::Data qw(forward_opts);
	9					16
	9					495
37	9			9		4054	use Data::Hopen::OrderedPredecessorGraph;
	9					37
	9					372
38	9			9		58	use Getargs::Mixed; # parameters, which doesn't permit undef
	9					19
	9					714
39	9			9		51	use Hash::Merge;
	9					17
	9					341
40	9			9		8628	use Regexp::Assemble;
	9					180811
	9					523
41	9			9		80	use Scalar::Util qw(refaddr);
	9					18
	9					533
42	9			9		42	use Storable ();
	9					18
	9					284
43
44							# Class data {{{1
45
46							use constant {
47	9					22723	LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48	9			9		43	};
	9					17
49
50							# A counter used for making unique names
51							my $_id_counter = 0; # threads: make shared
52
53							# }}}1
54							# Docs {{{1
55
56							=head1 NAME
57
58							Data::Hopen::G::DAG - A hopen build graph
59
60							=head1 SYNOPSIS
61
62							This class encapsulates the DAG for a particular set of one or more goals.
63							It is itself a L so that it can be composed into
64							other DAGs.
65
66							=head1 ATTRIBUTES
67
68							=head2 goals
69
70							Arrayref of the goals for this DAG.
71
72							=head2 default_goal
73
74							The default goal for this DAG.
75
76							=head2 winner
77
78							When a node has multiple predecessors, their outputs are combined using
79							L to form the input to that node. This sets the C
80							precedence. Valid values (case-insensitive) are:
81
82							=over
83
84							=item C or C<'combine'>
85
86							(the default): L. Same-name keys
87							are merged, so no data is lost.
88
89							=item C<'first'> or C<'keep'>
90
91							L. The first predecessor to add a value
92							under a particular key will win.
93
94							=item C<'last'> or C<'replace'>
95
96							L. The last predecessor to add a value
97							under a particular key will win.
98
99							=back
100
101							=head2 _graph
102
103							The actual L. If you find that you have to use it, please open an
104							issue so we can see about providing a documented API for your use case!
105
106							=head2 _final
107
108							The node to which all goals are connected.
109
110							=head2 _init_graph
111
112							A separate L of operations that will run before all the operations
113							in L. This is because I don't want to add an edge to every
114							single node just to force the topological sort to work out.
115
116							=head2 _init_first
117
118							The first node to be run in _init_graph.
119
120							=head1 FUNCTIONS
121
122							=cut
123
124							# }}}1
125
126							=head2 _run
127
128							Traverses the graph. The DAG is similar to a subroutine in this respect. The
129							outputs from all the goals of the DAG are aggregated and provided as the
130							outputs of the DAG. The output is a hash keyed by the name of each goal, with
131							each goal's outputs as the values under that name. Usage:
132
133							my $hrOutputs = $dag->run([-context=>$scope][, other options])
134
135							C<$scope> must be a L or subclass if provided.
136							Other options are as L.
137
138							When evaluating a node, the edges from its predecessors are traversed in
139							the order those predecessors were added to the graph.
140
141							=cut
142
143							# The implementation of run(). $self->scope has already been linked to the context.
144							sub _run {
145	30			30		212	my ($self, %args) = getparameters('self', [qw(; visitor)], @_);
146	30					1565	my $retval = {};
147
148							# --- Get the initialization ops ---
149
150	30					81	my @init_order = eval { $self->_init_graph->toposort };
	30					1050
151	30	100				1880	die "Initializations contain a cycle!" if $@;
152	29	100				1181	@init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153
154							# --- Get the runtime ops ---
155
156	29					1138	my @order = eval { $self->_graph->toposort };
	29					859
157							# TODO someday support multi-core-friendly topo-sort, so nodes can run
158							# in parallel until they block each other.
159	29	100				2372	die "Graph contains a cycle!" if $@;
160
161							# Remove _final from the order for now - I don't yet know what it means
162							# to traverse _final.
163	28	50	66			1240	warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164							unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165
166	28					314	@order = grep { refaddr $_ != refaddr $self->_final } @order;
	100					2604
167
168							# --- Check for non-connected ops, and goals with no inputs ---
169
170	28	100				246	unless($QUIET) {
171	27					552	foreach my $node ($self->_graph->isolated_vertices) {
172	1					543	warn "Node @{[$node->name]} is not connected to any other nodes";
	1					26
173							}
174
175	27					12287	foreach my $goal (@{$self->goals}) {
	27					928
176	27	100				768	warn "Goal @{[$goal->name]} has no inputs"
	1					112
177							if $self->_graph->is_predecessorless_vertex($goal);
178							}
179							}
180
181							# --- Set up for the merge ---
182
183	28					3005	state $STRATEGIES = { # regex => strategy
184							'(\|combine)' => 'combine',
185							'(first\|keep)' => 'keep',
186							'(last\|replace)' => 'replace',
187							};
188	28					118	state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189							->anchor_string_begin->anchor_string_end
190							->add(keys %$STRATEGIES);
191
192	28		100			6555	my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193	28	100				6941	die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
	5					103
194	23					99	my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195
196							# --- Traverse ---
197
198							# Note: while hacking, please make sure Goal nodes can appear
199							# anywhere in the graph.
200
201	23			18		244	hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
	18					92
	18					109
202
203	23					861	my $graph = $self->_init_graph;
204	23					196	foreach my $node (@init_order, undef, @order) {
205
206	84	100				969	if(!defined($node)) { # undef is the marker between init and run
207	23					548	$graph = $self->_graph;
208	23					153	next;
209							}
210
211							# Inputs to this node. These are different from the DAG's inputs.
212							# The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213							# then $node_inputs, then the individual node's overrides.
214	61					331	my $node_inputs = Data::Hopen::Scope::Hash->new;
215							# TODO make this a DH::Scope::Inputs once it's implemented
216	61					3306	$node_inputs->outer($self->scope);
217							# Data specifically being provided to the current node, e.g.,
218							# on input edges, beats the scope of the DAG as a whole.
219	61					3415	$node_inputs->local(true);
220							# A CollectOp won't reach above the node's inputs by default.
221	61					1739	$node_inputs->merge_strategy($merge_strategy);
222
223							# Iterate over each node's edges and process any Links
224	61					575	foreach my $pred ($graph->ordered_predecessors($node)) {
225	35			29		8332	hlog { ('From', $pred->name, 'to', $node->name) };
	29					117
226
227							# Goals do not feed outputs to other Goals. This is so you can
228							# add edges between Goals to set their order while keeping the
229							# data for each Goal separate.
230							# TODO add tests for this. Also TODO decide whether this is
231							# actually the Right Thing!
232	35	50				276	next if eval { $pred->DOES('Data::Hopen::G::Goal') };
	35					396
233
234	35					1221	my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235
236							# Simple case (no links): predecessor's outputs become our inputs
237	35	100				12316	unless($links) {
238	27			23		244	hlog { ' -- no links' };
	23					80
239	27					148	$node_inputs->merge(%{$pred->outputs});
	27					124
240							# TODO specify which set these are.
241							# Use the predecessor's identity as the set.
242	27					197	next;
243							}
244
245							# More complex case: Process all the links
246
247							# Helper function to wrap a hashref in the right scope for a link input
248							local *make_link_inputs = sub {
249	19			19		39	my $hrIn = shift;
250	19					87	my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251							# All links get the same outer scope --- they are parallel,
252							# not in series.
253							# TODO? use the predecessor's identity as the set.
254	19					404	$scLinkInputs->outer($self->scope);
255							# The links run at the same scope level as the node.
256	19					1352	$scLinkInputs->local(true);
257	19					138	return $scLinkInputs;
258	8					70	};
259
260							# Make the first link's input scope
261	8					37	my $hrPredOutputs = $pred->outputs;
262							# In one test, outputs was undef if not on its own line.
263	8					127	my $scLinkInputs = make_link_inputs($hrPredOutputs);
264
265							# Run the links in series - not parallel!
266	8					41	my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267	8					28	foreach my $link (@$links) {
268	11			9		174	hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
	9					31
269
270	11					183	$hrLinkOutputs = $link->run(
271							-context=>$scLinkInputs,
272							# visitor not passed to links.
273							);
274	11					214	$scLinkInputs = make_link_inputs($hrLinkOutputs);
275							} #foreach incoming link
276
277	8					171	$node_inputs->merge(%$hrLinkOutputs);
278							# TODO specify which set these are.
279
280							} #foreach predecessor node
281
282	61					635	my $step_output = $node->run(-context=>$node_inputs,
283							forward_opts(\%args, {'-'=>1}, 'visitor')
284							);
285	61					1335	$node->outputs($step_output);
286
287							# Give the visitor a chance, and stash the results if necessary.
288	61	100				126	if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
	61					745
289	23	100				102	$args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
290
291							# Save the result if there is one. Don't save {}.
292							# use $node->outputs, not $step_output, since the visitor may
293							# alter $node->outputs.
294	23	100				196	$retval->{$node->name} = $node->outputs if keys %{$node->outputs};
	23					90
295							} else {
296	38	100				163	$args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
297							}
298
299	47			47		148	hlog { 'Finished node', $node->name, 'with outputs',
300	61					518	Dumper $node->outputs } 10;
301
302							} #foreach node in topo-sort order
303
304	23					2151	return $retval;
305							} #run()
306
307							=head1 ADDING DATA
308
309							=head2 goal
310
311							Creates a goal of the DAG. Goals are names for sequences of operations,
312							akin to top-level Makefile targets. Usage:
313
314							my $goalOp = $dag->goal('name')
315
316							Returns the L node that is the goal. By default, any
317							inputs passed into a goal are provided as outputs of that goal, and are
318							saved as outputs of the DAG under the goal's name.
319
320							The first call to C also sets L.
321
322							=cut
323
324							sub goal {
325	17	100		17	1	3636	my $self = shift or croak 'Need an instance';
326	16	100				259	my $name = shift or croak 'Need a goal name';
327	15					140	my $goal = Data::Hopen::G::Goal->new(name => $name);
328	15					401	$self->_graph->add_vertex($goal);
329							#$self->_node_by_name->{$name} = $goal;
330	15					1391	$self->_graph->add_edge($goal, $self->_final);
331	15	100				326	$self->default_goal($goal) unless $self->default_goal;
332	15					404	push @{$self->goals}, $goal;
	15					321
333	15					60	return $goal;
334							} #goal()
335
336							=head2 connect
337
338							=over 4
339
340							=item - C<< DAG:connect(, , , ) >>
341
342							B.
343							Connects output C of operation C as input C of
344							operation C. No processing is done between output and input.
345							C and C can be anything usable as a table index, provided
346							that table index appears in the corresponding operation's descriptor.
347
348							=item - C<< DAG:connect(, ) >>
349
350							Creates a dependency edge from C to C, indicating that C must be
351							run before C. Does not transfer any data from C to C.
352
353							=item - C<< DAG:connect(, , ) >>
354
355							Connects C to C via L C.
356							C may be undef, in which case this is treated as the two-parameter form.
357
358							If there are already link(s) on the edge from C to C, the new link
359							is added after the last existing link.
360
361							=back
362
363							TODO return the name of the edge? The edge instance itself? Maybe a
364							fluent interface to the DAG for chaining C calls?
365
366							TODO remove the out-edge and in-edge parameters?
367
368							=cut
369
370							sub connect {
371	21	100		21	1	8987	my $self = shift or croak 'Need an instance';
372	20					54	my ($op1, $out_edge, $in_edge, $op2, $link);
373
374							# Unpack args
375							#if(@_ == 4) {
376							# ($op2, $out_edge, $in_edge, $op2) = @_;
377							#} else the following
378	20	100				79	if(@_ == 3) {
		100
379	12					29	($op1, $link, $op2) = @_;
380							} elsif(@_ == 2) {
381	5					11	($op1, $op2) = @_;
382							} else {
383	3					245	die "Invalid arguments";
384							}
385
386							#my $out_edge = false; # No outputs TODO use these?
387							#my $in_edge = false; # No inputs
388
389	13	100		13		56	hlog { 'DAG::connect(): Edge from', $op1->name,
390							'via', $link ? $link->name : '(no link)',
391	17					144	'to', $op2->name };
392
393							# Add it to the graph (idempotent)
394	17					701	$self->_graph->add_edge($op1, $op2);
395							# $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
396
397							# Save the DHG::Link as an edge attribute (not idempotent!)
398	17	100				68	if($link) {
399	11		100			266	my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) \|\| [];
400	11					3286	push @$attrs, $link;
401	11					288	$self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
402							}
403
404	17					3422	return undef; # TODO decide what to return
405							} #connect()
406
407							=head2 add
408
409							Add a regular node to the graph. An attempt to add the same node twice will be
410							ignored. Usage:
411
412							my $node = Data::Hopen::G::Op->new(name=>"whatever");
413							$dag->add($node);
414
415							Returns the node, for the sake of chaining.
416
417							=cut
418
419							sub add {
420	4			4	1	3734	my ($self, undef, $node) = parameters('self', ['node'], @_);
421	2	100				160	return if $self->_graph->has_vertex($node);
422	1			1		83	hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
	1					7
423
424	1					53	$self->_graph->add_vertex($node);
425							#$self->_node_by_name->{$node->name} = $node if $node->name;
426
427	1					97	return $node;
428							} #add()
429
430							=head2 init
431
432							Add an initialization operation to the graph. Initialization operations run
433							before all other operations. An attempt to add the same initialization
434							operation twice will be ignored. Usage:
435
436							my $op = Data::Hopen::G::Op->new(name=>"whatever");
437							$dag->init($op[, $first]);
438
439							If C<$first> is truthy, the op will be run before anything already in the
440							graph. However, later calls to C with C<$first> set will push
441							operations even before C<$op>.
442
443							Returns the node, for the sake of chaining.
444
445							=cut
446
447							sub init {
448	6	100		6	1	6929	my $self = shift or croak 'Need an instance';
449	5	100				329	my $op = shift or croak 'Need an op';
450	4					9	my $first = shift;
451	4	100				137	return if $self->_init_graph->has_vertex($op);
452
453	3					255	$self->_init_graph->add_vertex($op);
454							#$self->_node_by_name->{$op->name} = $op;
455
456	3	100				256	if($first) { # $op becomes the new _init_first node
457	1					32	$self->_init_graph->add_edge($op, $self->_init_first);
458	1					30	$self->_init_first($op);
459							} else { # Not first, so can happen anytime. Add it after the
460							# current first node.
461	2					83	$self->_init_graph->add_edge($self->_init_first, $op);
462							}
463
464	3					19	return $op;
465							} #init()
466
467							=head1 ACCESSORS
468
469							=head2 empty
470
471							Returns truthy if the only nodes in the graph are internal nodes.
472							Intended for use by hopen files.
473
474							=cut
475
476							sub empty {
477	3	100		3	1	2118	my $self = shift or croak 'Need an instance';
478	2					74	return ($self->_graph->vertices == 1);
479							# _final is the node in an empty() graph.
480							# We don't check the _init_graph since empty() is intended
481							# for use by hopen files, not toolsets.
482							} #empty()
483
484							=head1 OTHER
485
486							=head2 BUILD
487
488							Initialize the instance.
489
490							=cut
491
492							sub BUILD {
493							#use Data::Dumper;
494							#say Dumper(\@_);
495	17	100		17	1	359963	my $self = shift or croak 'Need an instance';
496	16					43	my $hrArgs = shift;
497
498							# DAGs always have names
499	16	100				52	$self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
500
501							# Graph of normal operations
502	16					197	my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
503							refvertexed => true);
504	16					31727	my $final = Data::Hopen::G::Node->new(
505							name => '__R_DAG_ROOT' . $_id_counter++);
506	16					1546	$graph->add_vertex($final);
507	16					2663	$self->_graph($graph);
508	16					408	$self->_final($final);
509
510							# Graph of initialization operations
511	16					144	my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
512							refvertexed => true);
513	16					2885	my $init = Data::Hopen::G::CollectOp->new(
514							name => '__R_DAG_INIT' . $_id_counter++);
515	16					770	$init_graph->add_vertex($init);
516
517	16					1528	$self->_init_graph($init_graph);
518	16					383	$self->_init_first($init);
519							} #BUILD()
520
521							1;
522							# Rest of the docs {{{1
523							__END__