File Coverage

lib/Data/Hopen/G/DAG.pm
Criterion Covered Total %
statement 182 182 100.0
branch 58 60 96.6
condition 6 7 85.7
subroutine 32 32 100.0
pod 6 6 100.0
total 284 287 98.9


line stmt bran cond sub pod time code
1             # Data::Hopen::G::DAG - hopen build graph
2             package Data::Hopen::G::DAG;
3 9     9   3484 use strict;
  9         23  
  9         404  
4 9     9   60 use Data::Hopen::Base;
  9         18  
  9         101  
5              
6             our $VERSION = '0.000021';
7              
8 9     9   2728 use parent 'Data::Hopen::G::Op';
  9         20  
  9         102  
9             use Class::Tiny {
10 14         171 goals => sub { [] },
11 9         144 default_goal => undef,
12             winner => undef,
13              
14             # Private attributes with simple defaults
15             #_node_by_name => sub { +{} }, # map from node names to nodes in either
16             # # _init_graph or _graph
17              
18             # Private attributes - initialized by BUILD()
19             _graph => undef, # L instance
20             _final => undef, # The graph sink - all goals have edges to this
21              
22             #Initialization operations
23             _init_graph => undef, # L
24             # for initializations
25             _init_first => undef, # Graph node for initialization - the first
26             # init operation to be performed.
27              
28             # TODO? also support fini to run operations after _graph runs?
29 9     9   895 };
  9         14  
30              
31 9     9   11011 use Data::Hopen qw(hlog getparameters *QUIET);
  9         20  
  9         1046  
32 9     9   4478 use Data::Hopen::G::Goal;
  9         25  
  9         380  
33 9     9   1455 use Data::Hopen::G::Link;
  9         25  
  9         275  
34 9     9   45 use Data::Hopen::G::Node;
  9         19  
  9         229  
35 9     9   4080 use Data::Hopen::G::CollectOp;
  9         55  
  9         487  
36 9     9   55 use Data::Hopen::Util::Data qw(forward_opts);
  9         16  
  9         495  
37 9     9   4054 use Data::Hopen::OrderedPredecessorGraph;
  9         37  
  9         372  
38 9     9   58 use Getargs::Mixed; # parameters, which doesn't permit undef
  9         19  
  9         714  
39 9     9   51 use Hash::Merge;
  9         17  
  9         341  
40 9     9   8628 use Regexp::Assemble;
  9         180811  
  9         523  
41 9     9   80 use Scalar::Util qw(refaddr);
  9         18  
  9         533  
42 9     9   42 use Storable ();
  9         18  
  9         284  
43              
44             # Class data {{{1
45              
46             use constant {
47 9         22723 LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48 9     9   43 };
  9         17  
49              
50             # A counter used for making unique names
51             my $_id_counter = 0; # threads: make shared
52              
53             # }}}1
54             # Docs {{{1
55              
56             =head1 NAME
57              
58             Data::Hopen::G::DAG - A hopen build graph
59              
60             =head1 SYNOPSIS
61              
62             This class encapsulates the DAG for a particular set of one or more goals.
63             It is itself a L so that it can be composed into
64             other DAGs.
65              
66             =head1 ATTRIBUTES
67              
68             =head2 goals
69              
70             Arrayref of the goals for this DAG.
71              
72             =head2 default_goal
73              
74             The default goal for this DAG.
75              
76             =head2 winner
77              
78             When a node has multiple predecessors, their outputs are combined using
79             L to form the input to that node. This sets the C
80             precedence. Valid values (case-insensitive) are:
81              
82             =over
83              
84             =item C or C<'combine'>
85              
86             (the default): L. Same-name keys
87             are merged, so no data is lost.
88              
89             =item C<'first'> or C<'keep'>
90              
91             L. The first predecessor to add a value
92             under a particular key will win.
93              
94             =item C<'last'> or C<'replace'>
95              
96             L. The last predecessor to add a value
97             under a particular key will win.
98              
99             =back
100              
101             =head2 _graph
102              
103             The actual L. If you find that you have to use it, please open an
104             issue so we can see about providing a documented API for your use case!
105              
106             =head2 _final
107              
108             The node to which all goals are connected.
109              
110             =head2 _init_graph
111              
112             A separate L of operations that will run before all the operations
113             in L. This is because I don't want to add an edge to every
114             single node just to force the topological sort to work out.
115              
116             =head2 _init_first
117              
118             The first node to be run in _init_graph.
119              
120             =head1 FUNCTIONS
121              
122             =cut
123              
124             # }}}1
125              
126             =head2 _run
127              
128             Traverses the graph. The DAG is similar to a subroutine in this respect. The
129             outputs from all the goals of the DAG are aggregated and provided as the
130             outputs of the DAG. The output is a hash keyed by the name of each goal, with
131             each goal's outputs as the values under that name. Usage:
132              
133             my $hrOutputs = $dag->run([-context=>$scope][, other options])
134              
135             C<$scope> must be a L or subclass if provided.
136             Other options are as L.
137              
138             When evaluating a node, the edges from its predecessors are traversed in
139             the order those predecessors were added to the graph.
140              
141             =cut
142              
143             # The implementation of run(). $self->scope has already been linked to the context.
144             sub _run {
145 30     30   212 my ($self, %args) = getparameters('self', [qw(; visitor)], @_);
146 30         1565 my $retval = {};
147              
148             # --- Get the initialization ops ---
149              
150 30         81 my @init_order = eval { $self->_init_graph->toposort };
  30         1050  
151 30 100       1880 die "Initializations contain a cycle!" if $@;
152 29 100       1181 @init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153              
154             # --- Get the runtime ops ---
155              
156 29         1138 my @order = eval { $self->_graph->toposort };
  29         859  
157             # TODO someday support multi-core-friendly topo-sort, so nodes can run
158             # in parallel until they block each other.
159 29 100       2372 die "Graph contains a cycle!" if $@;
160              
161             # Remove _final from the order for now - I don't yet know what it means
162             # to traverse _final.
163 28 50 66     1240 warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164             unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165              
166 28         314 @order = grep { refaddr $_ != refaddr $self->_final } @order;
  100         2604  
167              
168             # --- Check for non-connected ops, and goals with no inputs ---
169              
170 28 100       246 unless($QUIET) {
171 27         552 foreach my $node ($self->_graph->isolated_vertices) {
172 1         543 warn "Node @{[$node->name]} is not connected to any other nodes";
  1         26  
173             }
174              
175 27         12287 foreach my $goal (@{$self->goals}) {
  27         928  
176 27 100       768 warn "Goal @{[$goal->name]} has no inputs"
  1         112  
177             if $self->_graph->is_predecessorless_vertex($goal);
178             }
179             }
180              
181             # --- Set up for the merge ---
182              
183 28         3005 state $STRATEGIES = { # regex => strategy
184             '(|combine)' => 'combine',
185             '(first|keep)' => 'keep',
186             '(last|replace)' => 'replace',
187             };
188 28         118 state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189             ->anchor_string_begin->anchor_string_end
190             ->add(keys %$STRATEGIES);
191              
192 28   100     6555 my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193 28 100       6941 die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
  5         103  
194 23         99 my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195              
196             # --- Traverse ---
197              
198             # Note: while hacking, please make sure Goal nodes can appear
199             # anywhere in the graph.
200              
201 23     18   244 hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
  18         92  
  18         109  
202              
203 23         861 my $graph = $self->_init_graph;
204 23         196 foreach my $node (@init_order, undef, @order) {
205              
206 84 100       969 if(!defined($node)) { # undef is the marker between init and run
207 23         548 $graph = $self->_graph;
208 23         153 next;
209             }
210              
211             # Inputs to this node. These are different from the DAG's inputs.
212             # The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213             # then $node_inputs, then the individual node's overrides.
214 61         331 my $node_inputs = Data::Hopen::Scope::Hash->new;
215             # TODO make this a DH::Scope::Inputs once it's implemented
216 61         3306 $node_inputs->outer($self->scope);
217             # Data specifically being provided to the current node, e.g.,
218             # on input edges, beats the scope of the DAG as a whole.
219 61         3415 $node_inputs->local(true);
220             # A CollectOp won't reach above the node's inputs by default.
221 61         1739 $node_inputs->merge_strategy($merge_strategy);
222              
223             # Iterate over each node's edges and process any Links
224 61         575 foreach my $pred ($graph->ordered_predecessors($node)) {
225 35     29   8332 hlog { ('From', $pred->name, 'to', $node->name) };
  29         117  
226              
227             # Goals do not feed outputs to other Goals. This is so you can
228             # add edges between Goals to set their order while keeping the
229             # data for each Goal separate.
230             # TODO add tests for this. Also TODO decide whether this is
231             # actually the Right Thing!
232 35 50       276 next if eval { $pred->DOES('Data::Hopen::G::Goal') };
  35         396  
233              
234 35         1221 my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235              
236             # Simple case (no links): predecessor's outputs become our inputs
237 35 100       12316 unless($links) {
238 27     23   244 hlog { ' -- no links' };
  23         80  
239 27         148 $node_inputs->merge(%{$pred->outputs});
  27         124  
240             # TODO specify which set these are.
241             # Use the predecessor's identity as the set.
242 27         197 next;
243             }
244              
245             # More complex case: Process all the links
246              
247             # Helper function to wrap a hashref in the right scope for a link input
248             local *make_link_inputs = sub {
249 19     19   39 my $hrIn = shift;
250 19         87 my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251             # All links get the same outer scope --- they are parallel,
252             # not in series.
253             # TODO? use the predecessor's identity as the set.
254 19         404 $scLinkInputs->outer($self->scope);
255             # The links run at the same scope level as the node.
256 19         1352 $scLinkInputs->local(true);
257 19         138 return $scLinkInputs;
258 8         70 };
259              
260             # Make the first link's input scope
261 8         37 my $hrPredOutputs = $pred->outputs;
262             # In one test, outputs was undef if not on its own line.
263 8         127 my $scLinkInputs = make_link_inputs($hrPredOutputs);
264              
265             # Run the links in series - not parallel!
266 8         41 my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267 8         28 foreach my $link (@$links) {
268 11     9   174 hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
  9         31  
269              
270 11         183 $hrLinkOutputs = $link->run(
271             -context=>$scLinkInputs,
272             # visitor not passed to links.
273             );
274 11         214 $scLinkInputs = make_link_inputs($hrLinkOutputs);
275             } #foreach incoming link
276              
277 8         171 $node_inputs->merge(%$hrLinkOutputs);
278             # TODO specify which set these are.
279              
280             } #foreach predecessor node
281              
282 61         635 my $step_output = $node->run(-context=>$node_inputs,
283             forward_opts(\%args, {'-'=>1}, 'visitor')
284             );
285 61         1335 $node->outputs($step_output);
286              
287             # Give the visitor a chance, and stash the results if necessary.
288 61 100       126 if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
  61         745  
289 23 100       102 $args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
290              
291             # Save the result if there is one. Don't save {}.
292             # use $node->outputs, not $step_output, since the visitor may
293             # alter $node->outputs.
294 23 100       196 $retval->{$node->name} = $node->outputs if keys %{$node->outputs};
  23         90  
295             } else {
296 38 100       163 $args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
297             }
298              
299 47     47   148 hlog { 'Finished node', $node->name, 'with outputs',
300 61         518 Dumper $node->outputs } 10;
301              
302             } #foreach node in topo-sort order
303              
304 23         2151 return $retval;
305             } #run()
306              
307             =head1 ADDING DATA
308              
309             =head2 goal
310              
311             Creates a goal of the DAG. Goals are names for sequences of operations,
312             akin to top-level Makefile targets. Usage:
313              
314             my $goalOp = $dag->goal('name')
315              
316             Returns the L node that is the goal. By default, any
317             inputs passed into a goal are provided as outputs of that goal, and are
318             saved as outputs of the DAG under the goal's name.
319              
320             The first call to C also sets L.
321              
322             =cut
323              
324             sub goal {
325 17 100   17 1 3636 my $self = shift or croak 'Need an instance';
326 16 100       259 my $name = shift or croak 'Need a goal name';
327 15         140 my $goal = Data::Hopen::G::Goal->new(name => $name);
328 15         401 $self->_graph->add_vertex($goal);
329             #$self->_node_by_name->{$name} = $goal;
330 15         1391 $self->_graph->add_edge($goal, $self->_final);
331 15 100       326 $self->default_goal($goal) unless $self->default_goal;
332 15         404 push @{$self->goals}, $goal;
  15         321  
333 15         60 return $goal;
334             } #goal()
335              
336             =head2 connect
337              
338             =over 4
339              
340             =item - C<< DAG:connect(, , , ) >>
341              
342             B.
343             Connects output C of operation C as input C of
344             operation C. No processing is done between output and input.
345             C and C can be anything usable as a table index, provided
346             that table index appears in the corresponding operation's descriptor.
347              
348             =item - C<< DAG:connect(, ) >>
349              
350             Creates a dependency edge from C to C, indicating that C must be
351             run before C. Does not transfer any data from C to C.
352              
353             =item - C<< DAG:connect(, , ) >>
354              
355             Connects C to C via L C.
356             C may be undef, in which case this is treated as the two-parameter form.
357              
358             If there are already link(s) on the edge from C to C, the new link
359             is added after the last existing link.
360              
361             =back
362              
363             TODO return the name of the edge? The edge instance itself? Maybe a
364             fluent interface to the DAG for chaining C calls?
365              
366             TODO remove the out-edge and in-edge parameters?
367              
368             =cut
369              
370             sub connect {
371 21 100   21 1 8987 my $self = shift or croak 'Need an instance';
372 20         54 my ($op1, $out_edge, $in_edge, $op2, $link);
373              
374             # Unpack args
375             #if(@_ == 4) {
376             # ($op2, $out_edge, $in_edge, $op2) = @_;
377             #} else the following
378 20 100       79 if(@_ == 3) {
    100          
379 12         29 ($op1, $link, $op2) = @_;
380             } elsif(@_ == 2) {
381 5         11 ($op1, $op2) = @_;
382             } else {
383 3         245 die "Invalid arguments";
384             }
385              
386             #my $out_edge = false; # No outputs TODO use these?
387             #my $in_edge = false; # No inputs
388              
389 13 100   13   56 hlog { 'DAG::connect(): Edge from', $op1->name,
390             'via', $link ? $link->name : '(no link)',
391 17         144 'to', $op2->name };
392              
393             # Add it to the graph (idempotent)
394 17         701 $self->_graph->add_edge($op1, $op2);
395             # $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
396              
397             # Save the DHG::Link as an edge attribute (not idempotent!)
398 17 100       68 if($link) {
399 11   100     266 my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) || [];
400 11         3286 push @$attrs, $link;
401 11         288 $self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
402             }
403              
404 17         3422 return undef; # TODO decide what to return
405             } #connect()
406              
407             =head2 add
408              
409             Add a regular node to the graph. An attempt to add the same node twice will be
410             ignored. Usage:
411              
412             my $node = Data::Hopen::G::Op->new(name=>"whatever");
413             $dag->add($node);
414              
415             Returns the node, for the sake of chaining.
416              
417             =cut
418              
419             sub add {
420 4     4 1 3734 my ($self, undef, $node) = parameters('self', ['node'], @_);
421 2 100       160 return if $self->_graph->has_vertex($node);
422 1     1   83 hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
  1         7  
423              
424 1         53 $self->_graph->add_vertex($node);
425             #$self->_node_by_name->{$node->name} = $node if $node->name;
426              
427 1         97 return $node;
428             } #add()
429              
430             =head2 init
431              
432             Add an initialization operation to the graph. Initialization operations run
433             before all other operations. An attempt to add the same initialization
434             operation twice will be ignored. Usage:
435              
436             my $op = Data::Hopen::G::Op->new(name=>"whatever");
437             $dag->init($op[, $first]);
438              
439             If C<$first> is truthy, the op will be run before anything already in the
440             graph. However, later calls to C with C<$first> set will push
441             operations even before C<$op>.
442              
443             Returns the node, for the sake of chaining.
444              
445             =cut
446              
447             sub init {
448 6 100   6 1 6929 my $self = shift or croak 'Need an instance';
449 5 100       329 my $op = shift or croak 'Need an op';
450 4         9 my $first = shift;
451 4 100       137 return if $self->_init_graph->has_vertex($op);
452              
453 3         255 $self->_init_graph->add_vertex($op);
454             #$self->_node_by_name->{$op->name} = $op;
455              
456 3 100       256 if($first) { # $op becomes the new _init_first node
457 1         32 $self->_init_graph->add_edge($op, $self->_init_first);
458 1         30 $self->_init_first($op);
459             } else { # Not first, so can happen anytime. Add it after the
460             # current first node.
461 2         83 $self->_init_graph->add_edge($self->_init_first, $op);
462             }
463              
464 3         19 return $op;
465             } #init()
466              
467             =head1 ACCESSORS
468              
469             =head2 empty
470              
471             Returns truthy if the only nodes in the graph are internal nodes.
472             Intended for use by hopen files.
473              
474             =cut
475              
476             sub empty {
477 3 100   3 1 2118 my $self = shift or croak 'Need an instance';
478 2         74 return ($self->_graph->vertices == 1);
479             # _final is the node in an empty() graph.
480             # We don't check the _init_graph since empty() is intended
481             # for use by hopen files, not toolsets.
482             } #empty()
483              
484             =head1 OTHER
485              
486             =head2 BUILD
487              
488             Initialize the instance.
489              
490             =cut
491              
492             sub BUILD {
493             #use Data::Dumper;
494             #say Dumper(\@_);
495 17 100   17 1 359963 my $self = shift or croak 'Need an instance';
496 16         43 my $hrArgs = shift;
497              
498             # DAGs always have names
499 16 100       52 $self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
500              
501             # Graph of normal operations
502 16         197 my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
503             refvertexed => true);
504 16         31727 my $final = Data::Hopen::G::Node->new(
505             name => '__R_DAG_ROOT' . $_id_counter++);
506 16         1546 $graph->add_vertex($final);
507 16         2663 $self->_graph($graph);
508 16         408 $self->_final($final);
509              
510             # Graph of initialization operations
511 16         144 my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
512             refvertexed => true);
513 16         2885 my $init = Data::Hopen::G::CollectOp->new(
514             name => '__R_DAG_INIT' . $_id_counter++);
515 16         770 $init_graph->add_vertex($init);
516              
517 16         1528 $self->_init_graph($init_graph);
518 16         383 $self->_init_first($init);
519             } #BUILD()
520              
521             1;
522             # Rest of the docs {{{1
523             __END__