File Coverage

blib/lib/MarpaX/Demo/StringParser.pm
Criterion Covered Total %
statement 201 250 80.4
branch 50 74 67.5
condition 7 15 46.6
subroutine 28 30 93.3
pod 4 5 80.0
total 290 374 77.5


line stmt bran cond sub pod time code
1             package MarpaX::Demo::StringParser;
2              
3 1     1   61055 use strict;
  1         2  
  1         38  
4 1     1   5 use utf8;
  1         2  
  1         8  
5 1     1   17 use warnings;
  1         5  
  1         35  
6 1     1   3 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  1         2  
  1         36  
7              
8 1     1   631 use File::Slurp; # For read_file().
  1         11978  
  1         79  
9              
10 1     1   1024 use Log::Handler;
  1         34507  
  1         8  
11              
12 1     1   676 use Marpa::R2;
  1         132905  
  1         17  
13              
14 1     1   956 use Moo;
  1         12412  
  1         7  
15              
16 1     1   2138 use Set::Array;
  1         11707  
  1         25  
17              
18 1     1   781 use Text::CSV;
  1         10965  
  1         9  
19              
20 1     1   1309 use Tree::DAG_Node;
  1         16783  
  1         24  
21              
22 1     1   1070 use Types::Standard qw/Any ArrayRef HashRef Int Str/;
  1         99144  
  1         28  
23              
24 1     1   1743 use Try::Tiny;
  1         19  
  1         5309  
25              
26             has bnf =>
27             (
28             default => sub{return ''},
29             is => 'rw',
30             isa => Any,
31             required => 0,
32             );
33              
34             has description =>
35             (
36             default => sub{return ''},
37             is => 'rw',
38             isa => Str,
39             required => 0,
40             );
41              
42             has grammar =>
43             (
44             default => sub{return ''},
45             is => 'rw',
46             isa => Any, # 'Marpa::R2::Scanless::G'.
47             required => 0,
48             );
49              
50             has graph_text =>
51             (
52             default => sub{return ''},
53             is => 'rw',
54             isa => Str,
55             required => 0,
56             );
57              
58             has input_file =>
59             (
60             default => sub{return ''},
61             is => 'rw',
62             isa => Str,
63             required => 0,
64             );
65              
66             has logger =>
67             (
68             default => sub{return undef},
69             is => 'rw',
70             isa => Any,
71             required => 0,
72             );
73              
74             has maxlevel =>
75             (
76             default => sub{return 'notice'},
77             is => 'rw',
78             isa => Str,
79             required => 0,
80             );
81              
82             has minlevel =>
83             (
84             default => sub{return 'error'},
85             is => 'rw',
86             isa => Str,
87             required => 0,
88             );
89              
90             has known_events =>
91             (
92             default => sub{return {} },
93             is => 'rw',
94             isa => HashRef,
95             required => 0,
96             );
97              
98             has recce =>
99             (
100             default => sub{return ''},
101             is => 'rw',
102             isa => Any, # 'Marpa::R2::Scanless::R'.
103             required => 0,
104             );
105              
106             has stack =>
107             (
108             default => sub{return []},
109             is => 'rw',
110             isa => ArrayRef,
111             required => 0,
112             );
113              
114             has tree =>
115             (
116             default => sub{return ''},
117             is => 'rw',
118             isa => Any,
119             required => 0,
120             );
121              
122             has uid =>
123             (
124             default => sub{return 0},
125             is => 'rw',
126             isa => Int,
127             required => 0,
128             );
129              
130             our $VERSION = '2.03';
131              
132             # --------------------------------------------------
133             # For accepted and rejected by Marpa, see
134             # Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
135              
136             sub BUILD
137             {
138 6     6 0 318 my($self) = @_;
139              
140 6 50       170 if (! defined $self -> logger)
141             {
142 6         1200 $self -> logger(Log::Handler -> new);
143 6         1319 $self -> logger -> add
144             (
145             screen =>
146             {
147             alias => 'logger',
148             maxlevel => $self -> maxlevel,
149             message_layout => '%m',
150             minlevel => $self -> minlevel,
151             }
152             );
153             }
154              
155             # Policy: Event names are always the same as the name of the corresponding lexeme.
156              
157             $self -> bnf
158             (
159             <<'END_OF_GRAMMAR'
160              
161             :default ::= action => [values]
162              
163             lexeme default = latm => 1 # Longest Acceptable Token Match.
164              
165             :start ::= graph_grammar
166              
167             graph_grammar ::= graph_definition
168              
169             # Graph stuff.
170              
171             graph_definition ::= node_definition
172             | edge_definition
173             # Node stuff
174              
175             node_definition ::= node_statement
176             | node_statement graph_definition
177              
178             node_statement ::= node_name_token
179             | node_name_token attribute_definition
180             | node_statement (',') node_statement
181              
182             node_name_token ::= start_node end_node # Allow for the anonymous node.
183             | start_node node_name end_node
184              
185             # Edge stuff
186              
187             edge_definition ::= edge_statement
188             | edge_statement graph_definition
189              
190             edge_statement ::= edge_name
191             | edge_name attribute_definition
192             | edge_statement (',') edge_statement
193              
194             edge_name ::= directed_edge
195             | undirected_edge
196              
197             # Attribute stuff.
198              
199             attribute_definition ::= attribute_statement+
200              
201             attribute_statement ::= start_attributes string_token_set end_attributes
202              
203             string_token_set ::= string_token_pair+
204              
205             string_token_pair ::= literal_label
206             | attribute_name (':') attribute_value
207              
208             # Lexemes in alphabetical order.
209              
210             :lexeme ~ attribute_name pause => before event => attribute_name
211              
212             attribute_name ~ string_char_set+
213              
214             :lexeme ~ attribute_value pause => before event => attribute_value
215              
216             attribute_value ~ string_char_set+
217              
218             :lexeme ~ directed_edge pause => before event => directed_edge priority => 2
219             directed_edge ~ '->'
220              
221             :lexeme ~ end_attributes pause => before event => end_attributes priority => 1
222             end_attributes ~ '}'
223              
224             :lexeme ~ end_node pause => before event => end_node priority => 1
225             end_node ~ ']'
226              
227             escaped_char ~ '\' [[:print:]]
228              
229             # Use ' here just for the UltraEdit syntax hiliter.
230              
231             :lexeme ~ literal_label pause => before event => literal_label priority => 1
232             literal_label ~ 'label'
233              
234             :lexeme ~ node_name pause => before event => node_name
235              
236             node_name ~ string_char_set+
237              
238             :lexeme ~ start_attributes pause => before event => start_attributes
239             start_attributes ~ '{'
240              
241             :lexeme ~ start_node pause => before event => start_node
242             start_node ~ '['
243              
244             string_char_set ~ escaped_char
245             | [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
246              
247             :lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
248             undirected_edge ~ '--'
249              
250             # Boilerplate.
251              
252             :discard ~ whitespace
253             whitespace ~ [\s]+
254              
255             END_OF_GRAMMAR
256 6         17361 );
257              
258 6         1497 $self -> grammar
259             (
260             Marpa::R2::Scanless::G -> new
261             ({
262             source => \$self -> bnf
263             })
264             );
265              
266 6         1099902 $self -> recce
267             (
268             Marpa::R2::Scanless::R -> new
269             ({
270             grammar => $self -> grammar,
271             })
272             );
273              
274 6         6682 my(%event);
275              
276 6         258 for my $line (split(/\n/, $self -> bnf) )
277             {
278 564 100       3341 $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
279             }
280              
281 6         326 $self -> known_events(\%event);
282              
283             # Since $self -> tree has not been initialized yet,
284             # we can't call our _add_daughter() until after this statement.
285              
286 6         1291 $self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
287 6         2178 $self -> stack([$self -> tree -> root]);
288              
289             # This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
290             # So, all tokens in the input are descended from the 'graph' node.
291              
292 6         1489 for my $name (qw/prolog graph/)
293             {
294 12         789 $self -> _add_daughter($name, {});
295             }
296              
297             # The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
298             # It is used in GraphViz2::Marpa;
299             # The 'graph' daughter gets pushed onto the stack because in this module's grammar,
300             # all items belong to the graph.
301              
302 6         1780 my(@daughters) = $self -> tree -> daughters;
303 6         134 my($index) = 1; # 0 => prolog, 1 => graph.
304 6         175 my($stack) = $self -> stack;
305              
306 6         63 push @$stack, $daughters[$index];
307              
308 6         298 $self -> stack($stack);
309              
310             } # End of BUILD.
311              
312             # ------------------------------------------------
313              
314             sub _add_daughter
315             {
316 62     62   164 my($self, $name, $attributes) = @_;
317 62         2594 $$attributes{uid} = $self -> uid($self -> uid + 1);
318 62         4477 my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
319 62         9330 my($stack) = $self -> stack;
320              
321 62         982 $$stack[$#$stack] -> add_daughter($node);
322              
323             } # End of _add_daughter.
324              
325             # --------------------------------------------------
326              
327             sub clean_after
328             {
329 42     42 1 287 my($self, $s) = @_;
330              
331 42         408 $s =~ s/^\s+//;
332 42         184 $s =~ s/\s+$//;
333 42         229 $s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
334              
335 42         188 return $s;
336              
337             } # End of clean_after.
338              
339             # --------------------------------------------------
340              
341             sub clean_before
342             {
343 6     6 1 78 my($self, $s) = @_;
344              
345 6         117 $s =~ s/\s*;\s*$//;
346 6         23 $s =~ s/^\s+//;
347 6         325 $s =~ s/\s+$//;
348 6         96 $s =~ s/^(<)\s+/$1/;
349 6         28 $s =~ s/\s+(>)$/$1/;
350              
351 6         24 return $s;
352              
353             } # End of clean_before.
354              
355             # --------------------------------------------------
356              
357             sub log
358             {
359 92     92 1 19570 my($self, $level, $s) = @_;
360              
361 92 50       3826 $self -> logger -> log($level => $s) if ($self -> logger);
362              
363             } # End of log.
364              
365             # --------------------------------------------------
366              
367             sub _process
368             {
369 6     6   16 my($self) = @_;
370 6         292 my($string) = $self -> clean_before($self -> graph_text);
371 6         27 my($length) = length $string;
372 6         18 my($last_event) = '';
373 6         14 my($format) = '%-20s %5s %5s %5s %-s';
374              
375 6         103 $self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme') );
376              
377             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
378              
379 6         692 my($event_name);
380             my(@fields);
381 0         0 my($lexeme, $literal);
382 0         0 my($span, $start);
383              
384 6         205 for
385             (
386             my $pos = $self -> recce -> read(\$string);
387             $pos < $length;
388             $pos = $self -> recce -> resume($pos)
389             )
390             {
391 74         40094 $event_name = $self -> _validate_event;
392 74         2760 ($start, $span) = $self -> recce -> pause_span;
393 74         3537 $pos = $self -> recce -> lexeme_read($event_name);
394 74         15747 $literal = substr($string, $start, $pos - $start);
395 74         16371 $lexeme = $self -> recce -> literal($start, $span);
396              
397 74         3728 $self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme) );
398              
399 74 100       8056 if ($event_name eq 'attribute_name')
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    0          
400             {
401 11         54 $fields[0] = $self -> clean_after($literal);
402             }
403             elsif ($event_name eq 'attribute_value')
404             {
405 11         58 $literal = $self -> clean_after($literal);
406              
407 11         108 $self -> _add_daughter($fields[0], {value => $literal});
408              
409 11         1720 @fields = ();
410              
411             # Skip the separator.
412              
413 11   100     209 while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
  18         136  
414             }
415             elsif ($event_name eq 'directed_edge')
416             {
417 5         26 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
418             }
419             elsif ($event_name eq 'end_attributes')
420             {
421 9         46 $self -> _process_brace($literal);
422             }
423             elsif ($event_name eq 'end_node')
424             {
425             # Is this the anonymous node?
426              
427 7 100       44 if ($last_event eq 'start_node')
428             {
429 1         10 $self -> _add_daughter('node_id', {value => ''});
430             }
431             }
432             elsif ($event_name eq 'literal_label')
433             {
434 9         30 push @fields, $literal;
435              
436 9         296 $pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
437 9         31 @fields = ();
438             }
439             elsif ($event_name eq 'node_name')
440             {
441 6         37 $literal = $self -> clean_after($literal);
442              
443 6         53 $self -> _add_daughter('node_id', {value => $literal});
444             }
445             elsif ($event_name eq 'start_attributes')
446             {
447 9         39 $self -> _process_brace($literal);
448             }
449             elsif ($event_name eq 'start_node')
450             {
451             # Do nothing.
452             }
453             elsif ($event_name eq 'undirected_edge')
454             {
455 0         0 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
456             }
457              
458 74         7348 $last_event = $event_name;
459             }
460              
461 6 50       1815 if ($self -> recce -> ambiguity_metric > 1)
462             {
463 0         0 $self -> log(notice => 'Ambiguous parse');
464             }
465              
466 6 50       1060 if (my $ambiguous_status = $self -> recce -> ambiguous)
467             {
468 0         0 $self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
469             }
470              
471             # Return a defined value for success and undef for failure.
472              
473 6         387 return $self -> recce -> value;
474              
475             } # End of _process.
476              
477             # --------------------------------------------------
478              
479             sub _process_brace
480             {
481 18     18   52 my($self, $name) = @_;
482              
483             # When a '{' is encountered, the last thing pushed becomes it's parent.
484             # Likewise, if '}' is encountered, we pop the stack.
485              
486 18         634 my($stack) = $self -> stack;
487              
488 18 100       197 if ($name eq '{')
489             {
490 9         1195 my(@daughters) = $$stack[$#$stack] -> daughters;
491              
492 9         145 push @$stack, $daughters[$#daughters];
493              
494 9         66 $self -> _process_token('literal', $name);
495             }
496             else
497             {
498 9         58 $self -> _process_token('literal', $name);
499              
500 9         1328 pop @$stack;
501              
502 9         311 $self -> stack($stack);
503             }
504              
505             } # End of _process_brace.
506              
507             # ------------------------------------------------
508              
509             sub _process_html
510             {
511 1     1   5 my($self, $recce, $fields, $string, $length, $pos) = @_;
512              
513 1         4 my($bracket_count) = 0;
514 1         5 my($open_bracket) = '<';
515 1         6 my($close_bracket) = '>';
516 1         3 my($previous_char) = '';
517 1         3 my($label) = '';
518              
519 1         3 my($char);
520              
521 1         7 while ($pos < $length)
522             {
523 68         80 $char = substr($string, $pos, 1);
524 68         73 $label .= $char;
525              
526 68 50       341 if ($previous_char eq '\\')
    100          
    100          
527             {
528             }
529             elsif ($char eq $open_bracket)
530             {
531 7         10 $bracket_count++;
532             }
533             elsif ($char eq $close_bracket)
534             {
535 7         10 $bracket_count--;
536              
537 7 100       19 if ($bracket_count == 0)
538             {
539 1         4 $pos++;
540              
541 1         4 last;
542             }
543             }
544              
545 67         82 $previous_char = $char;
546              
547 67         112 $pos++;
548             }
549              
550 1         10 $label = $self -> clean_after($label);
551              
552 1 50 33     32 if ( ($label =~ /^$/) )
553             {
554 0         0 my($line, $column) = $recce -> line_column;
555              
556 0         0 die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
557             }
558              
559 1         5 push @$fields, $label;
560              
561 1         7 return $self -> _skip_separator($string, $length, $pos, ';');
562              
563             } # End of _process_html.
564              
565             # ------------------------------------------------
566              
567             sub _process_label
568             {
569 9     9   117 my($self, $recce, $fields, $string, $length, $pos) = @_;
570              
571 9         45 $pos = $self -> _skip_separator($string, $length, $pos, ':');
572              
573 9 50       36 return $pos if ($pos >= $length);
574              
575 9         35 my($char) = substr($string, $pos, 1);
576              
577 9 50       68 if ($char eq "'")
    50          
    100          
578             {
579 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
580             }
581             elsif ($char eq '"')
582             {
583 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
584             }
585             elsif ($char eq '<')
586             {
587 1         11 $pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
588             }
589             else
590             {
591 8         44 $pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
592             }
593              
594 9         53 for (my $i = 0; $i < $#$fields; $i += 2)
595             {
596 9         95 $self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
597             }
598              
599 9         2275 return $pos;
600              
601             } # End of _process_label.
602              
603             # ------------------------------------------------
604              
605             sub _process_quotes
606             {
607 0     0   0 my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
608              
609 0         0 my($previous_char) = '';
610 0         0 my($label) = '';
611 0         0 my($quote_count) = 0;
612              
613 0         0 my($char);
614              
615 0         0 while ($pos < $length)
616             {
617 0         0 $char = substr($string, $pos, 1);
618              
619 0 0 0     0 if ( ($previous_char ne '\\') && ($char eq $terminator) )
620             {
621 0         0 $quote_count++;
622              
623 0 0       0 if ($quote_count == 2)
624             {
625 0         0 $label .= $char;
626              
627 0         0 $pos++;
628              
629 0         0 last;
630             }
631             }
632              
633 0         0 $label .= $char;
634 0         0 $previous_char = $char;
635              
636 0         0 $pos++;
637             }
638              
639             # Don't call clean_after, since it removes the ' and " we are about to check.
640              
641 0         0 $label =~ s/^\s+//;
642 0         0 $label =~ s/\s+$//;
643              
644 0 0 0     0 if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
645             {
646             # Use ' and " here just for the UltraEdit syntax hiliter.
647              
648 0         0 my($line, $column) = $recce -> line_column;
649              
650 0         0 die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
651             }
652              
653 0         0 $label = $self -> clean_after($label);
654              
655 0         0 push @$fields, $label;
656              
657 0         0 $self -> log(debug => "_process_quotes(). Label !$label!");
658              
659 0         0 return $self -> _skip_separator($string, $length, $pos, ';');
660              
661             } # End of _process_quotes.
662              
663             # --------------------------------------------------
664              
665             sub _process_token
666             {
667 18     18   52 my($self, $name, $value) = @_;
668              
669 18         140 $self -> _add_daughter($name, {value => $value});
670              
671             } # End of _process_token.
672              
673             # ------------------------------------------------
674              
675             sub _process_unquoted
676             {
677 8     8   25 my($self, $recce, $fields, $string, $length, $pos) = @_;
678 8         52 my($re) = qr/[;}]/;
679              
680 8 50       89 if (substr($string, $pos, 1) =~ $re)
681             {
682 0         0 push @$fields, '';
683              
684 0         0 return $pos;
685             }
686              
687 8         24 my($previous_char) = '';
688 8         20 my($label) = '';
689 8         15 my($quote_count) = 0;
690              
691 8         13 my($char);
692              
693 8         30 while ($pos < $length)
694             {
695 99         159 $char = substr($string, $pos, 1);
696              
697 99 100 100     600 last if ( ($previous_char ne '\\') && ($char =~ $re) );
698              
699 91         104 $label .= $char;
700 91         201 $previous_char = $char;
701              
702 91         163 $pos++;
703             }
704              
705 8         42 $label = $self -> clean_after($label);
706              
707 8         28 push @$fields, $label;
708              
709 8         35 return $self -> _skip_separator($string, $length, $pos, ';');
710              
711             } # End of _process_unquoted.
712              
713             # --------------------------------------------------
714              
715             sub run
716             {
717 6     6 1 32407 my($self) = @_;
718              
719 6 50       363 if ($self -> description)
    50          
720             {
721             # Assume graph is a single line without comments.
722              
723 0         0 $self -> graph_text($self -> description);
724             }
725             elsif ($self -> input_file)
726             {
727             # Quick removal of whole-line C++ and hash comments.
728              
729 6         2791 $self -> graph_text(join(' ', grep{! m!^(?:#|//)!} read_file($self -> input_file, binmode => ':encoding(utf-8)') ) );
  19         23198  
730             }
731             else
732             {
733 0         0 die "Error: You must provide a graph using one of -input_file or -description\n";
734             }
735              
736             # Return 0 for success and 1 for failure.
737              
738 6         1275 my($result) = 0;
739              
740             try
741             {
742 6 50   6   533 if (defined (my $value = $self -> _process) )
743             {
744 6         76251 $self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
  6         1781  
745             }
746             else
747             {
748 0         0 $result = 1;
749              
750 0         0 $self -> log(error => 'Parse failed');
751             }
752             }
753             catch
754             {
755 0     0   0 $result = 1;
756              
757 0         0 $self -> log(error => "Parse failed. Error: $_");
758 6         126 };
759              
760 6         10914 $self -> log(info => "Parse result: $result (0 is success)");
761              
762             # Return 0 for success and 1 for failure.
763              
764 6         154198 return $result;
765              
766             } # End of run.
767              
768             # ------------------------------------------------
769              
770             sub _skip_separator
771             {
772 18     18   46 my($self, $string, $length, $pos, $separator) = @_;
773 18         926 my($re) = qr/[\s$separator]/;
774              
775 18         133 my($char);
776              
777 18         167 while ($pos < $length - 1)
778             {
779 42         218 $char = substr($string, $pos, 1);
780              
781 42 100       422 last if ($char !~ $re);
782              
783 26         65 $pos++;
784             }
785              
786 18         210 return $pos;
787              
788             } # End of _skip_separator.
789              
790             # ------------------------------------------------
791              
792             sub _validate_event
793             {
794 74     74   261 my($self) = @_;
795 74         123 my(@event) = @{$self -> recce -> events};
  74         6508  
796 74         1572 my($event_count) = scalar @event;
797              
798 74 50       371 if ($event_count > 1)
799             {
800 0         0 $self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
  0         0  
  0         0  
801              
802 0         0 die "The code only handles 1 event at a time\n";
803             }
804              
805 74         109 my($event_name) = ${$event[0]}[0];
  74         419  
806              
807 74 50       109 if (! ${$self -> known_events}{$event_name})
  74         2705  
808             {
809 0         0 my($msg) = "Unexpected event name '$event_name'";
810              
811 0         0 $self -> log(error => $msg);
812              
813 0         0 die "$msg\n";
814             }
815              
816 74         1174 return $event_name;
817              
818             } # End of _validate_event.
819              
820             # --------------------------------------------------
821              
822             1;
823              
824             =pod
825              
826             =head1 NAME
827              
828             L - A Marpa-based parser for the DASH language
829              
830             =head1 Synopsis
831              
832             Typical usage:
833              
834             perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
835             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
836              
837             You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
838              
839             scripts/parse.sh node.04 -max info
840              
841             See L for sample
842             input and output.
843              
844             Also, see L
845             based on this module.
846              
847             =head1 Description
848              
849             This module implements a parser for L (below), a wrapper language around Graphviz's
850             L. That is, the module is a pre-processor for the
851             DOT language.
852              
853             Specifically, this module demonstrates how to use L's capabilities to have Marpa
854             repeatedly pass control back to code in your own module, during the parse, to handle those cases
855             where you don't want Marpa's default processing to occur.
856              
857             This allows the code to deal with the classic case of where you wish to preserve whitespace in some
858             contexts, but also want Marpa to discard whitespace in all other contexts.
859              
860             DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
861             to learn DOT.
862              
863             The DASH language is actually a cut-down version of the language used by L. For a full
864             explanation of the Graph::Easy language, see L.
865              
866             The wrapper is parsed into a tree of tokens managed by L.
867              
868             If requested by the user, the tree is passed to the default renderer
869             L. Various options allow the user to control the output, as
870             an SVG (PNG, ...) image, and to save the DOT version of the graph.
871              
872             In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
873             been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
874             current module.
875              
876             Note that this module's usage of Marpa's adverbs I and I should be regarded as an
877             intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
878             is the recommended technique.
879              
880             The article mentioned above discusses important issues regarding the timing sequence of I
881             and I.
882              
883             All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
884             is implemented. I'm currently (2014-10-10) using L V 2.096000.
885              
886             Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
887             L, which parses DOT files.
888              
889             =head1 Installation
890              
891             Install L as you would for any C module:
892              
893             Run:
894              
895             cpanm MarpaX::Demo::StringParser
896              
897             or run:
898              
899             sudo cpan MarpaX::Demo::StringParser
900              
901             or unpack the distro, and then either:
902              
903             perl Build.PL
904             ./Build
905             ./Build test
906             sudo ./Build install
907              
908             or:
909              
910             perl Makefile.PL
911             make (or dmake or nmake)
912             make test
913             make install
914              
915             =head1 Scripts Shipped with this Module
916              
917             All scripts are shipped in the scripts/ directory.
918              
919             =over 4
920              
921             =item o copy.config.pl
922              
923             This is for use by the author. It just copies the config file out of the distro, so the script
924             generate.index.pl (which uses HTML template stuff) can find it.
925              
926             =item o find.config.pl
927              
928             This cross-checks the output of copy.config.pl.
929              
930             =item o dash2svg.pl
931              
932             Converts all data/*.dash files into the corresponding html/*.svg files.
933              
934             Used by generate.demo.sh.
935              
936             =item o generate.demo.sh
937              
938             This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
939              
940             And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
941              
942             =item o generate.index.pl
943              
944             This constructs a web page containing all the html/*.svg files.
945              
946             =item o parse.pl
947              
948             This runs a parse on a single input file. Run 'parse.pl -h' for details.
949              
950             =item o parse.sh
951              
952             This simplifies running parse.pl.
953              
954             =item o pod2html.sh
955              
956             This converts all lib/*.pm files into their corresponding *.html versions, for proof-reading and
957             uploading to my real web site.
958              
959             =item o render.pl
960              
961             This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
962             for details.
963              
964             =item o render.sh
965              
966             This simplifies running render.pl.
967              
968             =back
969              
970             =head1 Constructor and Initialization
971              
972             C is called as C<< my($parser) = MarpaX::Demo::StringParser -> new(k1 => v1, k2 => v2, ...) >>.
973              
974             It returns a new object of type C.
975              
976             Key-value pairs accepted in the parameter list (see corresponding methods for details
977             [e.g. description($graph)]):
978              
979             =over 4
980              
981             =item o description => '[node.1]->[node.2]'
982              
983             Specify a string for the graph definition.
984              
985             You are strongly encouraged to surround this string with '...' to protect it from your shell if using
986             this module directly from the command line.
987              
988             See also the I key which reads the graph from a file.
989              
990             The I key takes precedence over the I key.
991              
992             Default: ''.
993              
994             =item o input_file => $graph_file_name
995              
996             Read the graph definition from this file.
997              
998             See also the I key to read the graph from the command line.
999              
1000             The whole file is slurped in as a single graph.
1001              
1002             The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1003              
1004             The I key takes precedence over the I key.
1005              
1006             Default: ''.
1007              
1008             =item o logger => $logger_object
1009              
1010             Specify a logger object.
1011              
1012             To disable logging, just set logger to the empty string.
1013              
1014             Default: An object of type L.
1015              
1016             =item o maxlevel => $level
1017              
1018             This option is only used if this module creates an object of type L.
1019              
1020             See L.
1021              
1022             Default: 'notice'. A typical choice is 'info' or 'debug'.
1023              
1024             =item o minlevel => $level
1025              
1026             This option is only used if this module creates an object of type L.
1027              
1028             See L.
1029              
1030             Default: 'error'.
1031              
1032             No lower levels are used.
1033              
1034             =back
1035              
1036             =head1 Methods
1037              
1038             =head2 clean_before($s)
1039              
1040             Cleans the input string before the next step in the parse process.
1041              
1042             Typically only ever called once.
1043              
1044             Returns the cleaned string.
1045              
1046             =head2 clean_after($s)
1047              
1048             Cleans the input string after each step in the parse process.
1049              
1050             Typically called many times, once on each output token.
1051              
1052             Returns the cleaned string.
1053              
1054             =head2 description([$graph])
1055              
1056             Here, the [] indicate an optional parameter.
1057              
1058             Gets or sets the graph string to be parsed.
1059              
1060             See also the L method.
1061              
1062             The value supplied to the description() method takes precedence over the value read from the input file.
1063              
1064             Also, I is an option to new().
1065              
1066             =head2 graph_text([$graph])
1067              
1068             Here, the [] indicate an optional parameter.
1069              
1070             Returns the value of the graph definition string, from either the command line or a file.
1071              
1072             =head2 input_file([$graph_file_name])
1073              
1074             Here, the [] indicate an optional parameter.
1075              
1076             Gets or sets the name of the file to read the graph definition from.
1077              
1078             See also the L method.
1079              
1080             The whole file is slurped in as a single graph.
1081              
1082             The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1083              
1084             The value supplied to the description() method takes precedence over the value read from the input file.
1085              
1086             Also, I is an option to new().
1087              
1088             =head2 log($level, $s)
1089              
1090             Calls $self -> logger -> log($level => $s) if ($self -> logger).
1091              
1092             =head2 run()
1093              
1094             This is the only method the caller needs to call. All parameters are supplied to new().
1095              
1096             Returns 0 for success and 1 for failure.
1097              
1098             =head2 recce()
1099              
1100             Returns an object of type L.
1101              
1102             =head2 tree()
1103              
1104             Returns an object of type L.
1105              
1106             =head1 DASH Syntax
1107              
1108             See L for sample
1109             input and output.
1110              
1111             The examples in the following sections are almost all taken from data/*.dash, in the distro.
1112              
1113             =head2 Graphs in DASH
1114              
1115             1: A graph definition may continue over multiple lines.
1116             2: Lines beginning with either '#' or '//' are discarded as comments.
1117             3: A node name or an edge name must never be split over multiple lines.
1118             4: Attributes may be split over lines, but do not split either the name or value of the
1119             attribute over multiple lines.
1120             Note: Attribute values can contain various escaped characters, e.g. \n.
1121             5: A graph may start or end with an edge, and even have contiguous edges.
1122             See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1123             possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1124             they will satisfy the requirements of Graphviz.
1125              
1126             Examples:
1127              
1128             1: A graph split over 10 lines:
1129             [node.1] {label: "n 1"}
1130             -> {label: 'e 1'}
1131             -> {label: e 2}
1132             [] {label: n 2}
1133             -> {label : e 3}
1134             [node.3] {label: "n 3"}
1135             -> {label: 'e 4'},
1136             -> {label: e 5}
1137             [] {label: n 2}
1138             -> {label : e 6}
1139             2: A graph split over 14 lines:
1140             ->
1141             ->
1142              
1143             [node]
1144             [node] ->
1145             -> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1146             [node.1] [node.2] [node.3]
1147              
1148             []
1149             [node.1]
1150             [node 1]
1151             ['node.2']
1152             ["node.3"]
1153             [ From here ] -> [ To there ]
1154              
1155             =head2 Nodes in DASH
1156              
1157             Node names:
1158              
1159             1: Are delimited by '[' and ']'.
1160             2: May be quoted with " or '.
1161             3: Allow escaped characters, using '\'.
1162             4: Allow internal spaces, even if not quoted.
1163             5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1164             This is called 'Daisy-chaining'.
1165              
1166             See L for the origin of this term.
1167              
1168             Examples:
1169              
1170             1: The anonymous node: []
1171             2: The anonymous node, with attributes (explained below): []{color:red}
1172             3: A named node: [Marpa]
1173             4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1174             5: A named node with an internal space: [Perl 6]
1175             6: A named node with attributes: [node.1]{label: A and B}
1176             7: A named node with spaces: [ node.1 ]
1177             These spaces are discarded.
1178             8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1179             The spaces around 'node.1' are discarded.
1180             The spaces around ' A Z ' are discarded.
1181             The spaces inside ' A Z ' are preserved (because of the quotes).
1182             Double-quotes act in the same way.
1183             9: A named node with attributes, with spaces:
1184             [ node.1 ] { label : Flight Path from Melbourne to London }
1185             Space preservation is as above.
1186             10: A named node with escaped characters: [\[node\]]
1187             The '[' and ']' chars are preserved.
1188             11: A named node with [] in name: [[ \]]
1189             However, since '[' and ']' delimit node names, you are I advised to escape such
1190             characters.
1191             12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1192             13: A complete graph:
1193             [node.1]
1194             -> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1195             -> {color: blue; label: B 2; penwidth: 3}
1196             -> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1197             [] {label: 'Some node'}
1198             -> [node.2]
1199              
1200             =head2 Edges in DASH
1201              
1202             Edge names:
1203              
1204             1: Are '->'
1205             This is part of a directed graph.
1206             2: Or '--'
1207             This is part of an undirected graph.
1208             3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1209             This is called 'Daisy-chaining'.
1210              
1211             See L for the origin of this term.
1212              
1213             It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1214             reject such input. In other words, directed and undirected graphs are mutually exclusive.
1215              
1216             So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1217             same for '->'.
1218              
1219             Examples:
1220              
1221             1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1222             2: Juxtaposed edges without any spacing and without attributes: ------
1223             3: Juxtaposed edges (without comma) with attributes:
1224             -- {color: cornflowerblue; label: Top row\nBottom row}
1225             -- {color:red; label: Edges use cornflowerblue and red}
1226             4: An edge with attributes, with some escaped characters:
1227             -> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1228              
1229             =head2 Attributes in DASH
1230              
1231             Attributes:
1232              
1233             1: Are delimited by '{' and '}'.
1234             2: Consist of a C and a C, separated by ':'.
1235             3: Are separated by ';'.
1236             4: The DOT language defines a set of escape characters acceptable in such a C.
1237             5: Allow quotes and whitespace as per node names.
1238             This must be true because the same non-Marpa parsers are used for both.
1239             6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1240             not HTML. See data/table.*.ge for examples.
1241              
1242             See L for details.
1243              
1244             Examples:
1245              
1246             1: -- {color: cornflowerblue; label: Top row\nBottom row}
1247             Note the use of '\n' in the value of the label.
1248              
1249             =head1 FAQ
1250              
1251             =head2 What is the grammar parsed by this module?
1252              
1253             See L just above.
1254              
1255             =head2 How is the parsed graph stored in RAM?
1256              
1257             Items are stored in a tree managed by L.
1258              
1259             The sample code in the L will display a tree:
1260              
1261             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1262              
1263             Output:
1264              
1265             root. Attributes: {uid => "0"}
1266             |---prolog. Attributes: {uid => "1"}
1267             |---graph. Attributes: {uid => "2"}
1268             |---node_id. Attributes: {uid => "3", value => "node.1"}
1269             | |---literal. Attributes: {uid => "4", value => "{"}
1270             | |---label. Attributes: {uid => "5", value => "A and B"}
1271             | |---literal. Attributes: {uid => "6", value => "}"}
1272             |---node_id. Attributes: {uid => "7", value => "node.2"}
1273             |---literal. Attributes: {uid => "8", value => "{"}
1274             |---label. Attributes: {uid => "9", value => "A or B"}
1275             |---literal. Attributes: {uid => "10", value => "}"}
1276             Parse result: 0 (0 is success)
1277              
1278             See also the next question.
1279              
1280             =head2 What is the structure of the tree of parsed tokens?
1281              
1282             From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1283             currently used. It is used by L.
1284              
1285             The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1286             L to convert the tree (i.e. the input file) into a DOT file
1287             and into an image.
1288              
1289             =head2 Does this module handle utf8?
1290              
1291             Yes. See the last sample on L.
1292              
1293             =head2 Why doesn't the parser handle my HTML-style labels?
1294              
1295             Traps for young players:
1296              
1297             =over 4
1298              
1299             =item o The
component must include the '/'
1300              
1301             =back
1302              
1303             =head2 Why do I get error messages like the following?
1304              
1305             Error: :1: syntax error near line 1
1306             context: digraph >>> Graph <<< {
1307              
1308             Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1309             So, don't do this:
1310              
1311             strict graph graph{...}
1312             strict graph Graph{...}
1313             strict graph strict{...}
1314             etc...
1315              
1316             Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1317              
1318             strict graph "graph"{...}
1319              
1320             Even better, use a more meaningful name for your graph...
1321              
1322             The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1323              
1324             See L in the discussion of the syntax of DOT
1325             for details.
1326              
1327             =head2 What is the homepage of Marpa?
1328              
1329             L.
1330              
1331             =head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1332              
1333             I've included in a recent article a section called
1334             L
1335             which is aimed at helping us think about this issue.
1336              
1337             =head2 How did you generate the html/*.svg files?
1338              
1339             With a private script which uses L V 2.00. This script is not shipped
1340             in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1341             Makefile.PL would complain about the missing dependency.
1342              
1343             See L for details.
1344              
1345             =head1 Machine-Readable Change Log
1346              
1347             The file Changes was converted into Changelog.ini by L.
1348              
1349             =head1 Version Numbers
1350              
1351             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1352              
1353             =head1 Repository
1354              
1355             L
1356              
1357             =head1 Support
1358              
1359             Email the author, or log a bug on RT:
1360              
1361             L.
1362              
1363             =head1 Author
1364              
1365             L was written by Ron Savage Iron@savage.net.auE> in 2013.
1366              
1367             Marpa's homepage: .
1368              
1369             My homepage: L.
1370              
1371             =head1 Copyright
1372              
1373             Australian copyright (c) 2013, Ron Savage.
1374              
1375             All Programs of mine are 'OSI Certified Open Source Software';
1376             you can redistribute them and/or modify them under the terms of
1377             The Artistic License, a copy of which is available at:
1378             http://www.opensource.org/licenses/index.html
1379              
1380             =cut